Moved scripts to other repo, CI setup

simon987 2019-05-08 20:58:13 -04:00
parent 0acffff609
commit 0e998eaf9b
49 changed files with 597 additions and 905 deletions

1
.gitignore vendored

@@ -8,7 +8,6 @@ __pycache__/
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/

6
.gitmodules vendored

@@ -1,6 +0,0 @@
[submodule "task_tracker_drone"]
path = task_tracker_drone
url = https://github.com/simon987/task_tracker_drone/
[submodule "last.fm"]
path = last.fm
url = https://git.simon987.net/drone/last.fm


@@ -6,43 +6,6 @@
wip
### Data import from MusicBrainz & Last.fm
```bash
# Download latest database dump
./get_musicbrainz_dump.sh
# Convert to .csv
python convert_mb.py
# Generate scraping tasks for task_tracker_drone (See notes)
python generate_scrape_tasks.py
# Apply last.fm data to artist.csv
python patch_artists_with_lastfm.py "/path/to/lastfm_data.db"
# Expose generated .csv data to the network
cd repo/
python -m http.server 9999
# On the machine where neo4j is installed:
./import.sh
```
### task_tracker setup
Last.fm API calls are queued to [task_tracker](https://github.com/simon987/task_tracker/),
and the results are gathered by a [task_tracker_drone](https://github.com/simon987/task_tracker_drone/)
([script](https://git.simon987.net/drone/last.fm/src/master/run)).
Project secret:
```json
{
"apikey": "<Your Last.fm api key>",
"user": "<Your Last.fm username>"
}
```
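For reference, the drone calls the project's `run` script with the task JSON as its first argument and reads a JSON result from stdout. Below is a minimal, illustrative sketch of that contract, modelled on the `caa/run` script removed in this commit; how the project secret above reaches the script is drone-specific and not shown here.
```python
#!/usr/bin/env python
# Illustrative sketch of a task_tracker_drone run script (modelled on caa/run).
# Assumes the task is passed as a JSON string in argv[1] and that its "recipe"
# field is a JSON-encoded list of work items.
import json
import sys
import traceback

try:
    task = json.loads(sys.argv[1])
    items = json.loads(task["recipe"])
    for item in items:
        pass  # do the actual scraping work for this item here
except Exception as e:
    # A non-zero "result" plus logs tells task_tracker the task failed
    print(json.dumps({
        "result": 1,
        "logs": [{"message": str(e) + "$$" + traceback.format_exc(), "level": 3}]
    }))
    sys.exit(2)

print(json.dumps({"result": 0}))
```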
### API setup


120
caa/run

@@ -1,120 +0,0 @@
#!/usr/bin/env python
import json
import os
import random
import sqlite3
import sys
import traceback
from io import BytesIO
from pathlib import Path
import PIL
import requests
from PIL import Image
PIL.Image.MAX_IMAGE_PIXELS = 933120000
current_mbid = ""
def should_download(image: dict):
return image["front"] is True
def thumb(cover_blob):
with Image.open(BytesIO(cover_blob)) as image:
# https://stackoverflow.com/questions/43978819
if image.mode == "I;16":
image.mode = "I"
image = image.point(lambda i: i * (1. / 256)).convert('L')
image.thumbnail((256, 256), Image.BICUBIC)
canvas = Image.new("RGB", image.size, 0x000000)
if image.mode in ('RGBA', 'LA') or (image.mode == 'P' and 'transparency' in image.info):
try:
canvas.paste(image, mask=image.split()[-1])
except ValueError:
canvas.paste(image)
else:
canvas.paste(image)
blob = BytesIO()
canvas.save(blob, "JPEG", quality=85, optimize=True)
canvas.close()
return blob.getvalue()
def download(mbid):
global current_mbid
current_mbid = mbid
r = requests.get("https://archive.org/metadata/mbid-" + mbid)
meta = r.json()
if "files" not in meta or "workable_servers" not in meta or not meta["workable_servers"]:
return
directory = "https://" + random.choice(meta["workable_servers"]) + meta["dir"]
index = directory + "/index.json"
r = requests.get(index)
if r.status_code == 404:
mb_meta = meta
urls = [
directory + "/" + f["name"]
for f in meta["files"] if "thumb" not in f["name"] and
not f["name"].endswith((".xml", ".txt", ".json", ".torrent"))
]
else:
mb_meta = r.json()
urls = [
directory + "/mbid-" + mbid + "-" + image["image"][image["image"].rfind("/") + 1:]
for image in mb_meta["images"] if should_download(image)
]
if not urls:
return
cover = requests.get(urls[0]).content
if cover:
dbfile = "/mnt/Data7/caa_p2.db"
if not os.path.exists(dbfile):
with sqlite3.connect(dbfile, timeout=30000) as conn:
c = conn.cursor()
c.execute(
"CREATE TABLE covers(id TEXT, cover BLOB, tn BLOB, meta TEXT, ts TEXT default CURRENT_TIMESTAMP)")
try:
tn = thumb(cover)
except Exception:
tn = None
with sqlite3.connect(dbfile, timeout=30000) as conn:
c = conn.cursor()
c.execute("INSERT INTO covers (id, cover, tn, meta) VALUES (?,?,?,?)",
(mbid, cover, tn, json.dumps(mb_meta),))
try:
task_str = sys.argv[1]
task = json.loads(task_str)
mbids = json.loads(task["recipe"])
for mbid in mbids:
download(mbid)
except Exception as e:
print(json.dumps({
"result": 1,
"logs": [
{"message": str(e) + "$$" + current_mbid + "$$" + traceback.format_exc(), "level": 3}
]
}))
quit(2)
print(json.dumps({
"result": 0,
}))


@@ -1,20 +0,0 @@
#!/usr/bin/env bash
latest=$(curl http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/LATEST)
mkdir in 2> /dev/null
cd in
wget -nc "http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/${latest}/mbdump.tar.bz2"
wget -nc "http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/${latest}/mbdump-derived.tar.bz2"
tar -xjvf mbdump.tar.bz2 mbdump/area mbdump/artist mbdump/l_area_area mbdump/l_artist_artist \
mbdump/l_artist_release mbdump/l_artist_release_group mbdump/l_label_label mbdump/l_release_group_release_group \
mbdump/label mbdump/label_type mbdump/link mbdump/link_type mbdump/release mbdump/release_group \
mbdump/release_group_primary_type mbdump/artist_credit_name mbdump/release_status
tar -xjvf mbdump-derived.tar.bz2 mbdump/artist_tag mbdump/release_group_tag mbdump/tag mbdump/tag_relation \
mbdump/release_group_meta
mv mbdump/* .
rm -r mbdump
cd ..


@@ -1,27 +0,0 @@
import sqlite3
import sys
with sqlite3.connect(sys.argv[1]) as conn:
cursor = conn.cursor()
cursor.execute("SELECT id from covers")
def rows():
buf = list()
for row in cursor.fetchall():
buf.append(row[0])
if len(buf) > 30:
yield buf
buf.clear()
for batch in rows():
cursor.execute("SELECT cover from covers where id in (%s)" % (",".join(("'" + b + "'") for b in batch)))
covers = cursor.fetchall()
for i, cover in enumerate(covers):
with open("./tmpcovers/" + batch[i] + ".jpg", "wb") as out:
out.write(cover[0])
print(batch[i])


@@ -1,56 +0,0 @@
import json
from multiprocessing.pool import ThreadPool
from task_tracker_drone.src.tt_drone.api import TaskTrackerApi, Worker
TT_API_URL = "https://tt.simon987.net/api"
TT_PROJECT = 5
done = set()
# with sqlite3.connect(sys.argv[1]) as conn:
# cur = conn.cursor()
# cur.execute("SELECT id FROM covers")
# for mbid in cur.fetchall():
# done.add(mbid[0])
api = TaskTrackerApi(TT_API_URL)
worker = Worker.from_file(api)
if not worker:
worker = api.make_worker("caa scraper")
worker.dump_to_file()
worker.request_access(TT_PROJECT, True, True)
input("Give permission to " + worker.alias)
def mktask(mbids):
res = worker.submit_task(
project=TT_PROJECT,
recipe=json.dumps(mbids),
hash64=hash(mbids[0]),
max_assign_time=60 * 30,
priority=1,
unique_str=None,
verification_count=None,
max_retries=5,
)
print(res.text)
def lines():
with open("in/release") as f:
buf = list()
for line in f:
cols = line.split("\t")
buf.append(cols[1])
if len(buf) == 75:
a = list(buf)
buf.clear()
yield a
pool = ThreadPool(processes=20)
pool.map(func=mktask, iterable=lines())


@@ -1,48 +0,0 @@
import csv
import json
from multiprocessing.pool import ThreadPool
from task_tracker_drone.src.tt_drone.api import TaskTrackerApi, Worker
TT_API_URL = "https://tt.simon987.net/api"
TT_PROJECT = 1
api = TaskTrackerApi(TT_API_URL)
worker = Worker.from_file(api)
if not worker:
worker = api.make_worker("last.fm scraper")
worker.dump_to_file()
worker.request_access(TT_PROJECT, True, True)
input("Give permission to " + worker.alias)
with open("repo/artist.csv") as f:
reader = csv.reader(f)
def mktask(lines):
res = worker.submit_task(
project=TT_PROJECT,
recipe=json.dumps(
[{"mbid": line[0], "name": line[1]} for line in lines]
),
unique_str=lines[0][0],
max_assign_time=60 * 5,
)
print(res.text)
def lines():
line_batch = list()
for line in reader:
if "Group" in line[3]:
line_batch.append(line)
if len(line_batch) >= 30:
res = list(line_batch)
line_batch.clear()
yield res
tasks = list(lines())
pool = ThreadPool(processes=25)
pool.map(func=mktask, iterable=tasks)

41
jenkins/Jenkinsfile vendored

@@ -0,0 +1,41 @@
def remote = [:]
remote.name = 'remote'
remote.host = env.DEPLOY_HOST
remote.user = env.DEPLOY_USER
remote.identityFile = '/var/lib/jenkins/.ssh/id_rsa'
remote.knownHosts = '/var/lib/jenkins/.ssh/known_hosts'
remote.allowAnyHosts = true
remote.retryCount = 3
remote.retryWaitSec = 3
logLevel = 'FINER'
pipeline {
agent none
stages {
stage('Build') {
agent {
docker {
image 'node:10-alpine'
args '--network "host"'
}
}
steps {
sh 'cd music_graph/ && npm install && npm audit fix && npm run build'
sh 'mv music_graph/dist webroot'
stash includes: 'webroot/', name: 'webdist'
}
}
stage('Deploy') {
agent none
steps {
node('master') {
unstash 'webdist'
sshCommand remote: remote, command: "cd simon987 && rm -rf webroot/* deploy.sh"
sshPut remote: remote, from: 'webroot/', into: 'music-graph'
sshPut remote: remote, from: 'jenkins/deploy.sh', into: 'music-graph/'
sshCommand remote: remote, command: 'chmod +x music-graph/deploy.sh && ./music-graph/deploy.sh'
}
}
}
}
}


@@ -1,4 +1 @@
#!/usr/bin/env bash
git submodule init
git submodule update --remote


@@ -1 +1,5 @@
#!/usr/bin/env bash
export MGROOT="music-graph"
chmod 755 -R "${MGROOT}/webroot"

@@ -1 +0,0 @@
Subproject commit 855df64c316930062ff4f7740492d0f039788498


@@ -1,53 +0,0 @@
#!/bin/bash
export NEO4J_HOME="/home/drone/Downloads/neo4j-community-3.5.3"
export REPOSITORY="http://localhost:9999"
export DATABASE="graph.db"
rm -rf "${NEO4J_HOME}/data/databases/${DATABASE}"
cp ${NEO4J_HOME}/conf/neo4j.conf ${NEO4J_HOME}/conf/neo4j.conf.bak
echo "dbms.security.auth_enabled=false" >> ${NEO4J_HOME}/conf/neo4j.conf
mkdir workspace 2> /dev/null
cd workspace
rm *.csv
wget ${REPOSITORY}/area.csv
wget ${REPOSITORY}/area_area.csv
wget ${REPOSITORY}/lastfm_artist.csv
wget ${REPOSITORY}/artist_area.csv
wget ${REPOSITORY}/artist_artist.csv
wget ${REPOSITORY}/artist_release.csv
wget ${REPOSITORY}/release.csv
wget ${REPOSITORY}/tag.csv
wget ${REPOSITORY}/tag_tag.csv
wget ${REPOSITORY}/release_tag.csv
wget ${REPOSITORY}/release_release.csv
wget ${REPOSITORY}/artist_tag.csv
wget ${REPOSITORY}/labels.csv
wget ${REPOSITORY}/label_label.csv
wget ${REPOSITORY}/lastfm_artist_artist.csv
. ${NEO4J_HOME}/bin/neo4j-admin import \
--database ${DATABASE} \
--high-io=true \
--nodes:Area:MusicBrainzEntity "area.csv" \
--nodes:MusicBrainzEntity "release.csv" \
--nodes:MusicBrainzEntity "lastfm_artist.csv" \
--nodes:Tag "tag.csv" \
--nodes:MusicBrainzEntity "labels.csv" \
--relationships:IS_PART_OF "area_area.csv" \
--relationships:IS_BASED_IN "artist_area.csv" \
--relationships "artist_artist.csv" \
--relationships "artist_release.csv" \
--relationships:IS_TAGGED "release_tag.csv" \
--relationships:IS_TAGGED "artist_tag.csv" \
--relationships:IS_RELATED_TO "tag_tag.csv" \
--relationships "label_label.csv" \
--relationships "release_release.csv" \
--relationships:IS_RELATED_TO "lastfm_artist_artist.csv"
rm *.csv
cd ..


@@ -1,31 +0,0 @@
import sqlite3
release_to_release_group_map = dict()
release_groups = dict()
with open("in/release_group") as f:
for line in f:
cols = line.split("\t")
release_groups[cols[0]] = cols[1]
with open("in/release") as f:
for line in f:
cols = line.split("\t")
release_to_release_group_map[cols[1]] = release_groups[cols[4]]
with sqlite3.connect("mapdb.db") as conn:
cursor = conn.cursor()
cursor.execute("CREATE TABLE map (release TEXT PRIMARY KEY , release_group TEXT)")
for k, v in release_to_release_group_map.items():
cursor.execute("INSERT INTO map (release, release_group) VALUES (?,?)", (k, v))
conn.commit()
"""
CREATE TABLE covers (id TEXT primary key, cover BLOB);
ATTACH 'mapdb.db' AS map;
ATTACH '/mnt/Data8/caa_tn_only.db' AS source;
INSERT OR IGNORE INTO covers SELECT release_group, cover FROM source.covers INNER JOIN map.map ON id = map.release;
"""


@@ -0,0 +1,41 @@
'use strict'
require('./check-versions')()
process.env.NODE_ENV = 'production'
const ora = require('ora')
const rm = require('rimraf')
const path = require('path')
const chalk = require('chalk')
const webpack = require('webpack')
const config = require('../config')
const webpackConfig = require('./webpack.prod.conf')
const spinner = ora('building for production...')
spinner.start()
rm(path.join(config.build.assetsRoot, config.build.assetsSubDirectory), err => {
if (err) throw err
webpack(webpackConfig, (err, stats) => {
spinner.stop()
if (err) throw err
process.stdout.write(stats.toString({
colors: true,
modules: false,
children: false, // If you are using ts-loader, setting this to true will make TypeScript errors show up during build.
chunks: false,
chunkModules: false
}) + '\n\n')
if (stats.hasErrors()) {
console.log(chalk.red(' Build failed with errors.\n'))
process.exit(1)
}
console.log(chalk.cyan(' Build complete.\n'))
console.log(chalk.yellow(
' Tip: built files are meant to be served over an HTTP server.\n' +
' Opening index.html over file:// won\'t work.\n'
))
})
})


@@ -0,0 +1,54 @@
'use strict'
const chalk = require('chalk')
const semver = require('semver')
const packageConfig = require('../package.json')
const shell = require('shelljs')
function exec (cmd) {
return require('child_process').execSync(cmd).toString().trim()
}
const versionRequirements = [
{
name: 'node',
currentVersion: semver.clean(process.version),
versionRequirement: packageConfig.engines.node
}
]
if (shell.which('npm')) {
versionRequirements.push({
name: 'npm',
currentVersion: exec('npm --version'),
versionRequirement: packageConfig.engines.npm
})
}
module.exports = function () {
const warnings = []
for (let i = 0; i < versionRequirements.length; i++) {
const mod = versionRequirements[i]
if (!semver.satisfies(mod.currentVersion, mod.versionRequirement)) {
warnings.push(mod.name + ': ' +
chalk.red(mod.currentVersion) + ' should be ' +
chalk.green(mod.versionRequirement)
)
}
}
if (warnings.length) {
console.log('')
console.log(chalk.yellow('To use this template, you must update the following modules:'))
console.log()
for (let i = 0; i < warnings.length; i++) {
const warning = warnings[i]
console.log(' ' + warning)
}
console.log()
process.exit(1)
}
}

101
music_graph/build/utils.js Normal file

@@ -0,0 +1,101 @@
'use strict'
const path = require('path')
const config = require('../config')
const ExtractTextPlugin = require('extract-text-webpack-plugin')
const packageConfig = require('../package.json')
exports.assetsPath = function (_path) {
const assetsSubDirectory = process.env.NODE_ENV === 'production'
? config.build.assetsSubDirectory
: config.dev.assetsSubDirectory
return path.posix.join(assetsSubDirectory, _path)
}
exports.cssLoaders = function (options) {
options = options || {}
const cssLoader = {
loader: 'css-loader',
options: {
sourceMap: options.sourceMap
}
}
const postcssLoader = {
loader: 'postcss-loader',
options: {
sourceMap: options.sourceMap
}
}
// generate loader string to be used with extract text plugin
function generateLoaders (loader, loaderOptions) {
const loaders = options.usePostCSS ? [cssLoader, postcssLoader] : [cssLoader]
if (loader) {
loaders.push({
loader: loader + '-loader',
options: Object.assign({}, loaderOptions, {
sourceMap: options.sourceMap
})
})
}
// Extract CSS when that option is specified
// (which is the case during production build)
if (options.extract) {
return ExtractTextPlugin.extract({
use: loaders,
fallback: 'vue-style-loader'
})
} else {
return ['vue-style-loader'].concat(loaders)
}
}
// https://vue-loader.vuejs.org/en/configurations/extract-css.html
return {
css: generateLoaders(),
postcss: generateLoaders(),
less: generateLoaders('less'),
sass: generateLoaders('sass', { indentedSyntax: true }),
scss: generateLoaders('sass'),
stylus: generateLoaders('stylus'),
styl: generateLoaders('stylus')
}
}
// Generate loaders for standalone style files (outside of .vue)
exports.styleLoaders = function (options) {
const output = []
const loaders = exports.cssLoaders(options)
for (const extension in loaders) {
const loader = loaders[extension]
output.push({
test: new RegExp('\\.' + extension + '$'),
use: loader
})
}
return output
}
exports.createNotifierCallback = () => {
const notifier = require('node-notifier')
return (severity, errors) => {
if (severity !== 'error') return
const error = errors[0]
const filename = error.file && error.file.split('!').pop()
notifier.notify({
title: packageConfig.name,
message: severity + ': ' + error.name,
subtitle: filename || '',
icon: path.join(__dirname, 'logo.png')
})
}
}


@@ -0,0 +1,22 @@
'use strict'
const utils = require('./utils')
const config = require('../config')
const isProduction = process.env.NODE_ENV === 'production'
const sourceMapEnabled = isProduction
? config.build.productionSourceMap
: config.dev.cssSourceMap
module.exports = {
loaders: utils.cssLoaders({
sourceMap: sourceMapEnabled,
extract: isProduction
}),
cssSourceMap: sourceMapEnabled,
cacheBusting: config.dev.cacheBusting,
transformToRequire: {
video: ['src', 'poster'],
source: 'src',
img: 'src',
image: 'xlink:href'
}
}


@@ -0,0 +1,91 @@
'use strict'
const path = require('path')
const utils = require('./utils')
const config = require('../config')
const vueLoaderConfig = require('./vue-loader.conf')
function resolve (dir) {
return path.join(__dirname, '..', dir)
}
const createLintingRule = () => ({
test: /\.(js|vue)$/,
loader: 'eslint-loader',
enforce: 'pre',
include: [resolve('src'), resolve('test')],
options: {
formatter: require('eslint-friendly-formatter'),
emitWarning: !config.dev.showEslintErrorsInOverlay
}
})
module.exports = {
context: path.resolve(__dirname, '../'),
entry: {
app: './src/main.js'
},
output: {
path: config.build.assetsRoot,
filename: '[name].js',
publicPath: process.env.NODE_ENV === 'production'
? config.build.assetsPublicPath
: config.dev.assetsPublicPath
},
resolve: {
extensions: ['.js', '.vue', '.json'],
alias: {
'@': resolve('src'),
}
},
module: {
rules: [
...(config.dev.useEslint ? [createLintingRule()] : []),
{
test: /\.vue$/,
loader: 'vue-loader',
options: vueLoaderConfig
},
{
test: /\.js$/,
loader: 'babel-loader',
include: [resolve('src'), resolve('test'), resolve('node_modules/webpack-dev-server/client')]
},
{
test: /\.(png|jpe?g|gif|svg)(\?.*)?$/,
loader: 'url-loader',
options: {
limit: 10000,
name: utils.assetsPath('img/[name].[hash:7].[ext]')
}
},
{
test: /\.(mp4|webm|ogg|mp3|wav|flac|aac)(\?.*)?$/,
loader: 'url-loader',
options: {
limit: 10000,
name: utils.assetsPath('media/[name].[hash:7].[ext]')
}
},
{
test: /\.(woff2?|eot|ttf|otf)(\?.*)?$/,
loader: 'url-loader',
options: {
limit: 10000,
name: utils.assetsPath('fonts/[name].[hash:7].[ext]')
}
}
]
},
node: {
// prevent webpack from injecting useless setImmediate polyfill because Vue
// source contains it (although only uses it if it's native).
setImmediate: false,
// prevent webpack from injecting mocks to Node native modules
// that does not make sense for the client
dgram: 'empty',
fs: 'empty',
net: 'empty',
tls: 'empty',
child_process: 'empty'
}
}


@@ -0,0 +1,95 @@
'use strict'
const utils = require('./utils')
const webpack = require('webpack')
const config = require('../config')
const merge = require('webpack-merge')
const path = require('path')
const baseWebpackConfig = require('./webpack.base.conf')
const CopyWebpackPlugin = require('copy-webpack-plugin')
const HtmlWebpackPlugin = require('html-webpack-plugin')
const FriendlyErrorsPlugin = require('friendly-errors-webpack-plugin')
const portfinder = require('portfinder')
const HOST = process.env.HOST
const PORT = process.env.PORT && Number(process.env.PORT)
const devWebpackConfig = merge(baseWebpackConfig, {
module: {
rules: utils.styleLoaders({ sourceMap: config.dev.cssSourceMap, usePostCSS: true })
},
// cheap-module-eval-source-map is faster for development
devtool: config.dev.devtool,
// these devServer options should be customized in /config/index.js
devServer: {
clientLogLevel: 'warning',
historyApiFallback: {
rewrites: [
{ from: /.*/, to: path.posix.join(config.dev.assetsPublicPath, 'index.html') },
],
},
hot: true,
contentBase: false, // since we use CopyWebpackPlugin.
compress: true,
host: HOST || config.dev.host,
port: PORT || config.dev.port,
open: config.dev.autoOpenBrowser,
overlay: config.dev.errorOverlay
? { warnings: false, errors: true }
: false,
publicPath: config.dev.assetsPublicPath,
proxy: config.dev.proxyTable,
quiet: true, // necessary for FriendlyErrorsPlugin
watchOptions: {
poll: config.dev.poll,
}
},
plugins: [
new webpack.DefinePlugin({
'process.env': require('../config/dev.env')
}),
new webpack.HotModuleReplacementPlugin(),
new webpack.NamedModulesPlugin(), // HMR shows correct file names in console on update.
new webpack.NoEmitOnErrorsPlugin(),
// https://github.com/ampedandwired/html-webpack-plugin
new HtmlWebpackPlugin({
filename: 'index.html',
template: 'index.html',
inject: true
}),
// copy custom static assets
new CopyWebpackPlugin([
{
from: path.resolve(__dirname, '../static'),
to: config.dev.assetsSubDirectory,
ignore: ['.*']
}
])
]
})
module.exports = new Promise((resolve, reject) => {
portfinder.basePort = process.env.PORT || config.dev.port
portfinder.getPort((err, port) => {
if (err) {
reject(err)
} else {
// publish the new Port, necessary for e2e tests
process.env.PORT = port
// add port to devServer config
devWebpackConfig.devServer.port = port
// Add FriendlyErrorsPlugin
devWebpackConfig.plugins.push(new FriendlyErrorsPlugin({
compilationSuccessInfo: {
messages: [`Your application is running here: http://${devWebpackConfig.devServer.host}:${port}`],
},
onErrors: config.dev.notifyOnErrors
? utils.createNotifierCallback()
: undefined
}))
resolve(devWebpackConfig)
}
})
})


@@ -0,0 +1,145 @@
'use strict'
const path = require('path')
const utils = require('./utils')
const webpack = require('webpack')
const config = require('../config')
const merge = require('webpack-merge')
const baseWebpackConfig = require('./webpack.base.conf')
const CopyWebpackPlugin = require('copy-webpack-plugin')
const HtmlWebpackPlugin = require('html-webpack-plugin')
const ExtractTextPlugin = require('extract-text-webpack-plugin')
const OptimizeCSSPlugin = require('optimize-css-assets-webpack-plugin')
const UglifyJsPlugin = require('uglifyjs-webpack-plugin')
const env = require('../config/prod.env')
const webpackConfig = merge(baseWebpackConfig, {
module: {
rules: utils.styleLoaders({
sourceMap: config.build.productionSourceMap,
extract: true,
usePostCSS: true
})
},
devtool: config.build.productionSourceMap ? config.build.devtool : false,
output: {
path: config.build.assetsRoot,
filename: utils.assetsPath('js/[name].[chunkhash].js'),
chunkFilename: utils.assetsPath('js/[id].[chunkhash].js')
},
plugins: [
// http://vuejs.github.io/vue-loader/en/workflow/production.html
new webpack.DefinePlugin({
'process.env': env
}),
new UglifyJsPlugin({
uglifyOptions: {
compress: {
warnings: false
}
},
sourceMap: config.build.productionSourceMap,
parallel: true
}),
// extract css into its own file
new ExtractTextPlugin({
filename: utils.assetsPath('css/[name].[contenthash].css'),
// Setting the following option to `false` will not extract CSS from codesplit chunks.
// Their CSS will instead be inserted dynamically with style-loader when the codesplit chunk has been loaded by webpack.
// It's currently set to `true` because we are seeing that sourcemaps are included in the codesplit bundle as well when it's `false`,
// increasing file size: https://github.com/vuejs-templates/webpack/issues/1110
allChunks: true,
}),
// Compress extracted CSS. We are using this plugin so that possible
// duplicated CSS from different components can be deduped.
new OptimizeCSSPlugin({
cssProcessorOptions: config.build.productionSourceMap
? { safe: true, map: { inline: false } }
: { safe: true }
}),
// generate dist index.html with correct asset hash for caching.
// you can customize output by editing /index.html
// see https://github.com/ampedandwired/html-webpack-plugin
new HtmlWebpackPlugin({
filename: config.build.index,
template: 'index.html',
inject: true,
minify: {
removeComments: true,
collapseWhitespace: true,
removeAttributeQuotes: true
// more options:
// https://github.com/kangax/html-minifier#options-quick-reference
},
// necessary to consistently work with multiple chunks via CommonsChunkPlugin
chunksSortMode: 'dependency'
}),
// keep module.id stable when vendor modules does not change
new webpack.HashedModuleIdsPlugin(),
// enable scope hoisting
new webpack.optimize.ModuleConcatenationPlugin(),
// split vendor js into its own file
new webpack.optimize.CommonsChunkPlugin({
name: 'vendor',
minChunks (module) {
// any required modules inside node_modules are extracted to vendor
return (
module.resource &&
/\.js$/.test(module.resource) &&
module.resource.indexOf(
path.join(__dirname, '../node_modules')
) === 0
)
}
}),
// extract webpack runtime and module manifest to its own file in order to
// prevent vendor hash from being updated whenever app bundle is updated
new webpack.optimize.CommonsChunkPlugin({
name: 'manifest',
minChunks: Infinity
}),
// This instance extracts shared chunks from code splitted chunks and bundles them
// in a separate chunk, similar to the vendor chunk
// see: https://webpack.js.org/plugins/commons-chunk-plugin/#extra-async-commons-chunk
new webpack.optimize.CommonsChunkPlugin({
name: 'app',
async: 'vendor-async',
children: true,
minChunks: 3
}),
// copy custom static assets
new CopyWebpackPlugin([
{
from: path.resolve(__dirname, '../static'),
to: config.build.assetsSubDirectory,
ignore: ['.*']
}
])
]
})
if (config.build.productionGzip) {
const CompressionWebpackPlugin = require('compression-webpack-plugin')
webpackConfig.plugins.push(
new CompressionWebpackPlugin({
asset: '[path].gz[query]',
algorithm: 'gzip',
test: new RegExp(
'\\.(' +
config.build.productionGzipExtensions.join('|') +
')$'
),
threshold: 10240,
minRatio: 0.8
})
)
}
if (config.build.bundleAnalyzerReport) {
const BundleAnalyzerPlugin = require('webpack-bundle-analyzer').BundleAnalyzerPlugin
webpackConfig.plugins.push(new BundleAnalyzerPlugin())
}
module.exports = webpackConfig

3
package-lock.json generated Normal file

@@ -0,0 +1,3 @@
{
"lockfileVersion": 1
}


@@ -1,100 +0,0 @@
import csv
import json
import sqlite3
from collections import defaultdict
import sys
artists = set()
def disambiguate(lfm_artist, artist_release_count, name, mbid):
existing_mbid = lfm_artist.get(name, None)
if existing_mbid and mbid != existing_mbid:
if artist_release_count[existing_mbid] < artist_release_count[mbid]:
lfm_artist[name] = mbid
print("Replacing %s (%s) with %s (%d) for %s" %
(existing_mbid, artist_release_count[existing_mbid],
mbid, artist_release_count[mbid],
name))
else:
lfm_artist[name] = mbid
def patch(lastfm_data):
artist_listeners = dict()
lastfm_artist_to_mbid = dict()
artist_release_count = defaultdict(int)
related = list()
with open("repo/artist_release.csv") as f:
for line in f:
cols = line.split(',')
artist_release_count[cols[0]] += 1
with sqlite3.connect(lastfm_data) as conn:
cur = conn.cursor()
cur.execute("SELECT data FROM lastfmdata", )
data = list(cur.fetchall())
# A lastfm artist name can refer to multiple MBIDs
# For RELATED_TO purposes, we assume that the MBID referring
# to the artist with the most official releases is the one
for row in data:
meta = json.loads(row[0])
disambiguate(lastfm_artist_to_mbid, artist_release_count, meta["name"], meta["artist"])
for similar in [s for s in meta["similar"] if s["mbid"] is not None]:
disambiguate(lastfm_artist_to_mbid, artist_release_count, similar["name"], similar["mbid"])
# Get related links & listener counts
for row in data:
meta = json.loads(row[0])
artist_listeners[lastfm_artist_to_mbid[meta["name"]]] = \
(meta["listeners"], meta["playcount"])
for similar in [s for s in meta["similar"] if s["mbid"] is not None]:
related.append((
lastfm_artist_to_mbid[similar["name"]],
lastfm_artist_to_mbid[meta["name"]],
similar["match"]
))
with open("repo/lastfm_artist.csv", "w") as out:
writer = csv.writer(out)
writer.writerow([
"id:ID(Artist)", "name", "year:short", ":LABEL", "listeners:int", "playcount:int"
])
with open("repo/artist.csv") as f:
reader = csv.reader(f)
reader.__next__() # Skip header
for row in reader:
writer.writerow([
row[0],
row[1],
row[2],
row[3],
artist_listeners.get(row[0], (0, 0))[0],
artist_listeners.get(row[0], (0, 0))[1],
])
artists.add(row[0])
with open("repo/lastfm_artist_artist.csv", "w") as out:
out.write(",".join((
":START_ID(Artist)", ":END_ID(Artist)", "weight:float"
)) + "\n")
for x in related:
if x[0] in artists and x[1] in artists:
out.write(",".join(x) + "\n")
patch(sys.argv[1])


@@ -1,393 +0,0 @@
import os
from collections import defaultdict
links = dict()
link_types = dict()
areas = dict()
labels = dict()
label_types = {
"\\N": ""
}
release_groups = dict()
release_statuses = dict()
release_to_release_group_map = dict()
release_types = {
"\\N": "",
}
artists = dict()
tags = dict()
release_release_rel_map = {
"covers and versions": "",
"remixes and compilations": "",
"DJ-mix": "IS_DJ_MIX_OF",
"live performance": "IS_LIVE_PERFORMANCE_OF",
"cover": "IS_COVER_OF",
"remix": "IS_REMIX_OF",
"mashes up": "IS_MASHUP_OF",
"included in": "INCLUDED_IN",
"single from": "IS_SINGLE_FROM"
}
artist_release_rel_map = {
"translator": "TRANSLATED",
"liner notes": "WROTE_LINER_NOTES",
"lyricist": "IS_LYRICIST_FOR",
"lacquer cut": "DID_LACQUER_CUT_FOR",
"samples from artist": "HAS_SAMPLES_IN",
"remixes and compilations": "",
"composition": "COMPOSED",
"booking": "DID_BOOKING_FOR",
"balance": "DID_BALANCE_FOR",
"misc": "HAS_MISC_ROLE_IN",
"conductor": "CONDUCTED",
"legal representation": "PROVIDED_LEGAL_REPRESENTATION_FOR",
"design/illustration": "DID_DESIGN_FOR",
"performing orchestra": "PERFORMED_FOR",
"producer": "PRODUCED",
"instrument": "PERFORMED_INSTRUMENT_FOR",
"writer": "WROTE_LYRICS_FOR",
"production": "DID_PRODUCTION_FOR",
"performance": "PERFORMED_FOR",
"composer": "IS_COMPOSER_FOR",
"sound": "DID_SOUND_FOR",
"remixer": "DID_REMIXING_FOR",
"orchestrator": "IS_ORCHESTRATOR_FOR",
"compiler": "DID_COMPILATION_FOR",
"vocal arranger": "IS_ARRANGER_FOR",
"arranger": "IS_ARRENGER_FOR",
"mix-DJ": "MIXED",
"editor": "IS_EDITOR_FOR",
"illustration": "DID_ILLUSTRATION_FOR",
"audio": "DID_AUDIO_FOR",
"publishing": "IS_PUBLISHER_FOR",
"art direction": "DID_ART_DIRECTOR_FOR",
"design": "DID_DESIGN_FOR",
"instrument arranger": "IS_ARRANGER_FOR",
"chorus master": "IS_CHORUS_MASTER_FOR",
"photography": "DID_PHOTOGRAPHY_FOR",
"performer": "PERFORMED_IN",
"graphic design": "DID_GRAPHIC_DESIGN_FOR",
"booklet editor": "IS_BOOKLET_EDITOR_FOR",
"programming": "DID_PROGRAMING_FOR",
"copyright": "IS_COPYRIGHT_HOLDER_OF",
"piano technician": "IS_PIANO_TECNICIAN_FOR",
"phonographic copyright": "IS_PHONOGRAPHIC_COPYRIGHT_HOLDER_OF",
"mastering": "DID_MASTERING_FOR",
"vocal": "PERFORED_VOCALS_FOR",
"librettist": "IS_LIBRETTIST_FOR",
"mix": "MIXED",
"recording": "DID_RECORDING_FOR",
"concertmaster": "IS_CONCERTMASTER_FOR",
"engineer": "IS_ENGINEER_FOR",
# release_group
"tribute": "IS_TRIBUTE_TO",
"dedicated to": "IS_DEDICATED_TO",
"creative direction": "",
"artists and repertoire": ""
}
artist_artist_rel_map = {
"teacher": "TEACHER_OF",
"composer-in-residence": "HAS_COMPOSER-IN-RESIDENCE_STATUS_IN",
"member of band": "IS_MEMBER_OF",
"voice actor": "IS_VOICE_ACTOR_OF",
"tribute": "IS_TRIBUTE_TO",
"supporting musician": "IS_SUPPORTING_MUSICIAN_OF",
"instrumental supporting musician": "IS_INSTRUMENTAL_SUPPORTING_MUSICIAN_OF",
"personal relationship": "HAS_PERSONAL_RELATIONSHIP_WITH",
"musical relationships": "HAS_MUSICAL_RELATIONSHIP_WITH",
"collaboration": "HAS_COLLABORATED_WITH",
"married": "IS_MARRIED_WITH",
"sibling": "IS_SIBLING_OF",
"parent": "IS_PARENT_OF",
"is person": "IS",
"conductor position": "IS_CONDUCTOR_OF",
"vocal supporting musician": "DOES_VOCAL_SUPPORT_FOR",
"artistic director": "IS_ARTIST_DIRECTOR_OF",
"subgroup": "IS_SUBGROUP_OF",
"founder": "IS_FOUNDER_OF",
"involved with": "IS_INVOLVED_WITH",
"named after": "IS_NAMED_AFTER",
}
label_label_rel_map = {
"label rename": "WAS_RENAMED_TO",
"imprint": "DOES_IMPRINT_FOR",
"label distribution": "DOES_DISTRIBUTION_FOR",
"business association": "HAS_BUSINESS_ASSOCIATION_TO",
"label ownership": "OWNS",
"label reissue": "DOES_REISSUING_FOR"
}
if not os.path.exists("repo"):
os.mkdir("repo")
else:
os.system("rm repo/*")
if not os.path.exists("tmp"):
os.mkdir("tmp")
else:
os.system("rm tmp/*")
with open("in/link", "r") as f:
for line in f:
cols = line.split("\t")
links[cols[0]] = cols
with open("in/release_status", "r") as f:
for line in f:
cols = line.split("\t")
release_statuses[cols[0]] = cols
with open("in/link_type", "r") as f:
for line in f:
cols = line.split("\t")
link_types[cols[0]] = cols
with open("in/area", "r") as f:
for line in f:
cols = line.split("\t")
areas[cols[0]] = cols
with open("in/label_type") as f:
for line in f:
cols = line.split("\t")
label_types[cols[0]] = ";" + cols[1].replace(" ", "")
if cols[3] != "\\N" and cols[2] in label_types:
label_types[cols[0]] += label_types[cols[2]].replace(" ", "")
with open("in/artist") as f:
for line in f:
cols = line.split("\t")
artists[cols[0]] = cols
with open("repo/area_area.csv", "w") as out:
out.write(":START_ID(Area),:END_ID(Area)\n")
with open("in/l_area_area", "r") as f:
for line in f:
cols = line.split("\t")
out.write(",".join((areas[cols[3]][1],
areas[cols[2]][1]
)) + "\n")
with open("repo/area.csv", "w") as out:
out.write("id:ID(Area),name\n")
for k, area in areas.items():
out.write(",".join((area[1],
'"' + area[2] + '"'
)) + "\n")
# ------
out_artist = open("repo/artist.csv", "w")
out_artist_area = open("repo/artist_area.csv", "w")
out_artist.write("id:ID(Artist),name,year:int,:LABEL\n")
out_artist_area.write(":START_ID(Artist),:END_ID(Area)\n")
for _, artist in artists.items():
out_artist.write(",".join((
artist[1],
'"' + artist[2].replace("\"", "\"\"") + '"',
artist[4] if artist[4] != "\\N" else "0",
"Artist" + (";Group\n" if artist[10] == "2" else "\n")
)))
if artist[11] != "\\N":
out_artist_area.write(artist[1] + "," + areas[artist[11]][1] + "\n")
out_artist.close()
out_artist_area.close()
with open("repo/artist_artist.csv", "w") as out:
out.write(":START_ID(Artist),:END_ID(Artist),:TYPE\n")
with open("in/l_artist_artist", "r") as f:
for line in f:
cols = line.split("\t")
out.write(",".join((
artists[cols[2]][1],
artists[cols[3]][1],
artist_artist_rel_map[link_types[links[cols[1]][1]][6]] + "\n"
)))
# --------
with open("in/release_group_primary_type") as f:
for line in f:
cols = line.split("\t")
release_types[cols[0]] = ";" + cols[1]
release_group_year = dict()
with open("in/release_group_meta") as f:
for line in f:
cols = line.split("\t")
release_group_year[cols[0]] = cols[2] if cols[2] != "\\N" else "0"
with open("repo/release.csv", "w") as out:
out.write("id:ID(Release),name,year:int,:LABEL\n")
with open("in/release_group") as f:
for line in f:
cols = line.split("\t")
out.write(",".join((
cols[1],
'"' + cols[2].replace("\"", "\"\"") + '"',
release_group_year[cols[0]],
"Release" + release_types[cols[4]],
)) + "\n")
release_groups[cols[0]] = cols
with open("in/release") as f:
for line in f:
cols = line.split("\t")
if cols[5] != '\\N' and release_statuses[cols[5]][1] == "Official":
release_to_release_group_map[cols[0]] = cols[4]
credit_names = defaultdict(list)
with open("in/artist_credit_name") as f:
for line in f:
cols = line.split("\t")
credit_names[cols[0]].append(artists[cols[2]][1])
with open("tmp/tmp_artist_release.csv", "w") as out:
out.write(":START_ID(Artist),:END_ID(Release),:TYPE\n")
# Is this part really necessary?
with open("in/l_artist_release") as f:
for line in f:
cols = line.split("\t")
if cols[3] in release_to_release_group_map:
out.write(",".join((
artists[cols[2]][1],
release_groups[release_to_release_group_map[cols[3]]][1],
artist_release_rel_map[link_types[links[cols[1]][1]][6]]
)) + "\n")
# Artist credits
with open("in/release") as f:
for line in f:
cols = line.split("\t")
if cols[0] in release_to_release_group_map:
for credit in credit_names[cols[3]]:
out.write(",".join((
credit,
release_groups[release_to_release_group_map[cols[0]]][1],
"CREDITED_FOR"
)) + "\n")
# Remove dupes
os.system("(head -n 1 tmp/tmp_artist_release.csv && tail -n +2 tmp/tmp_artist_release.csv"
" | sort) | uniq > repo/artist_release.csv && rm tmp/tmp_artist_release.csv")
with open("repo/release_release.csv", "w") as out:
out.write(":START_ID(Release),:END_ID(Release),:TYPE\n")
with open("in/l_release_group_release_group") as f:
for line in f:
cols = line.split("\t")
out.write(",".join((
release_groups[cols[2]][1],
release_groups[cols[3]][1],
release_release_rel_map[link_types[links[cols[1]][1]][6]]
)) + "\n")
# ---
with open("in/tag") as f:
with open("repo/tag.csv", "w") as out:
out.write("id:ID(Tag),name\n")
for line in f:
cols = line.split("\t")
tags[cols[0]] = cols
out.write(cols[0] + ",\"" + cols[1].replace("\"", "\"\"") + "\"\n")
with open("repo/release_tag.csv", "w") as out:
out.write(":START_ID(Release),:END_ID(Tag),weight:int\n")
with open("in/release_group_tag") as f:
for line in f:
cols = line.split("\t")
if int(cols[2]) <= 0:
continue
out.write(",".join((
release_groups[cols[0]][1],
cols[1],
cols[2],
)) + "\n")
with open("repo/artist_tag.csv", "w") as out:
out.write(":START_ID(Artist),:END_ID(Tag),weight:int\n")
with open("in/artist_tag") as f:
for line in f:
cols = line.split("\t")
if int(cols[2]) <= 0:
continue
out.write(",".join((
artists[cols[0]][1],
cols[1],
cols[2],
)) + "\n")
with open("repo/tag_tag.csv", "w") as out:
out.write(":START_ID(Tag),:END_ID(Tag),weight:int\n")
with open("in/tag_relation") as f:
for line in f:
cols = line.split("\t")
if int(cols[2]) <= 0:
continue
out.write(",".join((
cols[0],
cols[1],
cols[2],
)) + "\n")
# -----
with open("repo/labels.csv", "w") as out:
out.write("id:ID(Label),name,code,:LABEL\n")
with open("in/label") as f:
for line in f:
cols = line.split("\t")
labels[cols[0]] = cols
out.write(",".join((
cols[1],
"\"" + cols[2].replace("\"", "\"\"") + "\"",
cols[9] if cols[9] != "\\N" else "",
"Label" + label_types[cols[10]]
)) + "\n")
with open("repo/label_label.csv", "w") as out:
out.write(":START_ID(Label),:END_ID(Label),:TYPE\n")
with open("in/l_label_label") as f:
for line in f:
cols = line.split("\t")
out.write(",".join((
labels[cols[2]][1],
labels[cols[3]][1],
label_label_rel_map[link_types[links[cols[1]][1]][6]]
)) + "\n")
# ---


@@ -1 +0,0 @@
requests


@@ -1,2 +0,0 @@
CREATE INDEX ON :Artist(id);
CREATE INDEX ON :Release(id);


@@ -1,5 +0,0 @@
#!/usr/bin/env bash
export NEO4J_HOME="/home/drone/Downloads/neo4j-community-3.5.3"
cat seed.cypher | ${NEO4J_HOME}/bin/cypher-shell

@@ -1 +0,0 @@
Subproject commit e025596cf2ccfbe803f05cb848927ae36fe270a3