diff --git a/.gitignore b/.gitignore index 719b951..4492074 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,6 @@ __pycache__/ # Distribution / packaging .Python -build/ develop-eggs/ dist/ downloads/ diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 29218bc..0000000 --- a/.gitmodules +++ /dev/null @@ -1,6 +0,0 @@ -[submodule "task_tracker_drone"] - path = task_tracker_drone - url = https://github.com/simon987/task_tracker_drone/ -[submodule "last.fm"] - path = last.fm - url = https://git.simon987.net/drone/last.fm diff --git a/README.md b/README.md index 22c6eaf..a749d86 100644 --- a/README.md +++ b/README.md @@ -6,43 +6,6 @@ wip -### Data import from MusicBrainz & Last.fm - -```bash -# Download latest database dump -./get_musicbrainz_dump.sh - -# Convert to .csv -python convert_mb.py - -# Generate scraping tasks for task_tracker_drone (See notes) -python generate_scrape_tasks.py - -# Apply last.fm data to artist.csv -python patch_artists_with_lastfm.py "/path/to/lasfm_data.db" - -# Expose generated .csv data to the network -cd repo/ -python -m http.server 9999 - -# On the machine where neo4j is installed: -./import.sh -``` - -### task_tracker setup: - -Last.fm api calls are queued to [task_tracker](https://github.com/simon987/task_tracker/), - and results are gathered by a [task_tracker_drone](https://github.com/simon987/task_tracker_drone/) - ([script](https://git.simon987.net/drone/last.fm/src/master/run)). - - -Project secret: -```json -{ - "apikey": "", - "user": "" -} -``` ### Api setup diff --git a/__init__.py b/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/caa/run b/caa/run deleted file mode 100755 index a91da8c..0000000 --- a/caa/run +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env python - -import json -import os -import random -import sqlite3 -import sys -import traceback -from io import BytesIO -from pathlib import Path - -import PIL -import requests -from PIL import Image - -PIL.Image.MAX_IMAGE_PIXELS = 933120000 -current_mbid = "" - - -def should_download(image: dict): - return image["front"] is True - - -def thumb(cover_blob): - with Image.open(BytesIO(cover_blob)) as image: - - # https://stackoverflow.com/questions/43978819 - if image.mode == "I;16": - image.mode = "I" - image.point(lambda i: i * (1. 
/ 256)).convert('L') - - image.thumbnail((256, 256), Image.BICUBIC) - canvas = Image.new("RGB", image.size, 0x000000) - - if image.mode in ('RGBA', 'LA') or (image.mode == 'P' and 'transparency' in image.info): - try: - canvas.paste(image, mask=image.split()[-1]) - except ValueError: - canvas.paste(image) - else: - canvas.paste(image) - - blob = BytesIO() - canvas.save(blob, "JPEG", quality=85, optimize=True) - canvas.close() - - return blob.getvalue() - - -def download(mbid): - global current_mbid - current_mbid = mbid - r = requests.get("https://archive.org/metadata/mbid-" + mbid) - meta = r.json() - - if "files" not in meta or "workable_servers" not in meta or not meta["workable_servers"]: - return - - directory = "https://" + random.choice(meta["workable_servers"]) + meta["dir"] - index = directory + "/index.json" - - r = requests.get(index) - if r.status_code == 404: - mb_meta = meta - urls = [ - directory + "/" + f["name"] - for f in meta["files"] if "thumb" not in f["name"] and - not f["name"].endswith((".xml", ".txt", ".json", ".torrent")) - ] - else: - mb_meta = r.json() - - urls = [ - directory + "/mbid-" + mbid + "-" + image["image"][image["image"].rfind("/") + 1:] - for image in mb_meta["images"] if should_download(image) - ] - - if not urls: - return - - cover = requests.get(urls[0]).content - - if cover: - dbfile = "/mnt/Data7/caa_p2.db" - if not os.path.exists(dbfile): - with sqlite3.connect(dbfile, timeout=30000) as conn: - c = conn.cursor() - c.execute( - "CREATE TABLE covers(id TEXT, cover BLOB, tn BLOB, meta TEXT, ts TEXT default CURRENT_TIMESTAMP)") - - try: - tn = thumb(cover) - except: - tn = None - - with sqlite3.connect(dbfile, timeout=30000) as conn: - c = conn.cursor() - c.execute("INSERT INTO covers (id, cover, tn, meta) VALUES (?,?,?,?)", - (mbid, cover, tn, json.dumps(mb_meta),)) - - -try: - task_str = sys.argv[1] - task = json.loads(task_str) - mbids = json.loads(task["recipe"]) - for mbid in mbids: - download(mbid) - -except Exception as e: - print(json.dumps({ - "result": 1, - "logs": [ - {"message": str(e) + "$$" + current_mbid + "$$" + traceback.format_exc(), "level": 3} - ] - })) - quit(2) - -print(json.dumps({ - "result": 0, -})) diff --git a/download_mb_dump.sh b/download_mb_dump.sh deleted file mode 100755 index ff3bf47..0000000 --- a/download_mb_dump.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash - -latest=$(curl http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/LATEST) - -mkdir in 2> /dev/null -cd in - -wget -nc "http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/${latest}/mbdump.tar.bz2" -wget -nc "http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/${latest}/mbdump-derived.tar.bz2" - -tar -xjvf mbdump.tar.bz2 mbdump/area mbdump/artist mbdump/l_area_area mbdump/l_artist_artist \ -mbdump/l_artist_release mbdump/l_artist_release_group mbdump/l_label_label mbdump/l_release_group_release_group \ -mbdump/label mbdump/label_type mbdump/link mbdump/link_type mbdump/release mbdump/release_group \ -mbdump/release_group_primary_type mbdump/artist_credit_name mbdump/release_status -tar -xjvf mbdump-derived.tar.bz2 mbdump/artist_tag mbdump/release_group_tag mbdump/tag mbdump/tag_relation \ -mbdump/release_group_meta - -mv mbdump/* . -rm -r mbdump -cd .. 
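The removed `caa/run` drone script receives its task as a JSON string in `argv[1]` and json-loads the task's `recipe` field to get its batch of release MBIDs. A minimal local invocation sketch, with the payload shape inferred from the script itself (the MBID is a hypothetical placeholder):

```python
# Sketch: drive the caa/run drone script by hand. Payload shape is
# inferred from the script: argv[1] is JSON, and "recipe" is itself a
# JSON-encoded array of release MBIDs.
import json
import subprocess

task = {"recipe": json.dumps(["00000000-0000-0000-0000-000000000000"])}

result = subprocess.run(
    ["./caa/run", json.dumps(task)],
    capture_output=True, text=True,
)

# The script answers in task_tracker's result format:
# {"result": 0} on success, {"result": 1, "logs": [...]} on failure.
print(result.stdout)
```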
\ No newline at end of file
diff --git a/extract_covers.py b/extract_covers.py
deleted file mode 100644
index e50ea6c..0000000
--- a/extract_covers.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import sqlite3
-
-import sys
-
-with sqlite3.connect(sys.argv[1]) as conn:
-
-    cursor = conn.cursor()
-    cursor.execute("SELECT id from covers")
-
-    cursor = conn.cursor()
-    cursor.execute("SELECT id from covers")
-
-    def rows():
-        buf = list()
-        for row in cursor.fetchall():
-            buf.append(row[0])
-            if len(buf) > 30:
-                yield buf
-                buf.clear()
-
-    for batch in rows():
-        cursor.execute("SELECT cover from covers where id in (%s)" % (",".join(("'" + b + "'") for b in batch)))
-        covers = cursor.fetchall()
-        for i, cover in enumerate(covers):
-            with open("./tmpcovers/" + batch[i] + ".jpg", "wb") as out:
-                out.write(cover[0])
-                print(batch[i])
diff --git a/generate_caa_tasks.py b/generate_caa_tasks.py
deleted file mode 100644
index cb20260..0000000
--- a/generate_caa_tasks.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import json
-from multiprocessing.pool import ThreadPool
-
-from task_tracker_drone.src.tt_drone.api import TaskTrackerApi, Worker
-
-TT_API_URL = "https://tt.simon987.net/api"
-TT_PROJECT = 5
-
-
-done = set()
-# with sqlite3.connect(sys.argv[1]) as conn:
-#     cur = conn.cursor()
-#     cur.execute("SELECT id FROM covers")
-#     for mbid in cur.fetchall():
-#         done.add(mbid[0])
-
-api = TaskTrackerApi(TT_API_URL)
-
-worker = Worker.from_file(api)
-if not worker:
-    worker = api.make_worker("caa scraper")
-    worker.dump_to_file()
-worker.request_access(TT_PROJECT, True, True)
-input("Give permission to " + worker.alias)
-
-
-def mktask(mbids):
-    res = worker.submit_task(
-        project=TT_PROJECT,
-        recipe=json.dumps(mbids),
-        hash64=hash(mbids[0]),
-        max_assign_time=60 * 30,
-        priority=1,
-        unique_str=None,
-        verification_count=None,
-        max_retries=5,
-    )
-    print(res.text)
-
-
-def lines():
-    with open("in/release") as f:
-        buf = list()
-
-        for line in f:
-            cols = line.split("\t")
-
-            buf.append(cols[1])
-            if len(buf) == 75:
-                a = list(buf)
-                buf.clear()
-                yield a
-
-
-pool = ThreadPool(processes=20)
-pool.map(func=mktask, iterable=lines())
diff --git a/generate_lastfm_tasks.py b/generate_lastfm_tasks.py
deleted file mode 100644
index 1d8051c..0000000
--- a/generate_lastfm_tasks.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import csv
-import json
-from multiprocessing.pool import ThreadPool
-
-from task_tracker_drone.src.tt_drone.api import TaskTrackerApi, Worker
-
-TT_API_URL = "https://tt.simon987.net/api"
-TT_PROJECT = 1
-
-api = TaskTrackerApi(TT_API_URL)
-
-worker = Worker.from_file(api)
-if not worker:
-    worker = api.make_worker("last.fm scraper")
-    worker.dump_to_file()
-worker.request_access(TT_PROJECT, True, True)
-input("Give permission to " + worker.alias)
-
-with open("repo/artist.csv") as f:
-    reader = csv.reader(f)
-
-    def mktask(lines):
-        res = worker.submit_task(
-            project=TT_PROJECT,
-            recipe=json.dumps(
-                [{"mbid": line[0], "name": line[1]} for line in lines]
-            ),
-            unique_str=lines[0][0],
-            max_assign_time=60 * 5,
-        )
-        print(res.text)
-
-    def lines():
-        line_batch = list()
-
-        for line in reader:
-            if "Group" in line[3]:
-                line_batch.append(line)
-            if len(line_batch) >= 30:
-                res = list(line_batch)
-                line_batch.clear()
-                yield res
-
-    tasks = list(lines())
-
-    pool = ThreadPool(processes=25)
-    pool.map(func=mktask, iterable=tasks)
-
diff --git a/jenkins/Jenkinsfile b/jenkins/Jenkinsfile
index e69de29..f2b2cfd 100644
--- a/jenkins/Jenkinsfile
+++ b/jenkins/Jenkinsfile
@@ -0,0 +1,41 @@
+def remote = [:]
+remote.name = 'remote'
+remote.host = env.DEPLOY_HOST
+remote.user = env.DEPLOY_USER
+remote.identityFile = '/var/lib/jenkins/.ssh/id_rsa'
+remote.knownHosts = '/var/lib/jenkins/.ssh/known_hosts'
+remote.allowAnyHosts = true
+remote.retryCount = 3
+remote.retryWaitSec = 3
+logLevel = 'FINER'
+
+pipeline {
+    stages {
+        stage('Build') {
+            agent {
+                docker {
+                    image 'node:10-alpine'
+                    args '--network "host"'
+                }
+            }
+            steps {
+                sh 'cd music_graph/ && npm install && npm audit fix && npm run build'
+                sh 'mv music_graph/dist webroot'
+                stash includes: 'webroot/', name: 'webdist'
+            }
+        }
+        stage('Deploy') {
+            agent none
+            steps {
+                node('master') {
+                    unstash 'webdist'
+                    sshCommand remote: remote, command: "cd simon987 && rm -rf webroot/* deploy.sh"
+                    sshPut remote: remote, from: 'webroot/', into: 'music-graph'
+                    sshPut remote: remote, from: 'jenkins/deploy.sh', into: 'music-graph/'
+                    sshCommand remote: remote, command: 'chmod +x music-graph/deploy.sh && ./music-graph/deploy.sh'
+                }
+            }
+        }
+    }
+}
+
diff --git a/jenkins/build.sh b/jenkins/build.sh
index 59e324b..f1f641a 100755
--- a/jenkins/build.sh
+++ b/jenkins/build.sh
@@ -1,4 +1 @@
 #!/usr/bin/env bash
-
-git submodule init
-git submodule update --remote
\ No newline at end of file
diff --git a/jenkins/deploy.sh b/jenkins/deploy.sh
index f1f641a..8b56473 100755
--- a/jenkins/deploy.sh
+++ b/jenkins/deploy.sh
@@ -1 +1,5 @@
 #!/usr/bin/env bash
+
+export MGROOT="music-graph"
+
+chmod 755 -R "${MGROOT}/webroot"
diff --git a/last.fm b/last.fm
deleted file mode 160000
index 855df64..0000000
--- a/last.fm
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 855df64c316930062ff4f7740492d0f039788498
diff --git a/make_neoj4_db.sh b/make_neoj4_db.sh
deleted file mode 100755
index 85930e7..0000000
--- a/make_neoj4_db.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/bash
-
-export NEO4J_HOME="/home/drone/Downloads/neo4j-community-3.5.3"
-export REPOSITORY="http://localhost:9999"
-export DATABASE="graph.db"
-
-rm -rf "${NEO4J_HOME}/data/databases/${DATABASE}"
-
-cp ${NEO4J_HOME}/conf/neo4j.conf ${NEO4J_HOME}/conf/neo4j.conf.bak
-echo "dbms.security.auth_enabled=false" >> ${NEO4J_HOME}/conf/neo4j.conf
-
-mkdir workspace 2> /dev/null
-cd workspace
-rm *.csv
-
-wget ${REPOSITORY}/area.csv
-wget ${REPOSITORY}/area_area.csv
-wget ${REPOSITORY}/lastfm_artist.csv
-wget ${REPOSITORY}/artist_area.csv
-wget ${REPOSITORY}/artist_artist.csv
-wget ${REPOSITORY}/artist_release.csv
-wget ${REPOSITORY}/release.csv
-wget ${REPOSITORY}/tag.csv
-wget ${REPOSITORY}/tag_tag.csv
-wget ${REPOSITORY}/release_tag.csv
-wget ${REPOSITORY}/release_release.csv
-wget ${REPOSITORY}/artist_tag.csv
-wget ${REPOSITORY}/labels.csv
-wget ${REPOSITORY}/label_label.csv
-wget ${REPOSITORY}/lastfm_artist_artist.csv
-
-. ${NEO4J_HOME}/bin/neo4j-admin import \
-    --database ${DATABASE}\
-    --high-io=true\
-    --nodes:Area:MusicBrainzEntity "area.csv"\
-    --nodes:MusicBrainzEntity "release.csv"\
-    --nodes:MusicBrainzEntity "lastfm_artist.csv"\
-    --nodes:Tag "tag.csv"\
-    --nodes:MusicBrainzEntity "labels.csv"\
-    --relationships:IS_PART_OF "area_area.csv"\
-    --relationships:IS_BASED_IN "artist_area.csv"\
-    --relationships "artist_artist.csv"\
-    --relationships "artist_release.csv"\
-    --relationships:IS_TAGGED "release_tag.csv"\
-    --relationships:IS_TAGGED "artist_tag.csv"\
-    --relationships:IS_RELATED_TO "tag_tag.csv"\
-    --relationships "label_label.csv"\
-    --relationships "release_release.csv"\
-    --relationships:IS_RELATED_TO "lastfm_artist_artist.csv"
-
-rm *.csv
-cd ..
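`make_neoj4_db.sh` above hands the downloaded `.csv` files to `neo4j-admin import`, which reads its schema from the CSV headers. A sketch of that header convention as used by this repo's `artist.csv` (the row values are illustrative only):

```python
# Sketch of the neo4j-admin bulk-import CSV convention:
#   id:ID(Artist) -> node key, unique within the "Artist" ID space
#   year:int      -> coerced to an integer on import
#   :LABEL        -> node label(s), ';'-separated when there are several
import csv

with open("repo/artist.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["id:ID(Artist)", "name", "year:int", ":LABEL"])
    # Illustrative row; real rows are generated from the MusicBrainz dump.
    writer.writerow(["00000000-0000-0000-0000-000000000001", "Example Artist", "1968", "Artist;Group"])
```

Relationship files follow the same convention with `:START_ID(...)` and `:END_ID(...)` columns plus an optional `:TYPE` column, which is why files like `artist_artist.csv` can carry a per-row relationship type while `area_area.csv` takes its `IS_PART_OF` type from the command line.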
- diff --git a/make_release_to_rg_map.py b/make_release_to_rg_map.py deleted file mode 100644 index ba21978..0000000 --- a/make_release_to_rg_map.py +++ /dev/null @@ -1,31 +0,0 @@ -import sqlite3 - -release_to_release_group_map = dict() -release_groups = dict() - -with open("in/release_group") as f: - for line in f: - cols = line.split("\t") - release_groups[cols[0]] = cols[1] - -with open("in/release") as f: - for line in f: - cols = line.split("\t") - release_to_release_group_map[cols[1]] = release_groups[cols[4]] - -with sqlite3.connect("mapdb.db") as conn: - - cursor = conn.cursor() - cursor.execute("CREATE TABLE map (release TEXT PRIMARY KEY , release_group TEXT)") - - for k, v in release_to_release_group_map.items(): - cursor.execute("INSERT INTO map (release, release_group) VALUES (?,?)", (k, v)) - conn.commit() - -""" -CREATE TABLE covers (id TEXT primary key, cover BLOB); -ATTACH 'mapdb.db' AS map; -ATTACH '/mnt/Data8/caa_tn_only.db' AS source; -INSERT OR IGNORE INTO covers SELECT release_group, cover FROM source.covers INNER JOIN map.map ON id = map.release; -""" - diff --git a/ui/.babelrc b/music_graph/.babelrc similarity index 100% rename from ui/.babelrc rename to music_graph/.babelrc diff --git a/ui/.editorconfig b/music_graph/.editorconfig similarity index 100% rename from ui/.editorconfig rename to music_graph/.editorconfig diff --git a/ui/.eslintignore b/music_graph/.eslintignore similarity index 100% rename from ui/.eslintignore rename to music_graph/.eslintignore diff --git a/ui/.eslintrc.js b/music_graph/.eslintrc.js similarity index 100% rename from ui/.eslintrc.js rename to music_graph/.eslintrc.js diff --git a/ui/.postcssrc.js b/music_graph/.postcssrc.js similarity index 100% rename from ui/.postcssrc.js rename to music_graph/.postcssrc.js diff --git a/music_graph/build/build.js b/music_graph/build/build.js new file mode 100644 index 0000000..8f2ad8a --- /dev/null +++ b/music_graph/build/build.js @@ -0,0 +1,41 @@ +'use strict' +require('./check-versions')() + +process.env.NODE_ENV = 'production' + +const ora = require('ora') +const rm = require('rimraf') +const path = require('path') +const chalk = require('chalk') +const webpack = require('webpack') +const config = require('../config') +const webpackConfig = require('./webpack.prod.conf') + +const spinner = ora('building for production...') +spinner.start() + +rm(path.join(config.build.assetsRoot, config.build.assetsSubDirectory), err => { + if (err) throw err + webpack(webpackConfig, (err, stats) => { + spinner.stop() + if (err) throw err + process.stdout.write(stats.toString({ + colors: true, + modules: false, + children: false, // If you are using ts-loader, setting this to true will make TypeScript errors show up during build. 
+      chunks: false,
+      chunkModules: false
+    }) + '\n\n')
+
+    if (stats.hasErrors()) {
+      console.log(chalk.red('  Build failed with errors.\n'))
+      process.exit(1)
+    }
+
+    console.log(chalk.cyan('  Build complete.\n'))
+    console.log(chalk.yellow(
+      '  Tip: built files are meant to be served over an HTTP server.\n' +
+      '  Opening index.html over file:// won\'t work.\n'
+    ))
+  })
+})
diff --git a/music_graph/build/check-versions.js b/music_graph/build/check-versions.js
new file mode 100644
index 0000000..3ef972a
--- /dev/null
+++ b/music_graph/build/check-versions.js
@@ -0,0 +1,54 @@
+'use strict'
+const chalk = require('chalk')
+const semver = require('semver')
+const packageConfig = require('../package.json')
+const shell = require('shelljs')
+
+function exec (cmd) {
+  return require('child_process').execSync(cmd).toString().trim()
+}
+
+const versionRequirements = [
+  {
+    name: 'node',
+    currentVersion: semver.clean(process.version),
+    versionRequirement: packageConfig.engines.node
+  }
+]
+
+if (shell.which('npm')) {
+  versionRequirements.push({
+    name: 'npm',
+    currentVersion: exec('npm --version'),
+    versionRequirement: packageConfig.engines.npm
+  })
+}
+
+module.exports = function () {
+  const warnings = []
+
+  for (let i = 0; i < versionRequirements.length; i++) {
+    const mod = versionRequirements[i]
+
+    if (!semver.satisfies(mod.currentVersion, mod.versionRequirement)) {
+      warnings.push(mod.name + ': ' +
+        chalk.red(mod.currentVersion) + ' should be ' +
+        chalk.green(mod.versionRequirement)
+      )
+    }
+  }
+
+  if (warnings.length) {
+    console.log('')
+    console.log(chalk.yellow('To use this template, you must update the following modules:'))
+    console.log()
+
+    for (let i = 0; i < warnings.length; i++) {
+      const warning = warnings[i]
+      console.log('  ' + warning)
+    }
+
+    console.log()
+    process.exit(1)
+  }
+}
diff --git a/music_graph/build/utils.js b/music_graph/build/utils.js
new file mode 100644
index 0000000..e534fb0
--- /dev/null
+++ b/music_graph/build/utils.js
@@ -0,0 +1,101 @@
+'use strict'
+const path = require('path')
+const config = require('../config')
+const ExtractTextPlugin = require('extract-text-webpack-plugin')
+const packageConfig = require('../package.json')
+
+exports.assetsPath = function (_path) {
+  const assetsSubDirectory = process.env.NODE_ENV === 'production'
+    ? config.build.assetsSubDirectory
+    : config.dev.assetsSubDirectory
+
+  return path.posix.join(assetsSubDirectory, _path)
+}
+
+exports.cssLoaders = function (options) {
+  options = options || {}
+
+  const cssLoader = {
+    loader: 'css-loader',
+    options: {
+      sourceMap: options.sourceMap
+    }
+  }
+
+  const postcssLoader = {
+    loader: 'postcss-loader',
+    options: {
+      sourceMap: options.sourceMap
+    }
+  }
+
+  // generate loader string to be used with extract text plugin
+  function generateLoaders (loader, loaderOptions) {
+    const loaders = options.usePostCSS ?
[cssLoader, postcssLoader] : [cssLoader] + + if (loader) { + loaders.push({ + loader: loader + '-loader', + options: Object.assign({}, loaderOptions, { + sourceMap: options.sourceMap + }) + }) + } + + // Extract CSS when that option is specified + // (which is the case during production build) + if (options.extract) { + return ExtractTextPlugin.extract({ + use: loaders, + fallback: 'vue-style-loader' + }) + } else { + return ['vue-style-loader'].concat(loaders) + } + } + + // https://vue-loader.vuejs.org/en/configurations/extract-css.html + return { + css: generateLoaders(), + postcss: generateLoaders(), + less: generateLoaders('less'), + sass: generateLoaders('sass', { indentedSyntax: true }), + scss: generateLoaders('sass'), + stylus: generateLoaders('stylus'), + styl: generateLoaders('stylus') + } +} + +// Generate loaders for standalone style files (outside of .vue) +exports.styleLoaders = function (options) { + const output = [] + const loaders = exports.cssLoaders(options) + + for (const extension in loaders) { + const loader = loaders[extension] + output.push({ + test: new RegExp('\\.' + extension + '$'), + use: loader + }) + } + + return output +} + +exports.createNotifierCallback = () => { + const notifier = require('node-notifier') + + return (severity, errors) => { + if (severity !== 'error') return + + const error = errors[0] + const filename = error.file && error.file.split('!').pop() + + notifier.notify({ + title: packageConfig.name, + message: severity + ': ' + error.name, + subtitle: filename || '', + icon: path.join(__dirname, 'logo.png') + }) + } +} diff --git a/music_graph/build/vue-loader.conf.js b/music_graph/build/vue-loader.conf.js new file mode 100644 index 0000000..33ed58b --- /dev/null +++ b/music_graph/build/vue-loader.conf.js @@ -0,0 +1,22 @@ +'use strict' +const utils = require('./utils') +const config = require('../config') +const isProduction = process.env.NODE_ENV === 'production' +const sourceMapEnabled = isProduction + ? config.build.productionSourceMap + : config.dev.cssSourceMap + +module.exports = { + loaders: utils.cssLoaders({ + sourceMap: sourceMapEnabled, + extract: isProduction + }), + cssSourceMap: sourceMapEnabled, + cacheBusting: config.dev.cacheBusting, + transformToRequire: { + video: ['src', 'poster'], + source: 'src', + img: 'src', + image: 'xlink:href' + } +} diff --git a/music_graph/build/webpack.base.conf.js b/music_graph/build/webpack.base.conf.js new file mode 100644 index 0000000..aa4fcf0 --- /dev/null +++ b/music_graph/build/webpack.base.conf.js @@ -0,0 +1,91 @@ +'use strict' +const path = require('path') +const utils = require('./utils') +const config = require('../config') +const vueLoaderConfig = require('./vue-loader.conf') + +function resolve (dir) { + return path.join(__dirname, '..', dir) +} + +const createLintingRule = () => ({ + test: /\.(js|vue)$/, + loader: 'eslint-loader', + enforce: 'pre', + include: [resolve('src'), resolve('test')], + options: { + formatter: require('eslint-friendly-formatter'), + emitWarning: !config.dev.showEslintErrorsInOverlay + } +}) + +module.exports = { + context: path.resolve(__dirname, '../'), + entry: { + app: './src/main.js' + }, + output: { + path: config.build.assetsRoot, + filename: '[name].js', + publicPath: process.env.NODE_ENV === 'production' + ? config.build.assetsPublicPath + : config.dev.assetsPublicPath + }, + resolve: { + extensions: ['.js', '.vue', '.json'], + alias: { + '@': resolve('src'), + } + }, + module: { + rules: [ + ...(config.dev.useEslint ? 
[createLintingRule()] : []), + { + test: /\.vue$/, + loader: 'vue-loader', + options: vueLoaderConfig + }, + { + test: /\.js$/, + loader: 'babel-loader', + include: [resolve('src'), resolve('test'), resolve('node_modules/webpack-dev-server/client')] + }, + { + test: /\.(png|jpe?g|gif|svg)(\?.*)?$/, + loader: 'url-loader', + options: { + limit: 10000, + name: utils.assetsPath('img/[name].[hash:7].[ext]') + } + }, + { + test: /\.(mp4|webm|ogg|mp3|wav|flac|aac)(\?.*)?$/, + loader: 'url-loader', + options: { + limit: 10000, + name: utils.assetsPath('media/[name].[hash:7].[ext]') + } + }, + { + test: /\.(woff2?|eot|ttf|otf)(\?.*)?$/, + loader: 'url-loader', + options: { + limit: 10000, + name: utils.assetsPath('fonts/[name].[hash:7].[ext]') + } + } + ] + }, + node: { + // prevent webpack from injecting useless setImmediate polyfill because Vue + // source contains it (although only uses it if it's native). + setImmediate: false, + // prevent webpack from injecting mocks to Node native modules + // that does not make sense for the client + dgram: 'empty', + fs: 'empty', + net: 'empty', + tls: 'empty', + child_process: 'empty' + } +} diff --git a/music_graph/build/webpack.dev.conf.js b/music_graph/build/webpack.dev.conf.js new file mode 100755 index 0000000..070ae22 --- /dev/null +++ b/music_graph/build/webpack.dev.conf.js @@ -0,0 +1,95 @@ +'use strict' +const utils = require('./utils') +const webpack = require('webpack') +const config = require('../config') +const merge = require('webpack-merge') +const path = require('path') +const baseWebpackConfig = require('./webpack.base.conf') +const CopyWebpackPlugin = require('copy-webpack-plugin') +const HtmlWebpackPlugin = require('html-webpack-plugin') +const FriendlyErrorsPlugin = require('friendly-errors-webpack-plugin') +const portfinder = require('portfinder') + +const HOST = process.env.HOST +const PORT = process.env.PORT && Number(process.env.PORT) + +const devWebpackConfig = merge(baseWebpackConfig, { + module: { + rules: utils.styleLoaders({ sourceMap: config.dev.cssSourceMap, usePostCSS: true }) + }, + // cheap-module-eval-source-map is faster for development + devtool: config.dev.devtool, + + // these devServer options should be customized in /config/index.js + devServer: { + clientLogLevel: 'warning', + historyApiFallback: { + rewrites: [ + { from: /.*/, to: path.posix.join(config.dev.assetsPublicPath, 'index.html') }, + ], + }, + hot: true, + contentBase: false, // since we use CopyWebpackPlugin. + compress: true, + host: HOST || config.dev.host, + port: PORT || config.dev.port, + open: config.dev.autoOpenBrowser, + overlay: config.dev.errorOverlay + ? { warnings: false, errors: true } + : false, + publicPath: config.dev.assetsPublicPath, + proxy: config.dev.proxyTable, + quiet: true, // necessary for FriendlyErrorsPlugin + watchOptions: { + poll: config.dev.poll, + } + }, + plugins: [ + new webpack.DefinePlugin({ + 'process.env': require('../config/dev.env') + }), + new webpack.HotModuleReplacementPlugin(), + new webpack.NamedModulesPlugin(), // HMR shows correct file names in console on update. 
+ new webpack.NoEmitOnErrorsPlugin(), + // https://github.com/ampedandwired/html-webpack-plugin + new HtmlWebpackPlugin({ + filename: 'index.html', + template: 'index.html', + inject: true + }), + // copy custom static assets + new CopyWebpackPlugin([ + { + from: path.resolve(__dirname, '../static'), + to: config.dev.assetsSubDirectory, + ignore: ['.*'] + } + ]) + ] +}) + +module.exports = new Promise((resolve, reject) => { + portfinder.basePort = process.env.PORT || config.dev.port + portfinder.getPort((err, port) => { + if (err) { + reject(err) + } else { + // publish the new Port, necessary for e2e tests + process.env.PORT = port + // add port to devServer config + devWebpackConfig.devServer.port = port + + // Add FriendlyErrorsPlugin + devWebpackConfig.plugins.push(new FriendlyErrorsPlugin({ + compilationSuccessInfo: { + messages: [`Your application is running here: http://${devWebpackConfig.devServer.host}:${port}`], + }, + onErrors: config.dev.notifyOnErrors + ? utils.createNotifierCallback() + : undefined + })) + + resolve(devWebpackConfig) + } + }) +}) diff --git a/music_graph/build/webpack.prod.conf.js b/music_graph/build/webpack.prod.conf.js new file mode 100644 index 0000000..d9f99f6 --- /dev/null +++ b/music_graph/build/webpack.prod.conf.js @@ -0,0 +1,145 @@ +'use strict' +const path = require('path') +const utils = require('./utils') +const webpack = require('webpack') +const config = require('../config') +const merge = require('webpack-merge') +const baseWebpackConfig = require('./webpack.base.conf') +const CopyWebpackPlugin = require('copy-webpack-plugin') +const HtmlWebpackPlugin = require('html-webpack-plugin') +const ExtractTextPlugin = require('extract-text-webpack-plugin') +const OptimizeCSSPlugin = require('optimize-css-assets-webpack-plugin') +const UglifyJsPlugin = require('uglifyjs-webpack-plugin') + +const env = require('../config/prod.env') + +const webpackConfig = merge(baseWebpackConfig, { + module: { + rules: utils.styleLoaders({ + sourceMap: config.build.productionSourceMap, + extract: true, + usePostCSS: true + }) + }, + devtool: config.build.productionSourceMap ? config.build.devtool : false, + output: { + path: config.build.assetsRoot, + filename: utils.assetsPath('js/[name].[chunkhash].js'), + chunkFilename: utils.assetsPath('js/[id].[chunkhash].js') + }, + plugins: [ + // http://vuejs.github.io/vue-loader/en/workflow/production.html + new webpack.DefinePlugin({ + 'process.env': env + }), + new UglifyJsPlugin({ + uglifyOptions: { + compress: { + warnings: false + } + }, + sourceMap: config.build.productionSourceMap, + parallel: true + }), + // extract css into its own file + new ExtractTextPlugin({ + filename: utils.assetsPath('css/[name].[contenthash].css'), + // Setting the following option to `false` will not extract CSS from codesplit chunks. + // Their CSS will instead be inserted dynamically with style-loader when the codesplit chunk has been loaded by webpack. + // It's currently set to `true` because we are seeing that sourcemaps are included in the codesplit bundle as well when it's `false`, + // increasing file size: https://github.com/vuejs-templates/webpack/issues/1110 + allChunks: true, + }), + // Compress extracted CSS. We are using this plugin so that possible + // duplicated CSS from different components can be deduped. + new OptimizeCSSPlugin({ + cssProcessorOptions: config.build.productionSourceMap + ? { safe: true, map: { inline: false } } + : { safe: true } + }), + // generate dist index.html with correct asset hash for caching. 
+ // you can customize output by editing /index.html + // see https://github.com/ampedandwired/html-webpack-plugin + new HtmlWebpackPlugin({ + filename: config.build.index, + template: 'index.html', + inject: true, + minify: { + removeComments: true, + collapseWhitespace: true, + removeAttributeQuotes: true + // more options: + // https://github.com/kangax/html-minifier#options-quick-reference + }, + // necessary to consistently work with multiple chunks via CommonsChunkPlugin + chunksSortMode: 'dependency' + }), + // keep module.id stable when vendor modules does not change + new webpack.HashedModuleIdsPlugin(), + // enable scope hoisting + new webpack.optimize.ModuleConcatenationPlugin(), + // split vendor js into its own file + new webpack.optimize.CommonsChunkPlugin({ + name: 'vendor', + minChunks (module) { + // any required modules inside node_modules are extracted to vendor + return ( + module.resource && + /\.js$/.test(module.resource) && + module.resource.indexOf( + path.join(__dirname, '../node_modules') + ) === 0 + ) + } + }), + // extract webpack runtime and module manifest to its own file in order to + // prevent vendor hash from being updated whenever app bundle is updated + new webpack.optimize.CommonsChunkPlugin({ + name: 'manifest', + minChunks: Infinity + }), + // This instance extracts shared chunks from code splitted chunks and bundles them + // in a separate chunk, similar to the vendor chunk + // see: https://webpack.js.org/plugins/commons-chunk-plugin/#extra-async-commons-chunk + new webpack.optimize.CommonsChunkPlugin({ + name: 'app', + async: 'vendor-async', + children: true, + minChunks: 3 + }), + + // copy custom static assets + new CopyWebpackPlugin([ + { + from: path.resolve(__dirname, '../static'), + to: config.build.assetsSubDirectory, + ignore: ['.*'] + } + ]) + ] +}) + +if (config.build.productionGzip) { + const CompressionWebpackPlugin = require('compression-webpack-plugin') + + webpackConfig.plugins.push( + new CompressionWebpackPlugin({ + asset: '[path].gz[query]', + algorithm: 'gzip', + test: new RegExp( + '\\.(' + + config.build.productionGzipExtensions.join('|') + + ')$' + ), + threshold: 10240, + minRatio: 0.8 + }) + ) +} + +if (config.build.bundleAnalyzerReport) { + const BundleAnalyzerPlugin = require('webpack-bundle-analyzer').BundleAnalyzerPlugin + webpackConfig.plugins.push(new BundleAnalyzerPlugin()) +} + +module.exports = webpackConfig diff --git a/ui/config/dev.env.js b/music_graph/config/dev.env.js similarity index 100% rename from ui/config/dev.env.js rename to music_graph/config/dev.env.js diff --git a/ui/config/index.js b/music_graph/config/index.js similarity index 100% rename from ui/config/index.js rename to music_graph/config/index.js diff --git a/ui/config/prod.env.js b/music_graph/config/prod.env.js similarity index 100% rename from ui/config/prod.env.js rename to music_graph/config/prod.env.js diff --git a/ui/index.html b/music_graph/index.html similarity index 100% rename from ui/index.html rename to music_graph/index.html diff --git a/ui/package-lock.json b/music_graph/package-lock.json similarity index 100% rename from ui/package-lock.json rename to music_graph/package-lock.json diff --git a/ui/package.json b/music_graph/package.json similarity index 100% rename from ui/package.json rename to music_graph/package.json diff --git a/ui/src/App.vue b/music_graph/src/App.vue similarity index 100% rename from ui/src/App.vue rename to music_graph/src/App.vue diff --git a/ui/src/assets/.gitkeep b/music_graph/src/assets/.gitkeep 
similarity index 100% rename from ui/src/assets/.gitkeep rename to music_graph/src/assets/.gitkeep diff --git a/ui/src/components/ArtistInfo.vue b/music_graph/src/components/ArtistInfo.vue similarity index 100% rename from ui/src/components/ArtistInfo.vue rename to music_graph/src/components/ArtistInfo.vue diff --git a/ui/src/components/HelloWorld.vue b/music_graph/src/components/HelloWorld.vue similarity index 100% rename from ui/src/components/HelloWorld.vue rename to music_graph/src/components/HelloWorld.vue diff --git a/ui/src/components/ImageCarousel.vue b/music_graph/src/components/ImageCarousel.vue similarity index 100% rename from ui/src/components/ImageCarousel.vue rename to music_graph/src/components/ImageCarousel.vue diff --git a/ui/src/main.js b/music_graph/src/main.js similarity index 100% rename from ui/src/main.js rename to music_graph/src/main.js diff --git a/ui/src/router/index.js b/music_graph/src/router/index.js similarity index 100% rename from ui/src/router/index.js rename to music_graph/src/router/index.js diff --git a/ui/static/.gitkeep b/music_graph/static/.gitkeep similarity index 100% rename from ui/static/.gitkeep rename to music_graph/static/.gitkeep diff --git a/ui/static/data.json b/music_graph/static/data.json similarity index 100% rename from ui/static/data.json rename to music_graph/static/data.json diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..48e341a --- /dev/null +++ b/package-lock.json @@ -0,0 +1,3 @@ +{ + "lockfileVersion": 1 +} diff --git a/process_lastfm_data.py b/process_lastfm_data.py deleted file mode 100644 index af6b1c6..0000000 --- a/process_lastfm_data.py +++ /dev/null @@ -1,100 +0,0 @@ -import csv -import json -import sqlite3 -from collections import defaultdict -import sys - -artists = set() - - -def disambiguate(lfm_artist, artist_release_count, name, mbid): - existing_mbid = lfm_artist.get(name, None) - - if existing_mbid and mbid != existing_mbid: - if artist_release_count[existing_mbid] < artist_release_count[mbid]: - - lfm_artist[name] = mbid - - print("Replacing %s (%s) with %s (%d) for %s" % - (existing_mbid, artist_release_count[existing_mbid], - mbid, artist_release_count[mbid], - name)) - else: - lfm_artist[name] = mbid - - -def patch(lastfm_data): - - artist_listeners = dict() - lastfm_artist_to_mbid = dict() - artist_release_count = defaultdict(int) - related = list() - - with open("repo/artist_release.csv") as f: - for line in f: - cols = line.split(',') - artist_release_count[cols[0]] += 1 - - with sqlite3.connect(lastfm_data) as conn: - cur = conn.cursor() - cur.execute("SELECT data FROM lastfmdata", ) - data = list(cur.fetchall()) - - # A lastfm artist name can refer to multiple MBIDs - # For RELATED_TO purposes, we assume that the MBID referring - # to the artist with the most official releases is the one - - for row in data: - meta = json.loads(row[0]) - - disambiguate(lastfm_artist_to_mbid, artist_release_count, meta["name"], meta["artist"]) - - for similar in [s for s in meta["similar"] if s["mbid"] is not None]: - disambiguate(lastfm_artist_to_mbid, artist_release_count, similar["name"], similar["mbid"]) - - # Get related links & listener counts - for row in data: - meta = json.loads(row[0]) - - artist_listeners[lastfm_artist_to_mbid[meta["name"]]] = \ - (meta["listeners"], meta["playcount"]) - - for similar in [s for s in meta["similar"] if s["mbid"] is not None]: - related.append(( - lastfm_artist_to_mbid[similar["name"]], - lastfm_artist_to_mbid[meta["name"]], - 
similar["match"] - )) - - with open("repo/lastfm_artist.csv", "w") as out: - writer = csv.writer(out) - writer.writerow([ - "id:ID(Artist)", "name", "year:short", ":LABEL", "listeners:int", "playcount:int" - ]) - - with open("repo/artist.csv") as f: - reader = csv.reader(f) - - reader.__next__() # Skip header - for row in reader: - writer.writerow([ - row[0], - row[1], - row[2], - row[3], - artist_listeners.get(row[0], (0, 0))[0], - artist_listeners.get(row[0], (0, 0))[1], - ]) - artists.add(row[0]) - - with open("repo/lastfm_artist_artist.csv", "w") as out: - out.write(",".join(( - ":START_ID(Artist)", ":END_ID(Artist)", "weight:float" - )) + "\n") - - for x in related: - if x[0] in artists and x[1] in artists: - out.write(",".join(x) + "\n") - - -patch(sys.argv[1]) diff --git a/process_mb_dump.py b/process_mb_dump.py deleted file mode 100644 index b4d369f..0000000 --- a/process_mb_dump.py +++ /dev/null @@ -1,393 +0,0 @@ -import os -from collections import defaultdict - -links = dict() -link_types = dict() -areas = dict() -labels = dict() -label_types = { - "\\N": "" -} -release_groups = dict() -release_statuses = dict() -release_to_release_group_map = dict() -release_types = { - "\\N": "", -} -artists = dict() -tags = dict() - -release_release_rel_map = { - "covers and versions": "", - "remixes and compilations": "", - "DJ-mix": "IS_DJ_MIX_OF", - "live performance": "IS_LIVE_PERFORMANCE_OF", - "cover": "IS_COVER_OF", - "remix": "IS_REMIX_OF", - "mashes up": "IS_MASHUP_OF", - "included in": "INCLUDED_IN", - "single from": "IS_SINGLE_FROM" -} - -artist_release_rel_map = { - "translator": "TRANSLATED", - "liner notes": "WROTE_LINER_NOTES", - "lyricist": "IS_LYRICIST_FOR", - "lacquer cut": "DID_LACQUER_CUT_FOR", - "samples from artist": "HAS_SAMPLES_IN", - "remixes and compilations": "", - "composition": "COMPOSED", - "booking": "DID_BOOKING_FOR", - "balance": "DID_BALANCE_FOR", - "misc": "HAS_MISC_ROLE_IN", - "conductor": "CONDUCTED", - "legal representation": "PROVIDED_LEGAL_REPRESENTATION_FOR", - "design/illustration": "DID_DESIGN_FOR", - "performing orchestra": "PERFORMED_FOR", - "producer": "PRODUCED", - "instrument": "PERFORMED_INSTRUMENT_FOR", - "writer": "WROTE_LYRICS_FOR", - "production": "DID_PRODUCTION_FOR", - "performance": "PERFORMED_FOR", - "composer": "IS_COMPOSER_FOR", - "sound": "DID_SOUND_FOR", - "remixer": "DID_REMIXING_FOR", - "orchestrator": "IS_ORCHESTRATOR_FOR", - "compiler": "DID_COMPILATION_FOR", - "vocal arranger": "IS_ARRANGER_FOR", - "arranger": "IS_ARRENGER_FOR", - "mix-DJ": "MIXED", - "editor": "IS_EDITOR_FOR", - "illustration": "DID_ILLUSTRATION_FOR", - "audio": "DID_AUDIO_FOR", - "publishing": "IS_PUBLISHER_FOR", - "art direction": "DID_ART_DIRECTOR_FOR", - "design": "DID_DESIGN_FOR", - "instrument arranger": "IS_ARRANGER_FOR", - "chorus master": "IS_CHORUS_MASTER_FOR", - "photography": "DID_PHOTOGRAPHY_FOR", - "performer": "PERFORMED_IN", - "graphic design": "DID_GRAPHIC_DESIGN_FOR", - "booklet editor": "IS_BOOKLET_EDITOR_FOR", - "programming": "DID_PROGRAMING_FOR", - "copyright": "IS_COPYRIGHT_HOLDER_OF", - "piano technician": "IS_PIANO_TECNICIAN_FOR", - "phonographic copyright": "IS_PHONOGRAPHIC_COPYRIGHT_HOLDER_OF", - "mastering": "DID_MASTERING_FOR", - "vocal": "PERFORED_VOCALS_FOR", - "librettist": "IS_LIBRETTIST_FOR", - "mix": "MIXED", - "recording": "DID_RECORDING_FOR", - "concertmaster": "IS_CONCERTMASTER_FOR", - "engineer": "IS_ENGINEER_FOR", - - # release_group - "tribute": "IS_TRIBUTE_TO", - "dedicated to": "IS_DEDICATED_TO", - "creative 
direction": "", - "artists and repertoire": "" -} - -artist_artist_rel_map = { - "teacher": "TEACHER_OF", - "composer-in-residence": "HAS_COMPOSER-IN-RESIDENCE_STATUS_IN", - "member of band": "IS_MEMBER_OF", - "voice actor": "IS_VOICE_ACTOR_OF", - "tribute": "IS_TRIBUTE_TO", - "supporting musician": "IS_SUPPORTING_MUSICIAN_OF", - "instrumental supporting musician": "IS_INSTRUMENTAL_SUPPORTING_MUSICIAN_OF", - "personal relationship": "HAS_PERSONAL_RELATIONSHIP_WITH", - "musical relationships": "HAS_MUSICAL_RELATIONSHIP_WITH", - "collaboration": "HAS_COLLABORATED_WITH", - "married": "IS_MARRIED_WITH", - "sibling": "IS_SIBLING_OF", - "parent": "IS_PARENT_OF", - "is person": "IS", - "conductor position": "IS_CONDUCTOR_OF", - "vocal supporting musician": "DOES_VOCAL_SUPPORT_FOR", - "artistic director": "IS_ARTIST_DIRECTOR_OF", - "subgroup": "IS_SUBGROUP_OF", - "founder": "IS_FOUNDER_OF", - "involved with": "IS_INVOLVED_WITH", - "named after": "IS_NAMED_AFTER", -} - -label_label_rel_map = { - "label rename": "WAS_RENAMED_TO", - "imprint": "DOES_IMPRINT_FOR", - "label distribution": "DOES_DISTRIBUTION_FOR", - "business association": "HAS_BUSINESS_ASSOCIATION_TO", - "label ownership": "OWNS", - "label reissue": "DOES_REISSUING_FOR" -} - -if not os.path.exists("repo"): - os.mkdir("repo") -else: - os.system("rm repo/*") -if not os.path.exists("tmp"): - os.mkdir("tmp") -else: - os.system("rm tmp/*") - -with open("in/link", "r") as f: - for line in f: - cols = line.split("\t") - links[cols[0]] = cols - -with open("in/release_status", "r") as f: - for line in f: - cols = line.split("\t") - release_statuses[cols[0]] = cols - -with open("in/link_type", "r") as f: - for line in f: - cols = line.split("\t") - link_types[cols[0]] = cols - -with open("in/area", "r") as f: - for line in f: - cols = line.split("\t") - areas[cols[0]] = cols - -with open("in/label_type") as f: - for line in f: - cols = line.split("\t") - - label_types[cols[0]] = ";" + cols[1].replace(" ", "") - - if cols[3] != "\\N" and cols[2] in label_types: - label_types[cols[0]] += label_types[cols[2]].replace(" ", "") - -with open("in/artist") as f: - for line in f: - cols = line.split("\t") - artists[cols[0]] = cols - -with open("repo/area_area.csv", "w") as out: - out.write(":START_ID(Area),:END_ID(Area)\n") - - with open("in/l_area_area", "r") as f: - for line in f: - cols = line.split("\t") - out.write(",".join((areas[cols[3]][1], - areas[cols[2]][1] - )) + "\n") - -with open("repo/area.csv", "w") as out: - out.write("id:ID(Area),name\n") - - for k, area in areas.items(): - out.write(",".join((area[1], - '"' + area[2] + '"' - )) + "\n") - -# ------ - - -out_artist = open("repo/artist.csv", "w") -out_artist_area = open("repo/artist_area.csv", "w") - -out_artist.write("id:ID(Artist),name,year:int,:LABEL\n") -out_artist_area.write(":START_ID(Artist),:END_ID(Area)\n") - -for _, artist in artists.items(): - out_artist.write(",".join(( - artist[1], - '"' + artist[2].replace("\"", "\"\"") + '"', - artist[4] if artist[4] != "\\N" else "0", - "Artist" + (";Group\n" if artist[10] == "2" else "\n") - ))) - - if artist[11] != "\\N": - out_artist_area.write(artist[1] + "," + areas[artist[11]][1] + "\n") - -out_artist.close() -out_artist_area.close() - -with open("repo/artist_artist.csv", "w") as out: - out.write(":START_ID(Artist),:END_ID(Artist),:TYPE\n") - - with open("in/l_artist_artist", "r") as f: - for line in f: - cols = line.split("\t") - out.write(",".join(( - artists[cols[2]][1], - artists[cols[3]][1], - 
artist_artist_rel_map[link_types[links[cols[1]][1]][6]] + "\n" - ))) - -# -------- - -with open("in/release_group_primary_type") as f: - for line in f: - cols = line.split("\t") - release_types[cols[0]] = ";" + cols[1] - -release_group_year = dict() -with open("in/release_group_meta") as f: - for line in f: - cols = line.split("\t") - release_group_year[cols[0]] = cols[2] if cols[2] != "\\N" else "0" - -with open("repo/release.csv", "w") as out: - out.write("id:ID(Release),name,year:int,:LABEL\n") - - with open("in/release_group") as f: - for line in f: - cols = line.split("\t") - out.write(",".join(( - cols[1], - '"' + cols[2].replace("\"", "\"\"") + '"', - release_group_year[cols[0]], - "Release" + release_types[cols[4]], - )) + "\n") - - release_groups[cols[0]] = cols - -with open("in/release") as f: - for line in f: - cols = line.split("\t") - if cols[5] != '\\N' and release_statuses[cols[5]][1] == "Official": - release_to_release_group_map[cols[0]] = cols[4] - -credit_names = defaultdict(list) - -with open("in/artist_credit_name") as f: - for line in f: - cols = line.split("\t") - credit_names[cols[0]].append(artists[cols[2]][1]) - -with open("tmp/tmp_artist_release.csv", "w") as out: - out.write(":START_ID(Artist),:END_ID(Release),:TYPE\n") - - # Is this part really necessary? - with open("in/l_artist_release") as f: - for line in f: - cols = line.split("\t") - if cols[3] in release_to_release_group_map: - out.write(",".join(( - artists[cols[2]][1], - release_groups[release_to_release_group_map[cols[3]]][1], - artist_release_rel_map[link_types[links[cols[1]][1]][6]] - )) + "\n") - - # Artist credits - with open("in/release") as f: - for line in f: - cols = line.split("\t") - if cols[0] in release_to_release_group_map: - for credit in credit_names[cols[3]]: - out.write(",".join(( - credit, - release_groups[release_to_release_group_map[cols[0]]][1], - "CREDITED_FOR" - )) + "\n") - -# Remove dupes -os.system("(head -n 1 tmp/tmp_artist_release.csv && tail -n +2 tmp/tmp_artist_release.csv" - " | sort) | uniq > repo/artist_release.csv && rm tmp/tmp_artist_release.csv") - - -with open("repo/release_release.csv", "w") as out: - out.write(":START_ID(Release),:END_ID(Release),:TYPE\n") - - with open("in/l_release_group_release_group") as f: - for line in f: - cols = line.split("\t") - out.write(",".join(( - release_groups[cols[2]][1], - release_groups[cols[3]][1], - release_release_rel_map[link_types[links[cols[1]][1]][6]] - )) + "\n") - -# --- - -with open("in/tag") as f: - with open("repo/tag.csv", "w") as out: - out.write("id:ID(Tag),name\n") - - for line in f: - cols = line.split("\t") - tags[cols[0]] = cols - out.write(cols[0] + ",\"" + cols[1].replace("\"", "\"\"") + "\"\n") - -with open("repo/release_tag.csv", "w") as out: - out.write(":START_ID(Release),:END_ID(Tag),weight:int\n") - - with open("in/release_group_tag") as f: - for line in f: - cols = line.split("\t") - - if int(cols[2]) <= 0: - continue - - out.write(",".join(( - release_groups[cols[0]][1], - cols[1], - cols[2], - )) + "\n") - -with open("repo/artist_tag.csv", "w") as out: - out.write(":START_ID(Artist),:END_ID(Tag),weight:int\n") - - with open("in/artist_tag") as f: - for line in f: - cols = line.split("\t") - - if int(cols[2]) <= 0: - continue - - out.write(",".join(( - artists[cols[0]][1], - cols[1], - cols[2], - )) + "\n") - -with open("repo/tag_tag.csv", "w") as out: - out.write(":START_ID(Tag),:END_ID(Tag),weight:int\n") - - with open("in/tag_relation") as f: - for line in f: - cols = line.split("\t") - - if 
int(cols[2]) <= 0: - continue - - out.write(",".join(( - cols[0], - cols[1], - cols[2], - )) + "\n") - -# ----- - -with open("repo/labels.csv", "w") as out: - out.write("id:ID(Label),name,code,:LABEL\n") - - with open("in/label") as f: - for line in f: - cols = line.split("\t") - labels[cols[0]] = cols - - out.write(",".join(( - cols[1], - "\"" + cols[2].replace("\"", "\"\"") + "\"", - cols[9] if cols[9] != "\\N" else "", - "Label" + label_types[cols[10]] - )) + "\n") - -with open("repo/label_label.csv", "w") as out: - out.write(":START_ID(Label),:END_ID(Label),:TYPE\n") - - with open("in/l_label_label") as f: - for line in f: - cols = line.split("\t") - - out.write(",".join(( - labels[cols[2]][1], - labels[cols[3]][1], - label_label_rel_map[link_types[links[cols[1]][1]][6]] - )) + "\n") - -# --- diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index f229360..0000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -requests diff --git a/seed.cypher b/seed.cypher deleted file mode 100644 index 7a2fc34..0000000 --- a/seed.cypher +++ /dev/null @@ -1,2 +0,0 @@ -CREATE INDEX ON :Artist(id); -CREATE INDEX ON :Release(id); diff --git a/seed_neo4j_db.sh b/seed_neo4j_db.sh deleted file mode 100755 index 846b8ef..0000000 --- a/seed_neo4j_db.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -export NEO4J_HOME="/home/drone/Downloads/neo4j-community-3.5.3" - -cat seed.cypher | ${NEO4J_HOME}/bin/cypher-shell diff --git a/task_tracker_drone b/task_tracker_drone deleted file mode 160000 index e025596..0000000 --- a/task_tracker_drone +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e025596cf2ccfbe803f05cb848927ae36fe270a3
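The removed `process_lastfm_data.py` has to resolve an ambiguity its own comments call out: one Last.fm artist name can refer to several MBIDs, and the script keeps the MBID whose artist has the most official releases. A compact sketch of that rule with made-up counts:

```python
# Sketch of the disambiguation rule in process_lastfm_data.py: when a
# Last.fm name maps to several MBIDs, keep the candidate whose artist
# has the most official releases. Counts here are hypothetical.
from collections import defaultdict

release_count = defaultdict(int, {"mbid-a": 12, "mbid-b": 3})
name_to_mbid = {}

def disambiguate(name, mbid):
    current = name_to_mbid.get(name)
    if current is None or release_count[current] < release_count[mbid]:
        name_to_mbid[name] = mbid

disambiguate("Example Artist", "mbid-b")
disambiguate("Example Artist", "mbid-a")
assert name_to_mbid["Example Artist"] == "mbid-a"  # 12 official releases beat 3
```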