Initial commit

This commit is contained in:
simon987 2019-05-08 20:45:35 -04:00
parent 5fb05d591a
commit 6d8c65fcd2
21 changed files with 784 additions and 0 deletions

8
.gitignore vendored
View File

@ -102,3 +102,11 @@ venv.bak/
# mypy
.mypy_cache/
.idea/
in/
repo/
tmp/
workspace/
worker.json
*.db

9
.gitmodules vendored Normal file
View File

@ -0,0 +1,9 @@
[submodule "task_tracker_drone"]
path = task_tracker_drone
url = https://github.com/simon987/task_tracker_drone/
[submodule "last.fm"]
path = last.fm
url = https://git.simon987.net/drone/last.fm
[submodule "caa"]
path = caa
url = https://git.simon987.net/drone/caa.git

6
.idea/misc.xml generated Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
</project>

8
.idea/modules.xml generated Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/music-graph-scripts.iml" filepath="$PROJECT_DIR$/.idea/music-graph-scripts.iml" />
</modules>
</component>
</project>

9
.idea/music-graph-scripts.iml generated Normal file
View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

6
.idea/vcs.xml generated Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

0
README.md Normal file
View File

0
__init__.py Normal file
View File

1
caa Submodule

@ -0,0 +1 @@
Subproject commit 910f4a0bceadac37ac28fa59e7648f241c931fe6

20
download_mb_dump.sh Executable file
View File

@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Download the latest MusicBrainz full-export dump and extract only the tables
# the import scripts consume into ./in/.
#
# BUG FIX: the original kept going after a failed curl/wget and would happily
# untar a stale or partial archive; set -e aborts on the first failed command,
# -u catches unset variables, pipefail catches mid-pipeline failures.
set -euo pipefail

latest=$(curl http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/LATEST)

# -p: no error when the directory already exists (was: `mkdir in 2> /dev/null`,
# which also hid genuine mkdir failures and would trip set -e on re-runs).
mkdir -p in
cd in

# Re-implements `wget -nc` (skip files already fetched by a previous run):
# some wget versions exit non-zero on the no-clobber path, which would
# spuriously abort under set -e.
fetch() {
    local url="$1"
    local file="${url##*/}"
    if [ ! -f "${file}" ]; then
        wget "${url}"
    fi
}

fetch "http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/${latest}/mbdump.tar.bz2"
fetch "http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/${latest}/mbdump-derived.tar.bz2"

# Extract only the tables the graph build needs, not the whole dump.
tar -xjvf mbdump.tar.bz2 mbdump/area mbdump/artist mbdump/l_area_area mbdump/l_artist_artist \
    mbdump/l_artist_release mbdump/l_artist_release_group mbdump/l_label_label mbdump/l_release_group_release_group \
    mbdump/label mbdump/label_type mbdump/link mbdump/link_type mbdump/release mbdump/release_group \
    mbdump/release_group_primary_type mbdump/artist_credit_name mbdump/release_status
tar -xjvf mbdump-derived.tar.bz2 mbdump/artist_tag mbdump/release_group_tag mbdump/tag mbdump/tag_relation \
    mbdump/release_group_meta

# Flatten the mbdump/ directory into in/ where the Python scripts expect files.
mv mbdump/* .
rm -r mbdump
cd ..

27
extract_covers.py Normal file
View File

@ -0,0 +1,27 @@
"""Dump every cover-art blob from a `covers` sqlite table into ./tmpcovers/<id>.jpg.

Usage: python extract_covers.py <covers.db>
"""
import sqlite3
import sys


def batches(items, size=30):
    """Yield *items* in lists of at most *size*, including the final partial batch.

    BUG FIX: the original inline generator only yielded once 31 items had
    accumulated and silently dropped the trailing remainder, so up to 30
    covers per run were never extracted.
    """
    buf = []
    for item in items:
        buf.append(item)
        if len(buf) >= size:
            yield buf
            buf = []
    if buf:
        yield buf


def main(db_path, out_dir="./tmpcovers"):
    """Write each cover blob to <out_dir>/<id>.jpg and print the id as progress."""
    with sqlite3.connect(db_path) as conn:
        cursor = conn.cursor()
        # (The original created and executed this cursor twice — redundant.)
        cursor.execute("SELECT id FROM covers")
        ids = [row[0] for row in cursor.fetchall()]

        for batch in batches(ids):
            # Parameterized IN (...) instead of string interpolation, and id is
            # selected alongside cover: SQL gives no row-ordering guarantee, so
            # the original's pairing of fetchall() rows with batch[i] positions
            # could attach a blob to the wrong id.
            placeholders = ",".join("?" for _ in batch)
            cursor.execute(
                "SELECT id, cover FROM covers WHERE id IN (%s)" % placeholders,
                batch,
            )
            for cover_id, cover in cursor.fetchall():
                with open(out_dir + "/" + cover_id + ".jpg", "wb") as out:
                    out.write(cover)
                print(cover_id)


if __name__ == "__main__":
    main(sys.argv[1])

56
generate_caa_tasks.py Normal file
View File

@ -0,0 +1,56 @@
"""Queue Cover Art Archive scrape tasks (batches of release MBIDs) on task_tracker."""
import json
from multiprocessing.pool import ThreadPool

from task_tracker_drone.src.tt_drone.api import TaskTrackerApi, Worker

TT_API_URL = "https://tt.simon987.net/api"
TT_PROJECT = 5
BATCH_SIZE = 75

# MBIDs already scraped; uncomment below to seed from an existing covers
# database and resume a previous run.
done = set()
# with sqlite3.connect(sys.argv[1]) as conn:
#     cur = conn.cursor()
#     cur.execute("SELECT id FROM covers")
#     for mbid in cur.fetchall():
#         done.add(mbid[0])


def _release_batches(path="in/release", size=BATCH_SIZE):
    """Yield release MBIDs (column 1 of the MusicBrainz TSV dump) in lists of *size*."""
    buf = []
    with open(path) as f:
        for line in f:
            cols = line.split("\t")
            buf.append(cols[1])
            if len(buf) == size:
                yield buf
                buf = []
    # BUG FIX: the original generator never yielded the final partial batch,
    # so up to size-1 releases were silently dropped from the queue.
    if buf:
        yield buf


def main():
    api = TaskTrackerApi(TT_API_URL)

    # Reuse a saved worker identity, or register a new one and wait for a
    # human to grant it project access.
    worker = Worker.from_file(api)
    if not worker:
        worker = api.make_worker("caa scraper")
        worker.dump_to_file()
        worker.request_access(TT_PROJECT, True, True)
        input("Give permission to " + worker.alias)

    def mktask(mbids):
        """Submit one batch of MBIDs as a single task and print the API response."""
        res = worker.submit_task(
            project=TT_PROJECT,
            recipe=json.dumps(mbids),
            # NOTE(review): hash() is salted per process (PYTHONHASHSEED), so
            # this value is not stable across runs — confirm whether the
            # tracker relies on it for dedup.
            hash64=hash(mbids[0]),
            max_assign_time=60 * 30,
            priority=1,
            unique_str=None,
            verification_count=None,
            max_retries=5,
        )
        print(res.text)

    pool = ThreadPool(processes=20)
    pool.map(func=mktask, iterable=_release_batches())


if __name__ == "__main__":
    main()

48
generate_lastfm_tasks.py Normal file
View File

@ -0,0 +1,48 @@
"""Queue last.fm scrape tasks (batches of Group artists) on task_tracker."""
import csv
import json
from multiprocessing.pool import ThreadPool

from task_tracker_drone.src.tt_drone.api import TaskTrackerApi, Worker

TT_API_URL = "https://tt.simon987.net/api"
TT_PROJECT = 1
BATCH_SIZE = 30


def _group_artist_batches(path="repo/artist.csv", size=BATCH_SIZE):
    """Yield rows of artist.csv whose :LABEL column contains "Group", in batches.

    The header row's label column is ":LABEL" and is filtered out naturally by
    the "Group" test, as in the original.
    """
    batch = []
    with open(path) as f:
        for row in csv.reader(f):
            if "Group" in row[3]:
                batch.append(row)
                if len(batch) >= size:
                    yield batch
                    batch = []
    # BUG FIX: the original generator dropped the final partial batch, so up
    # to size-1 artists were never queued.
    if batch:
        yield batch


def main():
    api = TaskTrackerApi(TT_API_URL)

    # Reuse a saved worker identity, or register one and wait for approval.
    worker = Worker.from_file(api)
    if not worker:
        worker = api.make_worker("last.fm scraper")
        worker.dump_to_file()
        worker.request_access(TT_PROJECT, True, True)
        input("Give permission to " + worker.alias)

    def mktask(rows):
        """Submit one batch of (mbid, name) pairs as a single task."""
        res = worker.submit_task(
            project=TT_PROJECT,
            recipe=json.dumps(
                [{"mbid": row[0], "name": row[1]} for row in rows]
            ),
            unique_str=rows[0][0],
            max_assign_time=60 * 5,
        )
        print(res.text)

    # Materialize first (as the original did) so the CSV is fully read before
    # the thread pool starts submitting.
    tasks = list(_group_artist_batches())
    pool = ThreadPool(processes=25)
    pool.map(func=mktask, iterable=tasks)


if __name__ == "__main__":
    main()

1
last.fm Submodule

@ -0,0 +1 @@
Subproject commit 855df64c316930062ff4f7740492d0f039788498

53
make_neoj4_db.sh Executable file
View File

@ -0,0 +1,53 @@
#!/bin/bash
# Rebuild the Neo4j database from the CSVs produced by process_mb_dump.py /
# process_lastfm_data.py, fetched over HTTP from $REPOSITORY.
export NEO4J_HOME="/home/drone/Downloads/neo4j-community-3.5.3"
export REPOSITORY="http://localhost:9999"
export DATABASE="graph.db"

# neo4j-admin import refuses to load into an existing database, so wipe it.
rm -rf "${NEO4J_HOME}/data/databases/${DATABASE}"

# Disable auth for local access.
# NOTE(review): this appends a new line to neo4j.conf on every run (the .bak
# copy is overwritten each time) — confirm this duplication is acceptable.
cp ${NEO4J_HOME}/conf/neo4j.conf ${NEO4J_HOME}/conf/neo4j.conf.bak
echo "dbms.security.auth_enabled=false" >> ${NEO4J_HOME}/conf/neo4j.conf

# Fetch a fresh copy of every CSV into ./workspace (old ones removed first;
# the rm prints a harmless error on the very first run when none exist yet).
mkdir workspace 2> /dev/null
cd workspace
rm *.csv
wget ${REPOSITORY}/area.csv
wget ${REPOSITORY}/area_area.csv
wget ${REPOSITORY}/lastfm_artist.csv
wget ${REPOSITORY}/artist_area.csv
wget ${REPOSITORY}/artist_artist.csv
wget ${REPOSITORY}/artist_release.csv
wget ${REPOSITORY}/release.csv
wget ${REPOSITORY}/tag.csv
wget ${REPOSITORY}/tag_tag.csv
wget ${REPOSITORY}/release_tag.csv
wget ${REPOSITORY}/release_release.csv
wget ${REPOSITORY}/artist_tag.csv
wget ${REPOSITORY}/labels.csv
wget ${REPOSITORY}/label_label.csv
wget ${REPOSITORY}/lastfm_artist_artist.csv

# Bulk import. Nodes/relationship types on the command line supplement the
# CSV headers (id:ID(...), :START_ID/:END_ID, :LABEL, :TYPE columns).
# NOTE(review): `.` sources neo4j-admin into this shell rather than executing
# it — presumably intentional for this Neo4j version; confirm.
. ${NEO4J_HOME}/bin/neo4j-admin import \
	--database ${DATABASE}\
	--high-io=true\
	--nodes:Area:MusicBrainzEntity "area.csv"\
	--nodes:MusicBrainzEntity "release.csv"\
	--nodes:MusicBrainzEntity "lastfm_artist.csv"\
	--nodes:Tag "tag.csv"\
	--nodes:MusicBrainzEntity "labels.csv"\
	--relationships:IS_PART_OF "area_area.csv"\
	--relationships:IS_BASED_IN "artist_area.csv"\
	--relationships "artist_artist.csv"\
	--relationships "artist_release.csv"\
	--relationships:IS_TAGGED "release_tag.csv"\
	--relationships:IS_TAGGED "artist_tag.csv"\
	--relationships:IS_RELATED_TO "tag_tag.csv"\
	--relationships "label_label.csv"\
	--relationships "release_release.csv"\
	--relationships:IS_RELATED_TO "lastfm_artist_artist.csv"

rm *.csv
cd ..

31
make_release_to_rg_map.py Normal file
View File

@ -0,0 +1,31 @@
import sqlite3
release_to_release_group_map = dict()
release_groups = dict()
with open("in/release_group") as f:
for line in f:
cols = line.split("\t")
release_groups[cols[0]] = cols[1]
with open("in/release") as f:
for line in f:
cols = line.split("\t")
release_to_release_group_map[cols[1]] = release_groups[cols[4]]
with sqlite3.connect("mapdb.db") as conn:
cursor = conn.cursor()
cursor.execute("CREATE TABLE map (release TEXT PRIMARY KEY , release_group TEXT)")
for k, v in release_to_release_group_map.items():
cursor.execute("INSERT INTO map (release, release_group) VALUES (?,?)", (k, v))
conn.commit()
"""
CREATE TABLE covers (id TEXT primary key, cover BLOB);
ATTACH 'mapdb.db' AS map;
ATTACH '/mnt/Data8/caa_tn_only.db' AS source;
INSERT OR IGNORE INTO covers SELECT release_group, cover FROM source.covers INNER JOIN map.map ON id = map.release;
"""

100
process_lastfm_data.py Normal file
View File

@ -0,0 +1,100 @@
import csv
import json
import sqlite3
from collections import defaultdict
import sys
artists = set()


def disambiguate(lfm_artist, artist_release_count, name, mbid):
    """Record *mbid* for last.fm artist *name*, keeping the most-released MBID.

    A last.fm name can map to several MBIDs; whichever MBID has the higher
    count in *artist_release_count* wins. Replacements are logged to stdout.
    """
    current = lfm_artist.get(name, None)

    # First sighting of this name (or the same MBID again): just record it.
    if not current or mbid == current:
        lfm_artist[name] = mbid
        return

    # Conflicting MBID: keep whichever has more official releases.
    if artist_release_count[current] < artist_release_count[mbid]:
        lfm_artist[name] = mbid
        print("Replacing %s (%s) with %s (%d) for %s" %
              (current, artist_release_count[current],
               mbid, artist_release_count[mbid],
               name))
def patch(lastfm_data):
    """Merge scraped last.fm data into the CSV repo for the Neo4j import.

    Reads repo/artist.csv and repo/artist_release.csv (produced by
    process_mb_dump.py) and the sqlite database *lastfm_data* holding raw
    last.fm JSON rows, then writes:
      - repo/lastfm_artist.csv: artist nodes enriched with listeners/playcount
      - repo/lastfm_artist_artist.csv: similarity edges weighted by match score
    """
    artist_listeners = dict()
    lastfm_artist_to_mbid = dict()
    artist_release_count = defaultdict(int)
    related = list()

    # Count releases per artist MBID (first CSV column); used to break ties
    # between ambiguous last.fm names in disambiguate().
    with open("repo/artist_release.csv") as f:
        for line in f:
            cols = line.split(',')
            artist_release_count[cols[0]] += 1

    with sqlite3.connect(lastfm_data) as conn:
        cur = conn.cursor()
        cur.execute("SELECT data FROM lastfmdata", )
        data = list(cur.fetchall())

    # A lastfm artist name can refer to multiple MBIDs
    # For RELATED_TO purposes, we assume that the MBID referring
    # to the artist with the most official releases is the one
    for row in data:
        meta = json.loads(row[0])
        disambiguate(lastfm_artist_to_mbid, artist_release_count, meta["name"], meta["artist"])
        for similar in [s for s in meta["similar"] if s["mbid"] is not None]:
            disambiguate(lastfm_artist_to_mbid, artist_release_count, similar["name"], similar["mbid"])

    # Get related links & listener counts
    for row in data:
        meta = json.loads(row[0])
        artist_listeners[lastfm_artist_to_mbid[meta["name"]]] = \
            (meta["listeners"], meta["playcount"])
        for similar in [s for s in meta["similar"] if s["mbid"] is not None]:
            related.append((
                lastfm_artist_to_mbid[similar["name"]],
                lastfm_artist_to_mbid[meta["name"]],
                similar["match"]
            ))

    # Rewrite artist.csv as lastfm_artist.csv with two extra columns.
    # NOTE(review): header says "year:short" while artist.csv uses "year:int" —
    # presumably both are valid neo4j-admin types; confirm the mismatch is intended.
    with open("repo/lastfm_artist.csv", "w") as out:
        writer = csv.writer(out)
        writer.writerow([
            "id:ID(Artist)", "name", "year:short", ":LABEL", "listeners:int", "playcount:int"
        ])
        with open("repo/artist.csv") as f:
            reader = csv.reader(f)
            reader.__next__()  # Skip header
            for row in reader:
                writer.writerow([
                    row[0],
                    row[1],
                    row[2],
                    row[3],
                    artist_listeners.get(row[0], (0, 0))[0],
                    artist_listeners.get(row[0], (0, 0))[1],
                ])
                # Track emitted MBIDs so edges below only reference real nodes.
                artists.add(row[0])

    with open("repo/lastfm_artist_artist.csv", "w") as out:
        out.write(",".join((
            ":START_ID(Artist)", ":END_ID(Artist)", "weight:float"
        )) + "\n")
        for x in related:
            if x[0] in artists and x[1] in artists:
                # assumes similar["match"] is already a string — TODO confirm,
                # otherwise str.join would raise TypeError here.
                out.write(",".join(x) + "\n")


# Entry point: sys.argv[1] is the path to the last.fm sqlite database.
patch(sys.argv[1])

393
process_mb_dump.py Normal file
View File

@ -0,0 +1,393 @@
import os
from collections import defaultdict
# Row caches keyed by MusicBrainz dump row id; each value is the raw
# tab-split column list of the corresponding TSV file under in/.
links = dict()
link_types = dict()
areas = dict()
labels = dict()
# label_type id -> ";Type" suffix appended to a Label node's :LABEL column.
# "\N" is the dump's NULL marker and maps to no extra label.
label_types = {
    "\\N": ""
}
release_groups = dict()
release_statuses = dict()
release_to_release_group_map = dict()
# release_group_primary_type id -> ";Type" suffix for Release node labels.
release_types = {
    "\\N": "",
}
artists = dict()
tags = dict()
# MusicBrainz link-type name -> Neo4j relationship :TYPE, per entity pair.
# An empty string emits the relationship with no explicit type suffix.
release_release_rel_map = {
    "covers and versions": "",
    "remixes and compilations": "",
    "DJ-mix": "IS_DJ_MIX_OF",
    "live performance": "IS_LIVE_PERFORMANCE_OF",
    "cover": "IS_COVER_OF",
    "remix": "IS_REMIX_OF",
    "mashes up": "IS_MASHUP_OF",
    "included in": "INCLUDED_IN",
    "single from": "IS_SINGLE_FROM"
}
# NOTE(review): several values below are misspelled ("IS_ARRENGER_FOR",
# "PERFORED_VOCALS_FOR", "IS_PIANO_TECNICIAN_FOR", "DID_PROGRAMING_FOR").
# They are written into the graph verbatim, so renaming them would change the
# relationship types consumers query — left untouched here.
artist_release_rel_map = {
    "translator": "TRANSLATED",
    "liner notes": "WROTE_LINER_NOTES",
    "lyricist": "IS_LYRICIST_FOR",
    "lacquer cut": "DID_LACQUER_CUT_FOR",
    "samples from artist": "HAS_SAMPLES_IN",
    "remixes and compilations": "",
    "composition": "COMPOSED",
    "booking": "DID_BOOKING_FOR",
    "balance": "DID_BALANCE_FOR",
    "misc": "HAS_MISC_ROLE_IN",
    "conductor": "CONDUCTED",
    "legal representation": "PROVIDED_LEGAL_REPRESENTATION_FOR",
    "design/illustration": "DID_DESIGN_FOR",
    "performing orchestra": "PERFORMED_FOR",
    "producer": "PRODUCED",
    "instrument": "PERFORMED_INSTRUMENT_FOR",
    "writer": "WROTE_LYRICS_FOR",
    "production": "DID_PRODUCTION_FOR",
    "performance": "PERFORMED_FOR",
    "composer": "IS_COMPOSER_FOR",
    "sound": "DID_SOUND_FOR",
    "remixer": "DID_REMIXING_FOR",
    "orchestrator": "IS_ORCHESTRATOR_FOR",
    "compiler": "DID_COMPILATION_FOR",
    "vocal arranger": "IS_ARRANGER_FOR",
    "arranger": "IS_ARRENGER_FOR",
    "mix-DJ": "MIXED",
    "editor": "IS_EDITOR_FOR",
    "illustration": "DID_ILLUSTRATION_FOR",
    "audio": "DID_AUDIO_FOR",
    "publishing": "IS_PUBLISHER_FOR",
    "art direction": "DID_ART_DIRECTOR_FOR",
    "design": "DID_DESIGN_FOR",
    "instrument arranger": "IS_ARRANGER_FOR",
    "chorus master": "IS_CHORUS_MASTER_FOR",
    "photography": "DID_PHOTOGRAPHY_FOR",
    "performer": "PERFORMED_IN",
    "graphic design": "DID_GRAPHIC_DESIGN_FOR",
    "booklet editor": "IS_BOOKLET_EDITOR_FOR",
    "programming": "DID_PROGRAMING_FOR",
    "copyright": "IS_COPYRIGHT_HOLDER_OF",
    "piano technician": "IS_PIANO_TECNICIAN_FOR",
    "phonographic copyright": "IS_PHONOGRAPHIC_COPYRIGHT_HOLDER_OF",
    "mastering": "DID_MASTERING_FOR",
    "vocal": "PERFORED_VOCALS_FOR",
    "librettist": "IS_LIBRETTIST_FOR",
    "mix": "MIXED",
    "recording": "DID_RECORDING_FOR",
    "concertmaster": "IS_CONCERTMASTER_FOR",
    "engineer": "IS_ENGINEER_FOR",
    # release_group
    "tribute": "IS_TRIBUTE_TO",
    "dedicated to": "IS_DEDICATED_TO",
    "creative direction": "",
    "artists and repertoire": ""
}
# artist <-> artist link types.
artist_artist_rel_map = {
    "teacher": "TEACHER_OF",
    "composer-in-residence": "HAS_COMPOSER-IN-RESIDENCE_STATUS_IN",
    "member of band": "IS_MEMBER_OF",
    "voice actor": "IS_VOICE_ACTOR_OF",
    "tribute": "IS_TRIBUTE_TO",
    "supporting musician": "IS_SUPPORTING_MUSICIAN_OF",
    "instrumental supporting musician": "IS_INSTRUMENTAL_SUPPORTING_MUSICIAN_OF",
    "personal relationship": "HAS_PERSONAL_RELATIONSHIP_WITH",
    "musical relationships": "HAS_MUSICAL_RELATIONSHIP_WITH",
    "collaboration": "HAS_COLLABORATED_WITH",
    "married": "IS_MARRIED_WITH",
    "sibling": "IS_SIBLING_OF",
    "parent": "IS_PARENT_OF",
    "is person": "IS",
    "conductor position": "IS_CONDUCTOR_OF",
    "vocal supporting musician": "DOES_VOCAL_SUPPORT_FOR",
    "artistic director": "IS_ARTIST_DIRECTOR_OF",
    "subgroup": "IS_SUBGROUP_OF",
    "founder": "IS_FOUNDER_OF",
    "involved with": "IS_INVOLVED_WITH",
    "named after": "IS_NAMED_AFTER",
}
# label <-> label link types.
label_label_rel_map = {
    "label rename": "WAS_RENAMED_TO",
    "imprint": "DOES_IMPRINT_FOR",
    "label distribution": "DOES_DISTRIBUTION_FOR",
    "business association": "HAS_BUSINESS_ASSOCIATION_TO",
    "label ownership": "OWNS",
    "label reissue": "DOES_REISSUING_FOR"
}
# Start from empty repo/ (final CSVs) and tmp/ (intermediate files) dirs.
if not os.path.exists("repo"):
    os.mkdir("repo")
else:
    os.system("rm repo/*")
if not os.path.exists("tmp"):
    os.mkdir("tmp")
else:
    os.system("rm tmp/*")

# Load the dump tables needed to resolve links: each row is cached whole,
# keyed by its row id (column 0).
with open("in/link", "r") as f:
    for line in f:
        cols = line.split("\t")
        links[cols[0]] = cols
with open("in/release_status", "r") as f:
    for line in f:
        cols = line.split("\t")
        release_statuses[cols[0]] = cols
with open("in/link_type", "r") as f:
    for line in f:
        cols = line.split("\t")
        link_types[cols[0]] = cols
with open("in/area", "r") as f:
    for line in f:
        cols = line.split("\t")
        areas[cols[0]] = cols
# label_type -> ";Name" label suffix; when a parent type (col 2) exists and
# col 3 is non-null, its suffix is appended too.
with open("in/label_type") as f:
    for line in f:
        cols = line.split("\t")
        label_types[cols[0]] = ";" + cols[1].replace(" ", "")
        if cols[3] != "\\N" and cols[2] in label_types:
            label_types[cols[0]] += label_types[cols[2]].replace(" ", "")
with open("in/artist") as f:
    for line in f:
        cols = line.split("\t")
        artists[cols[0]] = cols

# Area nodes and the area containment edges (cols 2/3 of l_area_area are the
# row ids of the linked areas; col 1 of an area row is used as the node id).
with open("repo/area_area.csv", "w") as out:
    out.write(":START_ID(Area),:END_ID(Area)\n")
    with open("in/l_area_area", "r") as f:
        for line in f:
            cols = line.split("\t")
            out.write(",".join((areas[cols[3]][1],
                                areas[cols[2]][1]
                                )) + "\n")
with open("repo/area.csv", "w") as out:
    out.write("id:ID(Area),name\n")
    for k, area in areas.items():
        out.write(",".join((area[1],
                            '"' + area[2] + '"'
                            )) + "\n")

# ------
# Artist nodes + artist->area edges, written in one pass over the artist cache.
out_artist = open("repo/artist.csv", "w")
out_artist_area = open("repo/artist_area.csv", "w")
out_artist.write("id:ID(Artist),name,year:int,:LABEL\n")
out_artist_area.write(":START_ID(Artist),:END_ID(Area)\n")
for _, artist in artists.items():
    out_artist.write(",".join((
        artist[1],
        # Escape embedded quotes for the CSV quote-wrapped name field.
        '"' + artist[2].replace("\"", "\"\"") + '"',
        # Begin year; "\N" (null) becomes 0.
        artist[4] if artist[4] != "\\N" else "0",
        # Type column "2" marks a group — presumably per the MB schema; confirm.
        "Artist" + (";Group\n" if artist[10] == "2" else "\n")
    )))
    if artist[11] != "\\N":
        out_artist_area.write(artist[1] + "," + areas[artist[11]][1] + "\n")
out_artist.close()
out_artist_area.close()

# Artist<->artist relationships; the :TYPE comes from the link-type name via
# artist_artist_rel_map (raises KeyError on an unmapped link type).
with open("repo/artist_artist.csv", "w") as out:
    out.write(":START_ID(Artist),:END_ID(Artist),:TYPE\n")
    with open("in/l_artist_artist", "r") as f:
        for line in f:
            cols = line.split("\t")
            out.write(",".join((
                artists[cols[2]][1],
                artists[cols[3]][1],
                artist_artist_rel_map[link_types[links[cols[1]][1]][6]] + "\n"
            )))
# --------
# Release-group primary types ("Album", "Single", ...) -> ";Type" label suffix.
with open("in/release_group_primary_type") as f:
    for line in f:
        cols = line.split("\t")
        release_types[cols[0]] = ";" + cols[1]

# First-release year per release group (from the derived meta dump); null -> 0.
release_group_year = dict()
with open("in/release_group_meta") as f:
    for line in f:
        cols = line.split("\t")
        release_group_year[cols[0]] = cols[2] if cols[2] != "\\N" else "0"

# Release nodes are release *groups*, not individual releases.
with open("repo/release.csv", "w") as out:
    out.write("id:ID(Release),name,year:int,:LABEL\n")
    with open("in/release_group") as f:
        for line in f:
            cols = line.split("\t")
            out.write(",".join((
                cols[1],
                '"' + cols[2].replace("\"", "\"\"") + '"',
                release_group_year[cols[0]],
                "Release" + release_types[cols[4]],
            )) + "\n")
            release_groups[cols[0]] = cols

# Map release row id -> release-group row id, keeping only "Official" releases.
with open("in/release") as f:
    for line in f:
        cols = line.split("\t")
        if cols[5] != '\\N' and release_statuses[cols[5]][1] == "Official":
            release_to_release_group_map[cols[0]] = cols[4]

# artist_credit id -> list of credited artist MBIDs.
credit_names = defaultdict(list)
with open("in/artist_credit_name") as f:
    for line in f:
        cols = line.split("\t")
        credit_names[cols[0]].append(artists[cols[2]][1])

# Artist->release-group edges go to a tmp file first; the shell pipeline below
# dedupes them (the same pair can come from both sources).
with open("tmp/tmp_artist_release.csv", "w") as out:
    out.write(":START_ID(Artist),:END_ID(Release),:TYPE\n")
    # Is this part really necessary?
    with open("in/l_artist_release") as f:
        for line in f:
            cols = line.split("\t")
            if cols[3] in release_to_release_group_map:
                out.write(",".join((
                    artists[cols[2]][1],
                    release_groups[release_to_release_group_map[cols[3]]][1],
                    artist_release_rel_map[link_types[links[cols[1]][1]][6]]
                )) + "\n")
    # Artist credits
    with open("in/release") as f:
        for line in f:
            cols = line.split("\t")
            if cols[0] in release_to_release_group_map:
                for credit in credit_names[cols[3]]:
                    out.write(",".join((
                        credit,
                        release_groups[release_to_release_group_map[cols[0]]][1],
                        "CREDITED_FOR"
                    )) + "\n")
# Remove dupes
os.system("(head -n 1 tmp/tmp_artist_release.csv && tail -n +2 tmp/tmp_artist_release.csv"
          " | sort) | uniq > repo/artist_release.csv && rm tmp/tmp_artist_release.csv")

# Release-group <-> release-group relationships (covers, remixes, ...).
with open("repo/release_release.csv", "w") as out:
    out.write(":START_ID(Release),:END_ID(Release),:TYPE\n")
    with open("in/l_release_group_release_group") as f:
        for line in f:
            cols = line.split("\t")
            out.write(",".join((
                release_groups[cols[2]][1],
                release_groups[cols[3]][1],
                release_release_rel_map[link_types[links[cols[1]][1]][6]]
            )) + "\n")
# ---
# Tag nodes; the raw rows are also cached (unused later — kept for parity).
with open("in/tag") as f:
    with open("repo/tag.csv", "w") as out:
        out.write("id:ID(Tag),name\n")
        for line in f:
            cols = line.split("\t")
            tags[cols[0]] = cols
            out.write(cols[0] + ",\"" + cols[1].replace("\"", "\"\"") + "\"\n")

# Release->tag edges; rows with a non-positive vote count are skipped.
with open("repo/release_tag.csv", "w") as out:
    out.write(":START_ID(Release),:END_ID(Tag),weight:int\n")
    with open("in/release_group_tag") as f:
        for line in f:
            cols = line.split("\t")
            if int(cols[2]) <= 0:
                continue
            out.write(",".join((
                release_groups[cols[0]][1],
                cols[1],
                cols[2],
            )) + "\n")

# Artist->tag edges, same non-positive-count filter.
with open("repo/artist_tag.csv", "w") as out:
    out.write(":START_ID(Artist),:END_ID(Tag),weight:int\n")
    with open("in/artist_tag") as f:
        for line in f:
            cols = line.split("\t")
            if int(cols[2]) <= 0:
                continue
            out.write(",".join((
                artists[cols[0]][1],
                cols[1],
                cols[2],
            )) + "\n")

# Tag<->tag relatedness edges (tag ids are used directly, no lookup needed).
with open("repo/tag_tag.csv", "w") as out:
    out.write(":START_ID(Tag),:END_ID(Tag),weight:int\n")
    with open("in/tag_relation") as f:
        for line in f:
            cols = line.split("\t")
            if int(cols[2]) <= 0:
                continue
            out.write(",".join((
                cols[0],
                cols[1],
                cols[2],
            )) + "\n")

# -----
# Label nodes: MBID, escaped name, label code (null -> empty), type labels.
with open("repo/labels.csv", "w") as out:
    out.write("id:ID(Label),name,code,:LABEL\n")
    with open("in/label") as f:
        for line in f:
            cols = line.split("\t")
            labels[cols[0]] = cols
            out.write(",".join((
                cols[1],
                "\"" + cols[2].replace("\"", "\"\"") + "\"",
                cols[9] if cols[9] != "\\N" else "",
                "Label" + label_types[cols[10]]
            )) + "\n")

# Label<->label relationships (renames, ownership, distribution, ...).
with open("repo/label_label.csv", "w") as out:
    out.write(":START_ID(Label),:END_ID(Label),:TYPE\n")
    with open("in/l_label_label") as f:
        for line in f:
            cols = line.split("\t")
            out.write(",".join((
                labels[cols[2]][1],
                labels[cols[3]][1],
                label_label_rel_map[link_types[links[cols[1]][1]][6]]
            )) + "\n")
# ---

1
requirements.txt Normal file
View File

@ -0,0 +1 @@
requests

2
seed.cypher Normal file
View File

@ -0,0 +1,2 @@
CREATE INDEX ON :Artist(id);
CREATE INDEX ON :Release(id);

5
seed_neo4j_db.sh Executable file
View File

@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Apply seed.cypher (the Artist/Release id index creation) to the local Neo4j
# instance via cypher-shell.
export NEO4J_HOME="/home/drone/Downloads/neo4j-community-3.5.3"

# Same effect as `cat seed.cypher | cypher-shell`, minus the extra cat process.
"${NEO4J_HOME}/bin/cypher-shell" < seed.cypher