mirror of
https://github.com/simon987/music-graph-scripts.git
synced 2025-04-10 05:56:42 +00:00
2.0 rewrite wip
This commit is contained in:
parent
c3dc1faa31
commit
3f9382e6f7
15
.gitmodules
vendored
15
.gitmodules
vendored
@ -1,15 +0,0 @@
|
|||||||
[submodule "task_tracker_drone"]
|
|
||||||
path = task_tracker_drone
|
|
||||||
url = https://github.com/simon987/task_tracker_drone/
|
|
||||||
[submodule "last.fm"]
|
|
||||||
path = last.fm
|
|
||||||
url = https://git.simon987.net/drone/last.fm
|
|
||||||
[submodule "caa"]
|
|
||||||
path = caa
|
|
||||||
url = https://git.simon987.net/drone/caa.git
|
|
||||||
[submodule "spotify"]
|
|
||||||
path = spotify
|
|
||||||
url = https://git.simon987.net/drone/spotify
|
|
||||||
[submodule "spotify2"]
|
|
||||||
path = spotify2
|
|
||||||
url = https://git.simon987.net/drone/spotify2
|
|
14
README.md
14
README.md
@ -1,17 +1,3 @@
|
|||||||
wip
|
wip
|
||||||
|
|
||||||
|
|
||||||
### task_tracker setup:
|
|
||||||
|
|
||||||
Last.fm api calls are queued to [task_tracker](https://github.com/simon987/task_tracker/),
|
|
||||||
and results are gathered by a [task_tracker_drone](https://github.com/simon987/task_tracker_drone/)
|
|
||||||
([script](https://git.simon987.net/drone/last.fm/src/master/run)).
|
|
||||||
|
|
||||||
|
|
||||||
Project secret:
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"apikey": "<Your Last.fm api key>",
|
|
||||||
"user": "<Your Last.fm username>"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
1
caa
1
caa
@ -1 +0,0 @@
|
|||||||
Subproject commit 910f4a0bceadac37ac28fa59e7648f241c931fe6
|
|
22
config.py
Normal file
22
config.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
config = {
|
||||||
|
"DB": "musicbrainz_db",
|
||||||
|
"USER": "musicbrainz",
|
||||||
|
"PASSWORD": "musicbrainz",
|
||||||
|
"HOST": "127.0.0.1",
|
||||||
|
"PORT": 5433,
|
||||||
|
|
||||||
|
"LASTFM_APIKEY": os.environ.get("LASTFM_APIKEY"),
|
||||||
|
"LASTFM_USER": os.environ.get("LASTFM_USER"),
|
||||||
|
|
||||||
|
"SPOTIFY_CLIENTID": os.environ.get("SPOTIFY_CLIENTID"),
|
||||||
|
"SPOTIFY_SECRET": os.environ.get("SPOTIFY_SECRET"),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def connstr():
|
||||||
|
return " dbname=%s user=%s password=%s host=%s port=%d" % (
|
||||||
|
config["DB"], config["USER"], config["PASSWORD"],
|
||||||
|
config["HOST"], config["PORT"]
|
||||||
|
)
|
@ -1,21 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
|
|
||||||
latest=$(curl http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/LATEST)
|
|
||||||
|
|
||||||
mkdir in 2> /dev/null
|
|
||||||
cd in
|
|
||||||
|
|
||||||
wget -nc "http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/${latest}/mbdump.tar.bz2"
|
|
||||||
wget -nc "http://ftp.musicbrainz.org/pub/musicbrainz/data/fullexport/${latest}/mbdump-derived.tar.bz2"
|
|
||||||
|
|
||||||
tar -xjvf mbdump.tar.bz2 mbdump/area mbdump/artist mbdump/l_area_area mbdump/l_artist_artist \
|
|
||||||
mbdump/l_artist_release mbdump/l_artist_release_group mbdump/l_label_label mbdump/l_release_group_release_group \
|
|
||||||
mbdump/label mbdump/label_type mbdump/link mbdump/link_type mbdump/release mbdump/release_group \
|
|
||||||
mbdump/release_group_primary_type mbdump/artist_credit_name mbdump/release_status mbdump/l_label_release \
|
|
||||||
mbdump/l_label_release_group
|
|
||||||
tar -xjvf mbdump-derived.tar.bz2 mbdump/artist_tag mbdump/release_group_tag mbdump/tag mbdump/tag_relation \
|
|
||||||
mbdump/release_group_meta
|
|
||||||
|
|
||||||
mv mbdump/* .
|
|
||||||
rm -r mbdump
|
|
||||||
cd ..
|
|
@ -1,27 +0,0 @@
|
|||||||
import sqlite3
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
with sqlite3.connect(sys.argv[1]) as conn:
|
|
||||||
|
|
||||||
cursor = conn.cursor()
|
|
||||||
cursor.execute("SELECT id from covers")
|
|
||||||
|
|
||||||
cursor = conn.cursor()
|
|
||||||
cursor.execute("SELECT id from covers")
|
|
||||||
|
|
||||||
def rows():
|
|
||||||
buf = list()
|
|
||||||
for row in cursor.fetchall():
|
|
||||||
buf.append(row[0])
|
|
||||||
if len(buf) > 30:
|
|
||||||
yield buf
|
|
||||||
buf.clear()
|
|
||||||
|
|
||||||
for batch in rows():
|
|
||||||
cursor.execute("SELECT cover from covers where id in (%s)" % (",".join(("'" + b + "'") for b in batch)))
|
|
||||||
covers = cursor.fetchall()
|
|
||||||
for i, cover in enumerate(covers):
|
|
||||||
with open("./tmpcovers/" + batch[i] + ".jpg", "wb") as out:
|
|
||||||
out.write(cover[0])
|
|
||||||
print(batch[i])
|
|
@ -1,56 +0,0 @@
|
|||||||
import json
|
|
||||||
from multiprocessing.pool import ThreadPool
|
|
||||||
|
|
||||||
from task_tracker_drone.src.tt_drone.api import TaskTrackerApi, Worker
|
|
||||||
|
|
||||||
TT_API_URL = "https://tt.simon987.net/api"
|
|
||||||
TT_PROJECT = 5
|
|
||||||
|
|
||||||
|
|
||||||
done = set()
|
|
||||||
# with sqlite3.connect(sys.argv[1]) as conn:
|
|
||||||
# cur = conn.cursor()
|
|
||||||
# cur.execute("SELECT id FROM covers")
|
|
||||||
# for mbid in cur.fetchall():
|
|
||||||
# done.add(mbid[0])
|
|
||||||
|
|
||||||
api = TaskTrackerApi(TT_API_URL)
|
|
||||||
|
|
||||||
worker = Worker.from_file(api)
|
|
||||||
if not worker:
|
|
||||||
worker = api.make_worker("caa scraper")
|
|
||||||
worker.dump_to_file()
|
|
||||||
worker.request_access(TT_PROJECT, True, True)
|
|
||||||
input("Give permission to " + worker.alias)
|
|
||||||
|
|
||||||
|
|
||||||
def mktask(mbids):
|
|
||||||
res = worker.submit_task(
|
|
||||||
project=TT_PROJECT,
|
|
||||||
recipe=json.dumps(mbids),
|
|
||||||
hash64=hash(mbids[0]),
|
|
||||||
max_assign_time=60 * 30,
|
|
||||||
priority=1,
|
|
||||||
unique_str=None,
|
|
||||||
verification_count=None,
|
|
||||||
max_retries=5,
|
|
||||||
)
|
|
||||||
print(res.text)
|
|
||||||
|
|
||||||
|
|
||||||
def lines():
|
|
||||||
with open("in/release") as f:
|
|
||||||
buf = list()
|
|
||||||
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
|
|
||||||
buf.append(cols[1])
|
|
||||||
if len(buf) == 75:
|
|
||||||
a = list(buf)
|
|
||||||
buf.clear()
|
|
||||||
yield a
|
|
||||||
|
|
||||||
|
|
||||||
pool = ThreadPool(processes=20)
|
|
||||||
pool.map(func=mktask, iterable=lines())
|
|
@ -1,48 +0,0 @@
|
|||||||
import csv
|
|
||||||
import json
|
|
||||||
from multiprocessing.pool import ThreadPool
|
|
||||||
|
|
||||||
from task_tracker_drone.src.tt_drone.api import TaskTrackerApi, Worker, LOG_TRACE
|
|
||||||
|
|
||||||
TT_API_URL = "https://tt.simon987.net/api"
|
|
||||||
TT_PROJECT = 1
|
|
||||||
|
|
||||||
api = TaskTrackerApi(TT_API_URL)
|
|
||||||
|
|
||||||
worker = Worker.from_file(api)
|
|
||||||
if not worker:
|
|
||||||
worker = api.make_worker("last.fm scraper")
|
|
||||||
worker.dump_to_file()
|
|
||||||
worker.request_access(TT_PROJECT, True, True)
|
|
||||||
input("Give permission to " + worker.alias)
|
|
||||||
|
|
||||||
with open("repo/artist.csv") as f:
|
|
||||||
reader = csv.reader(f)
|
|
||||||
|
|
||||||
def mktask(lines):
|
|
||||||
res = worker.submit_task(
|
|
||||||
project=TT_PROJECT,
|
|
||||||
recipe=json.dumps(
|
|
||||||
[{"mbid": line[0], "name": line[1]} for line in lines]
|
|
||||||
),
|
|
||||||
unique_str=lines[0][0],
|
|
||||||
max_assign_time=60 * 5,
|
|
||||||
)
|
|
||||||
print(res.text)
|
|
||||||
|
|
||||||
def lines():
|
|
||||||
line_batch = list()
|
|
||||||
|
|
||||||
for line in reader:
|
|
||||||
if "Group" in line[3]:
|
|
||||||
line_batch.append(line)
|
|
||||||
if len(line_batch) >= 30:
|
|
||||||
res = list(line_batch)
|
|
||||||
line_batch.clear()
|
|
||||||
yield res
|
|
||||||
|
|
||||||
tasks = list(lines())
|
|
||||||
|
|
||||||
pool = ThreadPool(processes=25)
|
|
||||||
pool.map(func=mktask, iterable=tasks)
|
|
||||||
|
|
@ -1,48 +0,0 @@
|
|||||||
import csv
|
|
||||||
import json
|
|
||||||
from multiprocessing.pool import ThreadPool
|
|
||||||
|
|
||||||
from task_tracker_drone.src.tt_drone.api import TaskTrackerApi, Worker
|
|
||||||
|
|
||||||
TT_API_URL = "https://tt.simon987.net/api"
|
|
||||||
TT_PROJECT = 6
|
|
||||||
|
|
||||||
api = TaskTrackerApi(TT_API_URL)
|
|
||||||
|
|
||||||
worker = Worker.from_file(api)
|
|
||||||
if not worker:
|
|
||||||
worker = api.make_worker("mm worker")
|
|
||||||
worker.dump_to_file()
|
|
||||||
worker.request_access(TT_PROJECT, True, True)
|
|
||||||
input("Give permission to " + worker.alias)
|
|
||||||
|
|
||||||
with open("repo/artist.csv") as f:
|
|
||||||
reader = csv.reader(f)
|
|
||||||
|
|
||||||
def mktask(lines):
|
|
||||||
res = worker.submit_task(
|
|
||||||
project=TT_PROJECT,
|
|
||||||
recipe=json.dumps(
|
|
||||||
[{"mbid": line[0], "name": line[1]} for line in lines]
|
|
||||||
),
|
|
||||||
unique_str=lines[0][0],
|
|
||||||
max_assign_time=60 * 5,
|
|
||||||
)
|
|
||||||
print(res.text)
|
|
||||||
|
|
||||||
def lines():
|
|
||||||
line_batch = list()
|
|
||||||
|
|
||||||
for line in reader:
|
|
||||||
line_batch.append(line)
|
|
||||||
if len(line_batch) >= 30:
|
|
||||||
res = list(line_batch)
|
|
||||||
line_batch.clear()
|
|
||||||
yield res
|
|
||||||
|
|
||||||
tasks = list(lines())
|
|
||||||
|
|
||||||
pool = ThreadPool(processes=25)
|
|
||||||
pool.map(func=mktask, iterable=tasks)
|
|
||||||
|
|
||||||
|
|
@ -1,60 +0,0 @@
|
|||||||
import json
|
|
||||||
import sqlite3
|
|
||||||
from multiprocessing.pool import ThreadPool
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from task_tracker_drone.src.tt_drone.api import TaskTrackerApi, Worker
|
|
||||||
|
|
||||||
TT_API_URL = "https://tt.simon987.net/api"
|
|
||||||
TT_PROJECT = 7
|
|
||||||
|
|
||||||
api = TaskTrackerApi(TT_API_URL)
|
|
||||||
|
|
||||||
worker = Worker.from_file(api)
|
|
||||||
if not worker:
|
|
||||||
worker = api.make_worker("mm worker")
|
|
||||||
worker.dump_to_file()
|
|
||||||
worker.request_access(TT_PROJECT, True, True)
|
|
||||||
input("Give permission to " + worker.alias)
|
|
||||||
|
|
||||||
spotids = set()
|
|
||||||
|
|
||||||
with sqlite3.connect(sys.argv[1]) as conn:
|
|
||||||
|
|
||||||
cur = conn.cursor()
|
|
||||||
cur.execute("SELECT data from artist")
|
|
||||||
for row in cur.fetchall():
|
|
||||||
j = json.loads(row[0])
|
|
||||||
if j is None or "artists" not in j or "items" not in j["artists"]:
|
|
||||||
continue
|
|
||||||
for item in j["artists"]["items"]:
|
|
||||||
spotids.add(item["id"])
|
|
||||||
|
|
||||||
|
|
||||||
def mktask(lines):
|
|
||||||
res = worker.submit_task(
|
|
||||||
project=TT_PROJECT,
|
|
||||||
recipe=json.dumps(
|
|
||||||
[{"spotid": line} for line in lines]
|
|
||||||
),
|
|
||||||
unique_str=lines[0],
|
|
||||||
max_assign_time=60 * 5,
|
|
||||||
)
|
|
||||||
print(res.text)
|
|
||||||
|
|
||||||
def ids():
|
|
||||||
id_batch = list()
|
|
||||||
|
|
||||||
for spotid in spotids:
|
|
||||||
id_batch.append(spotid)
|
|
||||||
if len(id_batch) >= 30:
|
|
||||||
res = list(id_batch)
|
|
||||||
id_batch.clear()
|
|
||||||
yield res
|
|
||||||
|
|
||||||
tasks = list(ids())
|
|
||||||
|
|
||||||
pool = ThreadPool(processes=25)
|
|
||||||
pool.map(func=mktask, iterable=tasks)
|
|
||||||
|
|
1
last.fm
1
last.fm
@ -1 +0,0 @@
|
|||||||
Subproject commit 855df64c316930062ff4f7740492d0f039788498
|
|
@ -1,31 +0,0 @@
|
|||||||
import sqlite3
|
|
||||||
|
|
||||||
release_to_release_group_map = dict()
|
|
||||||
release_groups = dict()
|
|
||||||
|
|
||||||
with open("in/release_group") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
release_groups[cols[0]] = cols[1]
|
|
||||||
|
|
||||||
with open("in/release") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
release_to_release_group_map[cols[1]] = release_groups[cols[4]]
|
|
||||||
|
|
||||||
with sqlite3.connect("mapdb.db") as conn:
|
|
||||||
|
|
||||||
cursor = conn.cursor()
|
|
||||||
cursor.execute("CREATE TABLE map (release TEXT PRIMARY KEY , release_group TEXT)")
|
|
||||||
|
|
||||||
for k, v in release_to_release_group_map.items():
|
|
||||||
cursor.execute("INSERT INTO map (release, release_group) VALUES (?,?)", (k, v))
|
|
||||||
conn.commit()
|
|
||||||
|
|
||||||
"""
|
|
||||||
CREATE TABLE covers (id TEXT primary key, cover BLOB);
|
|
||||||
ATTACH 'mapdb.db' AS map;
|
|
||||||
ATTACH '/mnt/Data8/caa_tn_only.db' AS source;
|
|
||||||
INSERT OR IGNORE INTO covers SELECT release_group, cover FROM source.covers INNER JOIN map.map ON id = map.release;
|
|
||||||
"""
|
|
||||||
|
|
391
mb_scratch.sql
Normal file
391
mb_scratch.sql
Normal file
@ -0,0 +1,391 @@
|
|||||||
|
CREATE OR REPLACE FUNCTION fn_sortname(name text, mb_sortname text) RETURNS text AS
|
||||||
|
$$
|
||||||
|
declare
|
||||||
|
sn text;
|
||||||
|
BEGIN
|
||||||
|
|
||||||
|
sn = regexp_replace(name, '[^a-zA-Z0-9.\-!?&çéàâäëïöü'' ]', '_');
|
||||||
|
|
||||||
|
if length(replace(sn, '_', '')) = 0 then
|
||||||
|
return upper(regexp_replace(mb_sortname, '[^\w.\-!?& ]', '_'));
|
||||||
|
end if;
|
||||||
|
|
||||||
|
return upper(sn);
|
||||||
|
END
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION fn_sortname(name text) RETURNS text AS
|
||||||
|
$$
|
||||||
|
BEGIN
|
||||||
|
return upper(regexp_replace(name, '[^a-zA-Z0-9.\-!?&çéàâäëïöü'' ]', '_'));
|
||||||
|
END
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
CREATE TABLE mg.translate_artist_artist_rel
|
||||||
|
(
|
||||||
|
mb_name TEXT PRIMARY KEY,
|
||||||
|
mg_name TEXT
|
||||||
|
);
|
||||||
|
INSERT INTO mg.translate_artist_artist_rel
|
||||||
|
VALUES ('teacher', 'TEACHER_OF'),
|
||||||
|
('composer-in-residence', 'HAS_COMPOSER-IN-RESIDENCE_STATUS_IN'),
|
||||||
|
('member of band', 'IS_MEMBER_OF'),
|
||||||
|
('voice actor', 'IS_VOICE_ACTOR_OF'),
|
||||||
|
('tribute', 'IS_TRIBUTE_TO'),
|
||||||
|
('supporting musician', 'IS_SUPPORTING_MUSICIAN_OF'),
|
||||||
|
('instrumental supporting musician', 'IS_INSTRUMENTAL_SUPPORTING_MUSICIAN_OF'),
|
||||||
|
('personal relationship', 'HAS_PERSONAL_RELATIONSHIP_WITH'),
|
||||||
|
('musical relationships', 'HAS_MUSICAL_RELATIONSHIP_WITH'),
|
||||||
|
('collaboration', 'HAS_COLLABORATED_WITH'),
|
||||||
|
('married', 'IS_MARRIED_WITH'),
|
||||||
|
('sibling', 'IS_SIBLING_OF'),
|
||||||
|
('parent', 'IS_PARENT_OF'),
|
||||||
|
('is person', 'IS'),
|
||||||
|
('conductor position', 'IS_CONDUCTOR_OF'),
|
||||||
|
('vocal supporting musician', 'DOES_VOCAL_SUPPORT_FOR'),
|
||||||
|
('artistic director', 'IS_ARTIST_DIRECTOR_OF'),
|
||||||
|
('subgroup', 'IS_SUBGROUP_OF'),
|
||||||
|
('founder', 'IS_FOUNDER_OF'),
|
||||||
|
('involved with', 'IS_INVOLVED_WITH'),
|
||||||
|
('named after', 'IS_NAMED_AFTER');
|
||||||
|
|
||||||
|
CREATE TABLE mg.translate_artist_release_rel
|
||||||
|
(
|
||||||
|
mb_name TEXT PRIMARY KEY,
|
||||||
|
mg_name text
|
||||||
|
);
|
||||||
|
INSERT INTO mg.translate_artist_release_rel
|
||||||
|
VALUES ('translator', 'TRANSLATED'),
|
||||||
|
('liner notes', 'WROTE_LINER_NOTES'),
|
||||||
|
('lyricist', 'IS_LYRICIST_FOR'),
|
||||||
|
('lacquer cut', 'DID_LACQUER_CUT_FOR'),
|
||||||
|
('samples from artist', 'HAS_SAMPLES_IN'),
|
||||||
|
('remixes and compilations', NULL),
|
||||||
|
('composition', 'COMPOSED'),
|
||||||
|
('booking', 'DID_BOOKING_FOR'),
|
||||||
|
('balance', 'DID_BALANCE_FOR'),
|
||||||
|
('misc', 'HAS_MISC_ROLE_IN'),
|
||||||
|
('conductor', 'CONDUCTED'),
|
||||||
|
('legal representation', 'PROVIDED_LEGAL_REPRESENTATION_FOR'),
|
||||||
|
('design/illustration', 'DID_DESIGN_FOR'),
|
||||||
|
('performing orchestra', 'PERFORMED_FOR'),
|
||||||
|
('producer', 'PRODUCED'),
|
||||||
|
('instrument', 'PERFORMED_INSTRUMENT_FOR'),
|
||||||
|
('writer', 'WROTE_LYRICS_FOR'),
|
||||||
|
('production', 'DID_PRODUCTION_FOR'),
|
||||||
|
('performance', 'PERFORMED_FOR'),
|
||||||
|
('composer', 'IS_COMPOSER_FOR'),
|
||||||
|
('sound', 'DID_SOUND_FOR'),
|
||||||
|
('remixer', 'DID_REMIXING_FOR'),
|
||||||
|
('orchestrator', 'IS_ORCHESTRATOR_FOR'),
|
||||||
|
('compiler', 'DID_COMPILATION_FOR'),
|
||||||
|
('vocal arranger', 'IS_ARRANGER_FOR'),
|
||||||
|
('arranger', 'IS_ARRENGER_FOR'),
|
||||||
|
('mix-DJ', 'MIXED'),
|
||||||
|
('editor', 'IS_EDITOR_FOR'),
|
||||||
|
('illustration', 'DID_ILLUSTRATION_FOR'),
|
||||||
|
('audio', 'DID_AUDIO_FOR'),
|
||||||
|
('publishing', 'IS_PUBLISHER_FOR'),
|
||||||
|
('art direction', 'DID_ART_DIRECTOR_FOR'),
|
||||||
|
('design', 'DID_DESIGN_FOR'),
|
||||||
|
('instrument arranger', 'IS_ARRANGER_FOR'),
|
||||||
|
('chorus master', 'IS_CHORUS_MASTER_FOR'),
|
||||||
|
('photography', 'DID_PHOTOGRAPHY_FOR'),
|
||||||
|
('performer', 'PERFORMED_IN'),
|
||||||
|
('graphic design', 'DID_GRAPHIC_DESIGN_FOR'),
|
||||||
|
('booklet editor', 'IS_BOOKLET_EDITOR_FOR'),
|
||||||
|
('programming', 'DID_PROGRAMING_FOR'),
|
||||||
|
('copyright', 'IS_COPYRIGHT_HOLDER_OF'),
|
||||||
|
('piano technician', 'IS_PIANO_TECNICIAN_FOR'),
|
||||||
|
('phonographic copyright', 'IS_PHONOGRAPHIC_COPYRIGHT_HOLDER_OF'),
|
||||||
|
('mastering', 'DID_MASTERING_FOR'),
|
||||||
|
('vocal', 'PERFORED_VOCALS_FOR'),
|
||||||
|
('librettist', 'IS_LIBRETTIST_FOR'),
|
||||||
|
('mix', 'MIXED'),
|
||||||
|
('recording', 'DID_RECORDING_FOR'),
|
||||||
|
('concertmaster', 'IS_CONCERTMASTER_FOR'),
|
||||||
|
('engineer', 'IS_ENGINEER_FOR'),
|
||||||
|
('tribute', 'IS_TRIBUTE_TO'),
|
||||||
|
('dedicated to', 'IS_DEDICATED_TO'),
|
||||||
|
('creative direction', NULL),
|
||||||
|
('artists and repertoire', NULL);
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE mg.translate_label_label_rel
|
||||||
|
(
|
||||||
|
mb_name TEXT PRIMARY KEY,
|
||||||
|
mg_name text
|
||||||
|
);
|
||||||
|
INSERT INTO mg.translate_label_label_rel
|
||||||
|
VALUES ('label rename', 'WAS_RENAMED_TO'),
|
||||||
|
('imprint', 'DOES_IMPRINT_FOR'),
|
||||||
|
('label distribution', 'DOES_DISTRIBUTION_FOR'),
|
||||||
|
('business association', 'HAS_BUSINESS_ASSOCIATION_TO'),
|
||||||
|
('label ownership', 'OWNS'),
|
||||||
|
('label reissue', 'DOES_REISSUING_FOR');
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW mg.artist AS
|
||||||
|
SELECT gid as "id:ID(Artist)",
|
||||||
|
name,
|
||||||
|
fn_sortname(name, sort_name) as sortname,
|
||||||
|
COALESCE(begin_date_year, 0) as "year:int",
|
||||||
|
comment,
|
||||||
|
(CASE WHEN type = 2 THEN 'Group' ELSE 'Artist' END) as ":LABEL"
|
||||||
|
FROM artist;
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW mg.artist_artist AS
|
||||||
|
SELECT a0.gid as ":START_ID(Artist)",
|
||||||
|
a1.gid as ":END_ID(Artist)",
|
||||||
|
t.mg_name as ":TYPE"
|
||||||
|
FROM l_artist_artist
|
||||||
|
INNER JOIN artist a0 ON entity0 = a0.id
|
||||||
|
INNER JOIN artist a1 ON entity1 = a1.id
|
||||||
|
INNER JOIN link l on l.id = l_artist_artist.link
|
||||||
|
INNER JOIN link_type lt ON lt.id = l.link_type
|
||||||
|
INNER JOIN mg.translate_artist_artist_rel t ON t.mb_name = lt.name;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW mg.release AS
|
||||||
|
SELECT release_group.gid as ":id:ID(Release)",
|
||||||
|
release_group.name,
|
||||||
|
m.first_release_date_year as "year:int",
|
||||||
|
CONCAT('Release;', t.name) as ":LABEL"
|
||||||
|
FROM release_group
|
||||||
|
INNER JOIN release_group_meta m ON m.id = release_group.id
|
||||||
|
INNER JOIN release_group_primary_type t ON t.id = release_group.type;
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW mg.artist_release AS
|
||||||
|
SELECT a.gid as ":START_ID(Artist)",
|
||||||
|
rg.gid as ":END_ID(Release)",
|
||||||
|
t.mg_name as ":TYPE"
|
||||||
|
FROM l_artist_release_group
|
||||||
|
INNER JOIN artist a on a.id = l_artist_release_group.entity0
|
||||||
|
INNER JOIN release_group rg on rg.id = l_artist_release_group.entity1
|
||||||
|
INNER JOIN link l on l.id = l_artist_release_group.link
|
||||||
|
INNER JOIN link_type lt ON lt.id = l.link_type
|
||||||
|
INNER JOIN mg.translate_artist_release_rel t ON t.mb_name = lt.name
|
||||||
|
UNION ALL
|
||||||
|
SELECT a.gid as ":START_ID(Artist)",
|
||||||
|
rg.gid as ":END_ID(Release)",
|
||||||
|
t.mg_name as ":TYPE"
|
||||||
|
FROM l_artist_release
|
||||||
|
INNER JOIN artist a on a.id = l_artist_release.entity0
|
||||||
|
INNER JOIN release r on r.id = l_artist_release.entity1
|
||||||
|
INNER JOIN release_group rg on rg.id = r.release_group
|
||||||
|
INNER JOIN link l on l.id = l_artist_release.link
|
||||||
|
INNER JOIN link_type lt ON lt.id = l.link_type
|
||||||
|
INNER JOIN mg.translate_artist_release_rel t ON t.mb_name = lt.name
|
||||||
|
UNION ALL
|
||||||
|
SELECT a.gid as ":START_ID(Artist)",
|
||||||
|
rg.gid as ":END_ID(Release)",
|
||||||
|
'CREDITED_FOR' as ":TYPE"
|
||||||
|
FROM release
|
||||||
|
INNER JOIN artist_credit_name cn ON cn.artist_credit = release.artist_credit
|
||||||
|
INNER JOIN artist a on a.id = cn.artist
|
||||||
|
INNER JOIN release_group rg on rg.id = release.release_group;
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW mg.tag AS
|
||||||
|
WITH occurences AS (
|
||||||
|
SELECT tag, COUNT(*) as count
|
||||||
|
FROM (
|
||||||
|
SELECT tag
|
||||||
|
FROM release_group_tag
|
||||||
|
UNION ALL
|
||||||
|
SELECT tag
|
||||||
|
FROM release_tag
|
||||||
|
) as tags
|
||||||
|
GROUP BY tag
|
||||||
|
)
|
||||||
|
SELECT tag.id as "id:ID(Tag)",
|
||||||
|
tag.name,
|
||||||
|
occurences.count as "occurences:int"
|
||||||
|
FROM tag
|
||||||
|
INNER JOIN occurences ON occurences.tag = tag.id
|
||||||
|
WHERE ref_count > 0
|
||||||
|
AND occurences.count > 5;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW mg.release_tag AS
|
||||||
|
SELECT rg.gid as ":START_ID(Release)",
|
||||||
|
release_group_tag.tag as ":END_ID(Tag)",
|
||||||
|
greatest(least(release_group_tag.count::float / 6, 1), 0.2) as "weight:float"
|
||||||
|
FROM release_group_tag
|
||||||
|
INNER JOIN release_group rg ON rg.id = release_group_tag.release_group
|
||||||
|
INNER JOIN mg.tag t ON t."id:ID(Tag)" = release_group_tag.tag
|
||||||
|
WHERE release_group_tag.count > 0
|
||||||
|
UNION ALL
|
||||||
|
SELECT rg.gid as ":START_ID(Release)",
|
||||||
|
release_tag.tag as ":END_ID(Tag)",
|
||||||
|
greatest(least(release_tag.count::float / 6, 1), 0.2) as "weight:float"
|
||||||
|
FROM release_tag
|
||||||
|
INNER JOIN release r ON r.id = release_tag.release
|
||||||
|
INNER JOIN release_group rg ON rg.id = r.release_group
|
||||||
|
INNER JOIN mg.tag t ON t."id:ID(Tag)" = release_tag.tag
|
||||||
|
WHERE release_tag.count > 0;
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW mg.artist_tag AS
|
||||||
|
SELECT a.gid as ":START_ID(Artist)",
|
||||||
|
artist_tag.tag as ":END_ID(Tag)",
|
||||||
|
greatest(least(artist_tag.count::float / 8, 1), 0.2) as "weight:float"
|
||||||
|
FROM artist_tag
|
||||||
|
INNER JOIN artist a on artist_tag.artist = a.id
|
||||||
|
INNER JOIN mg.tag t ON t."id:ID(Tag)" = artist_tag.tag
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW mg.tag_tag AS
|
||||||
|
SELECT tag_relation.tag1 as ":START_ID(Tag)",
|
||||||
|
tag_relation.tag2 as ":END_ID(Tag)",
|
||||||
|
greatest(least(tag_relation.weight::float / 12, 1), 0.2) as "weight:float"
|
||||||
|
FROM tag_relation;
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW mg.label AS
|
||||||
|
SELECT label.gid as "id:ID(Label)",
|
||||||
|
label.name,
|
||||||
|
fn_sortname(label.name) as sortname,
|
||||||
|
-- label_code as code,
|
||||||
|
concat('Label;', lt.name) as ":LABEL"
|
||||||
|
FROM label
|
||||||
|
INNER JOIN label_type lt on label.type = lt.id;
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW mg.release_label AS
|
||||||
|
SELECT l.gid as ":START_ID(Release)",
|
||||||
|
r.gid as ":END_ID(Label)"
|
||||||
|
FROM l_label_release
|
||||||
|
INNER JOIN label l on l_label_release.entity0 = l.id
|
||||||
|
INNER JOIN release r on l_label_release.entity1 = r.id;
|
||||||
|
-- UNION
|
||||||
|
-- SELECT l.gid as ":START_ID(Release)",
|
||||||
|
-- r.gid as ":END_ID(Label)"
|
||||||
|
-- FROM l_label_release_group
|
||||||
|
-- INNER JOIN label l on l_label_release_group.entity0 = l.id
|
||||||
|
-- INNER JOIN release_group rg on l_label_release_group.entity1 = rg.id
|
||||||
|
-- INNER JOIN release r on r.release_group = rg.id
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW mg.label_label AS
|
||||||
|
SELECT l0.gid as ":START_ID(Label)",
|
||||||
|
l1.gid as ":END_ID(Label)",
|
||||||
|
t.mg_name as ":TYPE"
|
||||||
|
FROM l_label_label
|
||||||
|
INNER JOIN label l0 on l_label_label.entity0 = l0.id
|
||||||
|
INNER JOIN label l1 on l_label_label.entity1 = l1.id
|
||||||
|
INNER JOIN link l on l.id = l_label_label.link
|
||||||
|
INNER JOIN link_type lt ON lt.id = l.link_type
|
||||||
|
INNER JOIN mg.translate_label_label_rel t ON t.mb_name = lt.name
|
||||||
|
|
||||||
|
|
||||||
|
--------------
|
||||||
|
|
||||||
|
CREATE TABLE mg.covers
|
||||||
|
(
|
||||||
|
mbid uuid PRIMARY KEY,
|
||||||
|
ts timestamp DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
tn bytea
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mg.lastfm_artist
|
||||||
|
(
|
||||||
|
name TEXT PRIMARY KEY,
|
||||||
|
mbid uuid
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mg.lastfm_raw_data
|
||||||
|
(
|
||||||
|
name TEXT,
|
||||||
|
mbid uuid,
|
||||||
|
ts timestamp DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
data jsonb,
|
||||||
|
PRIMARY KEY (name, mbid)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mg.lastfm_artist_meta
|
||||||
|
(
|
||||||
|
name TEXT PRIMARY KEY,
|
||||||
|
listeners int,
|
||||||
|
playcount int
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mg.lastfm_artist_tag
|
||||||
|
(
|
||||||
|
name TEXT,
|
||||||
|
tag TEXT,
|
||||||
|
PRIMARY KEY (name, tag)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mg.lastfm_artist_artist
|
||||||
|
(
|
||||||
|
name0 TEXT,
|
||||||
|
name1 TEXT,
|
||||||
|
weight float,
|
||||||
|
ts timestamp DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
PRIMARY KEY (name0, name1)
|
||||||
|
);
|
||||||
|
|
||||||
|
--------------
|
||||||
|
|
||||||
|
CREATE TABLE mg.spotify_artist
|
||||||
|
(
|
||||||
|
spotid TEXT PRIMARY KEY,
|
||||||
|
mbid UUID UNIQUE
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mg.spotify_artist_meta
|
||||||
|
(
|
||||||
|
spotid TEXT PRIMARY KEY,
|
||||||
|
name TEXT,
|
||||||
|
followers int,
|
||||||
|
popularity int,
|
||||||
|
ts timestamp DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mg.spotify_artist_tag
|
||||||
|
(
|
||||||
|
spotid TEXT,
|
||||||
|
tag TEXT,
|
||||||
|
PRIMARY KEY (spotid, tag)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mg.spotify_artist_album
|
||||||
|
(
|
||||||
|
spotid TEXT,
|
||||||
|
album TEXT,
|
||||||
|
PRIMARY KEY (spotid, album)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mg.spotify_artist_track
|
||||||
|
(
|
||||||
|
spotid TEXT,
|
||||||
|
track TEXT,
|
||||||
|
url TEXT,
|
||||||
|
PRIMARY KEY (spotid, track)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mg.spotify_artist_artist
|
||||||
|
(
|
||||||
|
spotid0 TEXT,
|
||||||
|
spotid1 TEXT,
|
||||||
|
index int,
|
||||||
|
ts timestamp DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
PRIMARY KEY (spotid0, spotid1)
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE mg.spotify_raw_data
|
||||||
|
(
|
||||||
|
query TEXT,
|
||||||
|
endpoint TEXT,
|
||||||
|
ts timestamp DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
data jsonb,
|
||||||
|
PRIMARY KEY (query, endpoint)
|
||||||
|
);
|
||||||
|
|
||||||
|
--------
|
||||||
|
CREATE OR REPLACE FUNCTION asciifold(text) RETURNS text
|
||||||
|
AS
|
||||||
|
'/pglib/libasciifolding.so',
|
||||||
|
'asciifold' LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION asciifold_lower(text) RETURNS text
|
||||||
|
AS
|
||||||
|
'/pglib/libasciifolding.so',
|
||||||
|
'asciifold_lower' LANGUAGE C STRICT;
|
@ -1,102 +0,0 @@
|
|||||||
import csv
|
|
||||||
import json
|
|
||||||
import sqlite3
|
|
||||||
from collections import defaultdict
|
|
||||||
import sys
|
|
||||||
|
|
||||||
artists = set()
|
|
||||||
|
|
||||||
|
|
||||||
def disambiguate(lfm_artist, artist_release_count, name, mbid):
|
|
||||||
existing_mbid = lfm_artist.get(name, None)
|
|
||||||
|
|
||||||
if existing_mbid and mbid != existing_mbid:
|
|
||||||
if artist_release_count[existing_mbid] < artist_release_count[mbid]:
|
|
||||||
|
|
||||||
lfm_artist[name] = mbid
|
|
||||||
|
|
||||||
# print("Replacing %s (%s) with %s (%d) for %s" %
|
|
||||||
# (existing_mbid, artist_release_count[existing_mbid],
|
|
||||||
# mbid, artist_release_count[mbid],
|
|
||||||
# name))
|
|
||||||
else:
|
|
||||||
lfm_artist[name] = mbid
|
|
||||||
|
|
||||||
|
|
||||||
def patch(lastfm_data):
|
|
||||||
|
|
||||||
artist_listeners = dict()
|
|
||||||
lastfm_artist_to_mbid = dict()
|
|
||||||
artist_release_count = defaultdict(int)
|
|
||||||
related = list()
|
|
||||||
|
|
||||||
with open("repo/artist_release.csv") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split(',')
|
|
||||||
artist_release_count[cols[0]] += 1
|
|
||||||
|
|
||||||
with sqlite3.connect(lastfm_data) as conn:
|
|
||||||
cur = conn.cursor()
|
|
||||||
cur.execute("SELECT data FROM lastfmdata", )
|
|
||||||
data = list(cur.fetchall())
|
|
||||||
|
|
||||||
# A lastfm artist name can refer to multiple MBIDs
|
|
||||||
# For RELATED_TO purposes, we assume that the MBID referring
|
|
||||||
# to the artist with the most official releases is the one
|
|
||||||
|
|
||||||
for row in data:
|
|
||||||
meta = json.loads(row[0])
|
|
||||||
|
|
||||||
disambiguate(lastfm_artist_to_mbid, artist_release_count, meta["name"], meta["artist"])
|
|
||||||
|
|
||||||
for similar in [s for s in meta["similar"] if s["mbid"] is not None]:
|
|
||||||
disambiguate(lastfm_artist_to_mbid, artist_release_count, similar["name"], similar["mbid"])
|
|
||||||
|
|
||||||
# Get related links & listener counts
|
|
||||||
for row in data:
|
|
||||||
meta = json.loads(row[0])
|
|
||||||
|
|
||||||
artist_listeners[lastfm_artist_to_mbid[meta["name"]]] = \
|
|
||||||
(meta["listeners"], meta["playcount"])
|
|
||||||
|
|
||||||
for similar in [s for s in meta["similar"] if s["mbid"] is not None]:
|
|
||||||
related.append((
|
|
||||||
lastfm_artist_to_mbid[similar["name"]],
|
|
||||||
lastfm_artist_to_mbid[meta["name"]],
|
|
||||||
similar["match"]
|
|
||||||
))
|
|
||||||
|
|
||||||
with open("repo/lastfm_artist.csv", "w") as out:
|
|
||||||
writer = csv.writer(out)
|
|
||||||
writer.writerow([
|
|
||||||
"id:ID(Artist)", "name", "sortname", "year:short", "comment", ":LABEL", "listeners:int", "playcount:int"
|
|
||||||
])
|
|
||||||
|
|
||||||
with open("repo/artist.csv") as f:
|
|
||||||
reader = csv.reader(f)
|
|
||||||
|
|
||||||
reader.__next__() # Skip header
|
|
||||||
for row in reader:
|
|
||||||
writer.writerow([
|
|
||||||
row[0],
|
|
||||||
row[1],
|
|
||||||
row[2],
|
|
||||||
row[3],
|
|
||||||
row[4],
|
|
||||||
row[5],
|
|
||||||
artist_listeners.get(row[0], (0, 0))[0],
|
|
||||||
artist_listeners.get(row[0], (0, 0))[1],
|
|
||||||
])
|
|
||||||
artists.add(row[0])
|
|
||||||
|
|
||||||
with open("repo/lastfm_artist_artist.csv", "w") as out:
|
|
||||||
out.write(",".join((
|
|
||||||
":START_ID(Artist)", ":END_ID(Artist)", "weight:float"
|
|
||||||
)) + "\n")
|
|
||||||
|
|
||||||
for x in related:
|
|
||||||
if x[0] in artists and x[1] in artists:
|
|
||||||
out.write(",".join(x) + "\n")
|
|
||||||
|
|
||||||
|
|
||||||
patch(sys.argv[1])
|
|
@ -1,466 +0,0 @@
|
|||||||
import os
|
|
||||||
from collections import defaultdict
|
|
||||||
import re
|
|
||||||
from statistics import median
|
|
||||||
|
|
||||||
links = dict()
|
|
||||||
link_types = dict()
|
|
||||||
areas = dict()
|
|
||||||
labels = dict()
|
|
||||||
label_types = {
|
|
||||||
"\\N": ""
|
|
||||||
}
|
|
||||||
release_groups = dict()
|
|
||||||
release_statuses = dict()
|
|
||||||
release_to_release_group_map = dict()
|
|
||||||
release_types = {
|
|
||||||
"\\N": "",
|
|
||||||
}
|
|
||||||
artists = dict()
|
|
||||||
tags = dict()
|
|
||||||
|
|
||||||
release_release_rel_map = {
|
|
||||||
"covers and versions": "",
|
|
||||||
"remixes and compilations": "",
|
|
||||||
"DJ-mix": "IS_DJ_MIX_OF",
|
|
||||||
"live performance": "IS_LIVE_PERFORMANCE_OF",
|
|
||||||
"cover": "IS_COVER_OF",
|
|
||||||
"remix": "IS_REMIX_OF",
|
|
||||||
"mashes up": "IS_MASHUP_OF",
|
|
||||||
"included in": "INCLUDED_IN",
|
|
||||||
"single from": "IS_SINGLE_FROM"
|
|
||||||
}
|
|
||||||
|
|
||||||
artist_release_rel_map = {
|
|
||||||
"translator": "TRANSLATED",
|
|
||||||
"liner notes": "WROTE_LINER_NOTES",
|
|
||||||
"lyricist": "IS_LYRICIST_FOR",
|
|
||||||
"lacquer cut": "DID_LACQUER_CUT_FOR",
|
|
||||||
"samples from artist": "HAS_SAMPLES_IN",
|
|
||||||
"remixes and compilations": "",
|
|
||||||
"composition": "COMPOSED",
|
|
||||||
"booking": "DID_BOOKING_FOR",
|
|
||||||
"balance": "DID_BALANCE_FOR",
|
|
||||||
"misc": "HAS_MISC_ROLE_IN",
|
|
||||||
"conductor": "CONDUCTED",
|
|
||||||
"legal representation": "PROVIDED_LEGAL_REPRESENTATION_FOR",
|
|
||||||
"design/illustration": "DID_DESIGN_FOR",
|
|
||||||
"performing orchestra": "PERFORMED_FOR",
|
|
||||||
"producer": "PRODUCED",
|
|
||||||
"instrument": "PERFORMED_INSTRUMENT_FOR",
|
|
||||||
"writer": "WROTE_LYRICS_FOR",
|
|
||||||
"production": "DID_PRODUCTION_FOR",
|
|
||||||
"performance": "PERFORMED_FOR",
|
|
||||||
"composer": "IS_COMPOSER_FOR",
|
|
||||||
"sound": "DID_SOUND_FOR",
|
|
||||||
"remixer": "DID_REMIXING_FOR",
|
|
||||||
"orchestrator": "IS_ORCHESTRATOR_FOR",
|
|
||||||
"compiler": "DID_COMPILATION_FOR",
|
|
||||||
"vocal arranger": "IS_ARRANGER_FOR",
|
|
||||||
"arranger": "IS_ARRENGER_FOR",
|
|
||||||
"mix-DJ": "MIXED",
|
|
||||||
"editor": "IS_EDITOR_FOR",
|
|
||||||
"illustration": "DID_ILLUSTRATION_FOR",
|
|
||||||
"audio": "DID_AUDIO_FOR",
|
|
||||||
"publishing": "IS_PUBLISHER_FOR",
|
|
||||||
"art direction": "DID_ART_DIRECTOR_FOR",
|
|
||||||
"design": "DID_DESIGN_FOR",
|
|
||||||
"instrument arranger": "IS_ARRANGER_FOR",
|
|
||||||
"chorus master": "IS_CHORUS_MASTER_FOR",
|
|
||||||
"photography": "DID_PHOTOGRAPHY_FOR",
|
|
||||||
"performer": "PERFORMED_IN",
|
|
||||||
"graphic design": "DID_GRAPHIC_DESIGN_FOR",
|
|
||||||
"booklet editor": "IS_BOOKLET_EDITOR_FOR",
|
|
||||||
"programming": "DID_PROGRAMING_FOR",
|
|
||||||
"copyright": "IS_COPYRIGHT_HOLDER_OF",
|
|
||||||
"piano technician": "IS_PIANO_TECNICIAN_FOR",
|
|
||||||
"phonographic copyright": "IS_PHONOGRAPHIC_COPYRIGHT_HOLDER_OF",
|
|
||||||
"mastering": "DID_MASTERING_FOR",
|
|
||||||
"vocal": "PERFORED_VOCALS_FOR",
|
|
||||||
"librettist": "IS_LIBRETTIST_FOR",
|
|
||||||
"mix": "MIXED",
|
|
||||||
"recording": "DID_RECORDING_FOR",
|
|
||||||
"concertmaster": "IS_CONCERTMASTER_FOR",
|
|
||||||
"engineer": "IS_ENGINEER_FOR",
|
|
||||||
|
|
||||||
# release_group
|
|
||||||
"tribute": "IS_TRIBUTE_TO",
|
|
||||||
"dedicated to": "IS_DEDICATED_TO",
|
|
||||||
"creative direction": "",
|
|
||||||
"artists and repertoire": ""
|
|
||||||
}
|
|
||||||
|
|
||||||
artist_artist_rel_map = {
|
|
||||||
"teacher": "TEACHER_OF",
|
|
||||||
"composer-in-residence": "HAS_COMPOSER-IN-RESIDENCE_STATUS_IN",
|
|
||||||
"member of band": "IS_MEMBER_OF",
|
|
||||||
"voice actor": "IS_VOICE_ACTOR_OF",
|
|
||||||
"tribute": "IS_TRIBUTE_TO",
|
|
||||||
"supporting musician": "IS_SUPPORTING_MUSICIAN_OF",
|
|
||||||
"instrumental supporting musician": "IS_INSTRUMENTAL_SUPPORTING_MUSICIAN_OF",
|
|
||||||
"personal relationship": "HAS_PERSONAL_RELATIONSHIP_WITH",
|
|
||||||
"musical relationships": "HAS_MUSICAL_RELATIONSHIP_WITH",
|
|
||||||
"collaboration": "HAS_COLLABORATED_WITH",
|
|
||||||
"married": "IS_MARRIED_WITH",
|
|
||||||
"sibling": "IS_SIBLING_OF",
|
|
||||||
"parent": "IS_PARENT_OF",
|
|
||||||
"is person": "IS",
|
|
||||||
"conductor position": "IS_CONDUCTOR_OF",
|
|
||||||
"vocal supporting musician": "DOES_VOCAL_SUPPORT_FOR",
|
|
||||||
"artistic director": "IS_ARTIST_DIRECTOR_OF",
|
|
||||||
"subgroup": "IS_SUBGROUP_OF",
|
|
||||||
"founder": "IS_FOUNDER_OF",
|
|
||||||
"involved with": "IS_INVOLVED_WITH",
|
|
||||||
"named after": "IS_NAMED_AFTER",
|
|
||||||
}
|
|
||||||
|
|
||||||
label_label_rel_map = {
|
|
||||||
"label rename": "WAS_RENAMED_TO",
|
|
||||||
"imprint": "DOES_IMPRINT_FOR",
|
|
||||||
"label distribution": "DOES_DISTRIBUTION_FOR",
|
|
||||||
"business association": "HAS_BUSINESS_ASSOCIATION_TO",
|
|
||||||
"label ownership": "OWNS",
|
|
||||||
"label reissue": "DOES_REISSUING_FOR"
|
|
||||||
}
|
|
||||||
|
|
||||||
if not os.path.exists("repo"):
|
|
||||||
os.mkdir("repo")
|
|
||||||
else:
|
|
||||||
os.system("rm repo/*")
|
|
||||||
if not os.path.exists("tmp"):
|
|
||||||
os.mkdir("tmp")
|
|
||||||
else:
|
|
||||||
os.system("rm tmp/*")
|
|
||||||
|
|
||||||
with open("in/link", "r") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
links[cols[0]] = cols
|
|
||||||
|
|
||||||
with open("in/release_status", "r") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
release_statuses[cols[0]] = cols
|
|
||||||
|
|
||||||
with open("in/link_type", "r") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
link_types[cols[0]] = cols
|
|
||||||
|
|
||||||
with open("in/area", "r") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
areas[cols[0]] = cols
|
|
||||||
|
|
||||||
with open("in/label_type") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
|
|
||||||
label_types[cols[0]] = ";" + cols[1].replace(" ", "")
|
|
||||||
|
|
||||||
if cols[3] != "\\N" and cols[2] in label_types:
|
|
||||||
label_types[cols[0]] += label_types[cols[2]].replace(" ", "")
|
|
||||||
|
|
||||||
with open("in/artist") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
artists[cols[0]] = cols
|
|
||||||
|
|
||||||
with open("repo/area_area.csv", "w") as out:
|
|
||||||
out.write(":START_ID(Area),:END_ID(Area)\n")
|
|
||||||
|
|
||||||
with open("in/l_area_area", "r") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
out.write(",".join((areas[cols[3]][1],
|
|
||||||
areas[cols[2]][1]
|
|
||||||
)) + "\n")
|
|
||||||
|
|
||||||
with open("repo/area.csv", "w") as out:
|
|
||||||
out.write("id:ID(Area),name\n")
|
|
||||||
|
|
||||||
for k, area in areas.items():
|
|
||||||
out.write(",".join((area[1],
|
|
||||||
'"' + area[2] + '"'
|
|
||||||
)) + "\n")
|
|
||||||
|
|
||||||
# ------
|
|
||||||
|
|
||||||
|
|
||||||
out_artist = open("repo/artist.csv", "w")
|
|
||||||
out_artist_area = open("repo/artist_area.csv", "w")
|
|
||||||
|
|
||||||
out_artist.write("id:ID(Artist),name,sortname,year:int,comment,:LABEL\n")
|
|
||||||
out_artist_area.write(":START_ID(Artist),:END_ID(Area)\n")
|
|
||||||
|
|
||||||
ASCII_RE = re.compile(r"[^a-zA-Z0-9.\-!?& ]")
|
|
||||||
ALPHANUM_RE = re.compile(r"[^\w.\-!?& ]")
|
|
||||||
|
|
||||||
for _, artist in artists.items():
|
|
||||||
|
|
||||||
sortname = ASCII_RE.sub("_", artist[2]).upper()
|
|
||||||
if sortname.replace("_", "").strip() == "":
|
|
||||||
sortname = ALPHANUM_RE.sub("_", artist[3]).upper()
|
|
||||||
|
|
||||||
out_artist.write(",".join((
|
|
||||||
artist[1],
|
|
||||||
'"' + artist[2].replace("\"", "\"\"") + '"',
|
|
||||||
sortname,
|
|
||||||
artist[4] if artist[4] != "\\N" else "0",
|
|
||||||
('"' + artist[13].replace("\"", "\"\"") + '"') if artist[13] != "\\N" else "",
|
|
||||||
"Artist" + (";Group\n" if artist[10] == "2" else "\n")
|
|
||||||
)))
|
|
||||||
|
|
||||||
if artist[11] != "\\N":
|
|
||||||
out_artist_area.write(artist[1] + "," + areas[artist[11]][1] + "\n")
|
|
||||||
|
|
||||||
out_artist.close()
|
|
||||||
out_artist_area.close()
|
|
||||||
|
|
||||||
with open("repo/artist_artist.csv", "w") as out:
|
|
||||||
out.write(":START_ID(Artist),:END_ID(Artist),:TYPE\n")
|
|
||||||
|
|
||||||
with open("in/l_artist_artist", "r") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
out.write(",".join((
|
|
||||||
artists[cols[2]][1],
|
|
||||||
artists[cols[3]][1],
|
|
||||||
artist_artist_rel_map[link_types[links[cols[1]][1]][6]] + "\n"
|
|
||||||
)))
|
|
||||||
|
|
||||||
# --------
|
|
||||||
|
|
||||||
with open("in/release_group_primary_type") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
release_types[cols[0]] = ";" + cols[1]
|
|
||||||
|
|
||||||
release_group_year = dict()
|
|
||||||
with open("in/release_group_meta") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
release_group_year[cols[0]] = cols[2] if cols[2] != "\\N" else "0"
|
|
||||||
|
|
||||||
with open("repo/release.csv", "w") as out:
|
|
||||||
out.write("id:ID(Release),name,year:int,:LABEL\n")
|
|
||||||
|
|
||||||
with open("in/release_group") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
out.write(",".join((
|
|
||||||
cols[1],
|
|
||||||
'"' + cols[2].replace("\"", "\"\"") + '"',
|
|
||||||
release_group_year[cols[0]],
|
|
||||||
"Release" + release_types[cols[4]],
|
|
||||||
)) + "\n")
|
|
||||||
|
|
||||||
release_groups[cols[0]] = cols
|
|
||||||
|
|
||||||
with open("in/release") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
if cols[5] != '\\N' and release_statuses[cols[5]][1] == "Official":
|
|
||||||
release_to_release_group_map[cols[0]] = cols[4]
|
|
||||||
|
|
||||||
credit_names = defaultdict(list)
|
|
||||||
|
|
||||||
with open("in/artist_credit_name") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
credit_names[cols[0]].append(artists[cols[2]][1])
|
|
||||||
|
|
||||||
with open("tmp/tmp_artist_release.csv", "w") as out:
|
|
||||||
out.write(":START_ID(Artist),:END_ID(Release),:TYPE\n")
|
|
||||||
|
|
||||||
# Is this part really necessary?
|
|
||||||
with open("in/l_artist_release") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
if cols[3] in release_to_release_group_map:
|
|
||||||
out.write(",".join((
|
|
||||||
artists[cols[2]][1],
|
|
||||||
release_groups[release_to_release_group_map[cols[3]]][1],
|
|
||||||
artist_release_rel_map[link_types[links[cols[1]][1]][6]]
|
|
||||||
)) + "\n")
|
|
||||||
|
|
||||||
# Artist credits
|
|
||||||
with open("in/release") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
if cols[0] in release_to_release_group_map:
|
|
||||||
for credit in credit_names[cols[3]]:
|
|
||||||
out.write(",".join((
|
|
||||||
credit,
|
|
||||||
release_groups[release_to_release_group_map[cols[0]]][1],
|
|
||||||
"CREDITED_FOR"
|
|
||||||
)) + "\n")
|
|
||||||
|
|
||||||
# Remove dupes
|
|
||||||
os.system("(head -n 1 tmp/tmp_artist_release.csv && tail -n +2 tmp/tmp_artist_release.csv"
|
|
||||||
" | sort) | uniq > repo/artist_release.csv && rm tmp/tmp_artist_release.csv")
|
|
||||||
|
|
||||||
|
|
||||||
with open("repo/release_release.csv", "w") as out:
|
|
||||||
out.write(":START_ID(Release),:END_ID(Release),:TYPE\n")
|
|
||||||
|
|
||||||
with open("in/l_release_group_release_group") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
out.write(",".join((
|
|
||||||
release_groups[cols[2]][1],
|
|
||||||
release_groups[cols[3]][1],
|
|
||||||
release_release_rel_map[link_types[links[cols[1]][1]][6]]
|
|
||||||
)) + "\n")
|
|
||||||
|
|
||||||
# ---
|
|
||||||
|
|
||||||
tag_occurence = defaultdict(int)
|
|
||||||
with open("in/release_group_tag") as f:
|
|
||||||
for line in f:
|
|
||||||
tag_occurence[line.split("\t")[1]] += 1
|
|
||||||
|
|
||||||
with open("in/tag") as f:
|
|
||||||
with open("repo/tag.csv", "w") as out:
|
|
||||||
out.write("id:ID(Tag),name, occurences\n")
|
|
||||||
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
if tag_occurence[cols[0]] < 5:
|
|
||||||
continue
|
|
||||||
tags[cols[0]] = cols
|
|
||||||
out.write(cols[0] + ",\"" + cols[1].replace("\"", "\"\"") + "\"," + str(tag_occurence[cols[0]]) + "\n")
|
|
||||||
|
|
||||||
with open("repo/release_tag.csv", "w") as out:
|
|
||||||
out.write(":START_ID(Release),:END_ID(Tag),weight:float\n")
|
|
||||||
|
|
||||||
# get max count
|
|
||||||
max_count = 0
|
|
||||||
with open("in/release_group_tag") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
max_count = max(max_count, int(cols[2]))
|
|
||||||
max_count = max_count / 4
|
|
||||||
|
|
||||||
# weight is linear
|
|
||||||
with open("in/release_group_tag") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
count = int(cols[2])
|
|
||||||
if count <= 0:
|
|
||||||
continue
|
|
||||||
if cols[1] not in tags:
|
|
||||||
continue
|
|
||||||
out.write(",".join((
|
|
||||||
release_groups[cols[0]][1],
|
|
||||||
cols[1],
|
|
||||||
str(max(min(count / max_count, 1), 0.2)),
|
|
||||||
)) + "\n")
|
|
||||||
tag_occurence[cols[1]] += 1
|
|
||||||
|
|
||||||
|
|
||||||
with open("repo/artist_tag.csv", "w") as out:
|
|
||||||
out.write(":START_ID(Artist),:END_ID(Tag),weight:float\n")
|
|
||||||
|
|
||||||
# get max count
|
|
||||||
max_count = 0
|
|
||||||
with open("in/artist_tag") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
max_count = max(max_count, int(cols[2]))
|
|
||||||
max_count = max_count / 4
|
|
||||||
|
|
||||||
# Weight is linear
|
|
||||||
with open("in/artist_tag") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
|
|
||||||
count = int(cols[2])
|
|
||||||
if count <= 0:
|
|
||||||
continue
|
|
||||||
if cols[1] not in tags:
|
|
||||||
continue
|
|
||||||
|
|
||||||
out.write(",".join((
|
|
||||||
artists[cols[0]][1],
|
|
||||||
cols[1],
|
|
||||||
str(max(min(count / max_count, 1), 0.2)),
|
|
||||||
)) + "\n")
|
|
||||||
|
|
||||||
with open("repo/tag_tag.csv", "w") as out:
|
|
||||||
out.write(":START_ID(Tag),:END_ID(Tag),weight:float\n")
|
|
||||||
|
|
||||||
def weights():
    """Yield each tag-relation weight of at least 5 from in/tag_relation."""
    with open("in/tag_relation") as handle:
        for row in handle:
            value = int(row.split("\t")[2])
            # Relations weaker than 5 are considered noise.
            if value >= 5:
                yield value
|
|
||||||
weight_median = median(weights()) * 3
|
|
||||||
|
|
||||||
with open("in/tag_relation") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
|
|
||||||
weight = int(cols[2])
|
|
||||||
if weight < 5:
|
|
||||||
continue
|
|
||||||
if cols[0] not in tags or cols[1] not in tags:
|
|
||||||
continue
|
|
||||||
|
|
||||||
out.write(",".join((
|
|
||||||
cols[0],
|
|
||||||
cols[1],
|
|
||||||
str(max(min(weight / weight_median, 1), 0.2)),
|
|
||||||
)) + "\n")
|
|
||||||
|
|
||||||
# -----
|
|
||||||
|
|
||||||
with open("repo/labels.csv", "w") as out:
|
|
||||||
out.write("id:ID(Label),name,sortname,code,:LABEL\n")
|
|
||||||
|
|
||||||
with open("in/label") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
labels[cols[0]] = cols
|
|
||||||
|
|
||||||
sortname = ASCII_RE.sub("_", cols[2]).upper()
|
|
||||||
out.write(",".join((
|
|
||||||
cols[1],
|
|
||||||
"\"" + cols[2].replace("\"", "\"\"") + "\"",
|
|
||||||
sortname,
|
|
||||||
cols[9] if cols[9] != "\\N" else "",
|
|
||||||
"Label" + label_types[cols[10]]
|
|
||||||
)) + "\n")
|
|
||||||
|
|
||||||
with open("repo/release_label.csv", "w") as out:
|
|
||||||
out.write(":START_ID(Release),:END_ID(Label)\n")
|
|
||||||
|
|
||||||
# Should I check link types here?
|
|
||||||
with open("in/l_label_release_group") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
out.write(release_groups[cols[3]][1] + "," + labels[cols[2]][1] + "\n")
|
|
||||||
|
|
||||||
with open("in/l_label_release") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
if cols[3] in release_to_release_group_map:
|
|
||||||
out.write(release_groups[release_to_release_group_map[cols[3]]][1] + "," + labels[cols[2]][1] + "\n")
|
|
||||||
|
|
||||||
|
|
||||||
with open("repo/label_label.csv", "w") as out:
|
|
||||||
out.write(":START_ID(Label),:END_ID(Label),:TYPE\n")
|
|
||||||
|
|
||||||
with open("in/l_label_label") as f:
|
|
||||||
for line in f:
|
|
||||||
cols = line.split("\t")
|
|
||||||
|
|
||||||
out.write(",".join((
|
|
||||||
labels[cols[2]][1],
|
|
||||||
labels[cols[3]][1],
|
|
||||||
label_label_rel_map[link_types[links[cols[1]][1]][6]]
|
|
||||||
)) + "\n")
|
|
||||||
|
|
||||||
# ---
|
|
1
spotify
1
spotify
@ -1 +0,0 @@
|
|||||||
Subproject commit 4ac596b2ff7659b880ac8a3fe9c58ea6527c2efc
|
|
1
spotify2
1
spotify2
@ -1 +0,0 @@
|
|||||||
Subproject commit 0a05c69bcf7005496c2efdf5b825ffa2f443ccdf
|
|
85
task_get_cover.py
Normal file
85
task_get_cover.py
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
import requests
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
import config
|
||||||
|
|
||||||
|
|
||||||
|
def should_download(image: dict):
    """Keep only images explicitly flagged as the front cover."""
    front_flag = image["front"]
    return front_flag is True
|
||||||
|
|
||||||
|
|
||||||
|
def thumb(cover_blob):
    """Decode an image blob and return a JPEG thumbnail (max 256x256) as bytes.

    Transparent images are flattened onto a black canvas before encoding,
    since JPEG has no alpha channel.
    """
    with Image.open(BytesIO(cover_blob)) as image:

        # https://stackoverflow.com/questions/43978819
        # 16-bit grayscale needs to be scaled down to 8-bit before JPEG save.
        if image.mode == "I;16":
            image.mode = "I"
            # BUG FIX: the converted image was previously computed and
            # discarded, leaving the 16-bit data untouched; keep the result.
            image = image.point(lambda i: i * (1. / 256)).convert('L')

        image.thumbnail((256, 256), Image.BICUBIC)
        canvas = Image.new("RGB", image.size, 0x000000)

        # Flatten any alpha channel onto the black canvas.
        if image.mode in ('RGBA', 'LA') or (image.mode == 'P' and 'transparency' in image.info):
            try:
                canvas.paste(image, mask=image.split()[-1])
            except ValueError:
                # Malformed alpha band: fall back to an opaque paste.
                canvas.paste(image)
        else:
            canvas.paste(image)

        blob = BytesIO()
        canvas.save(blob, "JPEG", quality=85, optimize=True)
        canvas.close()

        return blob.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
def save(mbid, tn):
    """Upsert the thumbnail blob for a release group into mg.covers."""
    with psycopg2.connect(config.connstr()) as conn:
        cursor = conn.cursor()
        upsert = (
            "INSERT INTO mg.covers (mbid, tn) VALUES (%s,%s) ON CONFLICT (mbid) "
            "DO UPDATE SET tn = excluded.tn"
        )
        cursor.execute(upsert, (mbid, tn))
        conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def get_mbids(count=1):
    """Yield up to *count* release-group MBIDs that still need a cover fetch.

    Rows never attempted (NULL ts) are served first.
    """
    with psycopg2.connect(config.connstr()) as conn:
        cursor = conn.cursor()
        query = (
            "SELECT gid FROM release_group "
            "LEFT JOIN mg.covers ON gid = mbid "
            "WHERE tn IS NULL "
            "ORDER BY ts NULLS FIRST LIMIT %s"
        )
        cursor.execute(query, (count,))
        for (mbid,) in cursor:
            yield mbid
|
||||||
|
|
||||||
|
|
||||||
|
def download(mbid):
    """Fetch the 250px front cover for a release group, or None if unavailable."""
    url = "https://coverartarchive.org/release-group/%s/front-250.jpg" % mbid
    response = requests.get(url)

    if response.status_code == 200:
        return response.content
    # 404 simply means no cover art exists; anything else is worth reporting.
    if response.status_code != 404:
        print("<%d> %s" % (response.status_code, response.text))
    return None
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--count", type=int, default=1)
    args = parser.parse_args()

    # Fetch (or record the absence of) a cover for each pending MBID.
    for mbid in get_mbids(args.count):
        save(mbid, download(mbid))
        print(mbid)
|
197
task_get_lastfm.py
Executable file
197
task_get_lastfm.py
Executable file
@ -0,0 +1,197 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import json
|
||||||
|
from itertools import repeat
|
||||||
|
|
||||||
|
import config
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
def get_mbid(lfm_name):
    """Return the MBID currently mapped to a last.fm artist name, or None."""
    cur = conn.cursor()
    cur.execute("SELECT mbid "
                "FROM mg.lastfm_artist WHERE name=%s", (lfm_name,))
    result = cur.fetchone()
    if result is None:
        return None
    return result[0]
|
||||||
|
|
||||||
|
|
||||||
|
def set_mbid(lfm_name, mbid):
    """Upsert the last.fm name -> MBID mapping."""
    upsert = ("INSERT INTO mg.lastfm_artist VALUES (%s,%s) ON CONFLICT (name) "
              "DO UPDATE SET mbid=excluded.mbid")
    conn.cursor().execute(upsert, (lfm_name, mbid))
|
||||||
|
|
||||||
|
|
||||||
|
def save_tags(lfm_name, tags):
    """Replace the stored last.fm tag list for an artist.

    BUG FIX: the insert was previously built by interpolating values into the
    SQL string, which broke on tags containing single quotes (e.g.
    "rock 'n' roll") and was an injection risk; it is now parameterized.
    """
    if not tags:
        return
    cur = conn.cursor()

    cur.execute("DELETE FROM mg.lastfm_artist_tag WHERE name=%s", (lfm_name,))
    cur.executemany(
        "INSERT INTO mg.lastfm_artist_tag VALUES (%s,%s)",
        [(lfm_name, tag) for tag in tags]
    )
|
||||||
|
|
||||||
|
|
||||||
|
def save_data(data):
    """Persist one artist payload: MBID mapping, similar artists, tags, meta."""
    if not data:
        return

    disambiguate(data["name"], mbid=data["artist"])

    for neighbour in data["similar"]:
        # Similar artists without an MBID cannot be linked into the graph.
        if neighbour["mbid"] is None:
            continue
        disambiguate(neighbour["name"], neighbour["mbid"])
        save_similar(data["name"], neighbour["name"], neighbour["match"])

    save_tags(data["name"], data["tags"])
    save_meta(data["name"], data["listeners"], data["playcount"])
|
||||||
|
|
||||||
|
|
||||||
|
def save_similar(lfm_name, similar, weight):
    """Upsert a weighted similarity edge between two last.fm artist names."""
    upsert = (
        "INSERT INTO mg.lastfm_artist_artist (name0, name1, weight) VALUES (%s,%s,%s) "
        "ON CONFLICT (name0, name1) DO UPDATE SET weight=excluded.weight, ts=CURRENT_TIMESTAMP"
    )
    conn.cursor().execute(upsert, (lfm_name, similar, weight))
|
||||||
|
|
||||||
|
|
||||||
|
def save_meta(lfm_name, listeners, playcount):
    """Upsert listener/playcount statistics for a last.fm artist."""
    upsert = ("INSERT INTO mg.lastfm_artist_meta VALUES (%s,%s,%s) ON CONFLICT (name) "
              "DO UPDATE SET listeners=excluded.listeners, playcount=excluded.playcount")
    conn.cursor().execute(upsert, (lfm_name, listeners, playcount))
|
||||||
|
|
||||||
|
|
||||||
|
def save_raw_data(name, mbid, data):
    """Cache the raw last.fm API response for a (name, mbid) pair."""
    upsert = ("INSERT INTO mg.lastfm_raw_data (name, mbid, data) VALUES (%s,%s,%s) "
              "ON CONFLICT (name, mbid) DO UPDATE SET ts=CURRENT_TIMESTAMP, data=excluded.data")
    conn.cursor().execute(upsert, (name, mbid, json.dumps(data)))
|
||||||
|
|
||||||
|
|
||||||
|
def get_release_count(mbid):
    """Count MusicBrainz artist-release links for the given artist MBID."""
    cur = conn.cursor()
    cur.execute('SELECT COUNT(*) '
                'FROM l_artist_release '
                'INNER JOIN artist a ON entity0 = a.id '
                'WHERE a.gid = %s', (mbid,))
    result = cur.fetchone()
    if not result:
        return 0
    return result[0]
|
||||||
|
|
||||||
|
|
||||||
|
def disambiguate(name, mbid):
    """
    A lastfm artist name can refer to multiple MBIDs
    For RELATED_TO purposes, we assume that the MBID referring
    to the artist with the most official releases is the one
    """
    current = get_mbid(name)

    if not current or mbid == current:
        # No mapping yet (or same MBID): just (re)write the mapping.
        set_mbid(name, mbid)
    elif get_release_count(current) < get_release_count(mbid):
        # A different MBID with more releases wins the name.
        set_mbid(name, mbid)
|
||||||
|
|
||||||
|
|
||||||
|
def get_cached_artist_data(name, mbid, max_age_days):
    """Return the cached last.fm payload if at most *max_age_days* old.

    Returns None on a cache miss. (Previously returned 0 for a miss, which
    only worked because callers test truthiness; None states the intent and
    matches get_mbid's convention.)
    """
    cur = conn.cursor()
    cur.execute("SELECT data FROM mg.lastfm_raw_data WHERE name=%s AND mbid=%s "
                "AND date_part('day', CURRENT_TIMESTAMP - ts) <= %s ",
                (name, mbid, max_age_days))

    row = cur.fetchone()
    return row[0] if row else None
|
||||||
|
|
||||||
|
|
||||||
|
def get_artist_data(name: str, mbid: str):
    """Fetch artist info + similar artists from the last.fm API.

    Serves from the local raw-data cache when the entry is under 30 days old.
    Falls back from an MBID lookup to a name lookup when last.fm does not
    know the MBID. Returns the assembled payload dict, or None when last.fm
    has no data for either key (the failure is still cached via _raw).
    Raises on rate limiting so the caller stops hammering the API.
    """
    cached_data = get_cached_artist_data(name, mbid, max_age_days=30)
    if cached_data:
        return cached_data

    # Every (url, body) pair is kept so the raw exchange can be archived.
    raw = []
    url = "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&mbid=%s&api_key=%s&format=json" % \
          (mbid, config.config["LASTFM_APIKEY"],)
    r = requests.get(url)
    raw.append((url, r.text))
    info_json = r.json()

    by_name = False

    # MBID lookup failed: retry by artist name.
    if "artist" not in info_json:
        url1 = "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist=%s&api_key=%s&format=json" % \
               (name, config.config["LASTFM_APIKEY"],)
        r = requests.get(url1)
        raw.append((url1, r.text))
        info_json = r.json()
        if "artist" not in info_json:
            if "Rate Limit Exceeded" in r.text:
                raise Exception("Rate Limit Exceeded!")
            # Cache the failed exchange so we don't retry for 30 days.
            data = {
                "_raw": raw
            }
            save_raw_data(name, mbid, data)
            return
        by_name = True

    # Query similar artists with whichever key worked for getinfo.
    if by_name:
        url2 = "https://ws.audioscrobbler.com/2.0/?method=artist.getsimilar&artist=%s&api_key=%s&format=json" % (
            name, config.config["LASTFM_APIKEY"],)
    else:
        url2 = "https://ws.audioscrobbler.com/2.0/?method=artist.getsimilar&mbid=%s&api_key=%s&format=json" % (
            mbid, config.config["LASTFM_APIKEY"],)
    r2 = requests.get(url2)
    raw.append((url2, r2.text))
    similar_json = r2.json()

    # Normalize the two responses into one payload; optional fields
    # (mbid, tags) default to None / [] when absent.
    data = {
        "artist": mbid,
        "name": info_json["artist"]["name"],
        "mbid": info_json["artist"]["mbid"] if "mbid" in info_json["artist"] else None,
        "tags": [t["name"] for t in info_json["artist"]["tags"]["tag"]] if "tags" in info_json["artist"] and "tag" in
                info_json["artist"]["tags"] else [],
        "listeners": info_json["artist"]["stats"]["listeners"],
        "playcount": info_json["artist"]["stats"]["playcount"],
        "similar": [
            {
                "mbid": a["mbid"] if "mbid" in a else None,
                "match": a["match"],
                "name": a["name"]
            }
            for a in similar_json["similarartists"]["artist"]],
        "_raw": raw
    }

    save_raw_data(name, mbid, data)

    return data
|
||||||
|
|
||||||
|
|
||||||
|
def get_task(count=1):
    """Return up to *count* (name, gid) artist rows, stalest last.fm data first.

    BUG FIX: this previously returned cur.fetchone() — a single row — while
    the caller iterates the result (`for task in get_task(...)`), so the loop
    walked the row's two *columns* instead of rows, and LIMIT %s never
    yielded more than one artist. fetchall() returns the intended row list.
    """
    cur = conn.cursor()
    cur.execute(
        "SELECT artist.name, artist.gid FROM artist "
        "LEFT JOIN mg.lastfm_raw_data lfm ON lfm.mbid=gid AND lfm.name=artist.name "
        "ORDER BY lfm.ts NULLS FIRST LIMIT %s",
        (count,)
    )
    return cur.fetchall()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--count", type=int, default=1)
    args = parser.parse_args()

    # Single shared connection used by every helper in this module.
    conn = psycopg2.connect(config.connstr())

    # NOTE(review): get_task() returns cur.fetchone() — one row — so this
    # loop iterates that row's columns, not rows; --count has no effect past
    # the first artist. Confirm intent (fetchall() looks intended).
    for task in get_task(args.count):
        save_data(get_artist_data(*task))
        conn.commit()
        print(task[0])

    conn.close()
|
261
task_get_spotify.py
Executable file
261
task_get_spotify.py
Executable file
@ -0,0 +1,261 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
import json
|
||||||
|
from itertools import repeat
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
import spotipy
|
||||||
|
from hexlib.misc import silent_stdout
|
||||||
|
from spotipy.oauth2 import SpotifyClientCredentials
|
||||||
|
|
||||||
|
import config
|
||||||
|
|
||||||
|
|
||||||
|
def save_raw(query, endpoint, data):
    """Archive the raw JSON returned by a Spotify endpoint for later replay."""
    upsert = (
        "INSERT INTO mg.spotify_raw_data (query, endpoint, data) VALUES (%s,%s,%s) "
        "ON CONFLICT (query, endpoint) "
        "DO UPDATE SET ts=CURRENT_TIMESTAMP, data=excluded.data"
    )
    conn.cursor().execute(upsert, (query, endpoint, json.dumps(data)))
|
||||||
|
|
||||||
|
|
||||||
|
def save_artist(data, max_age_days=30):
    """Returns True if artist is new (and therefore, its albums, tracks etc. should be fetched)"""

    cur = conn.cursor()

    # Skip artists refreshed within the freshness window.
    cur.execute("SELECT spotid FROM mg.spotify_artist_meta WHERE spotid=%s AND "
                "date_part('day', CURRENT_TIMESTAMP - ts) <= %s", (data["id"], max_age_days,))
    if cur.fetchone():
        return False

    cur.execute(
        "INSERT INTO mg.spotify_artist_meta (spotid, name, followers, popularity) "
        "VALUES (%s,%s,%s,%s) "
        "ON CONFLICT (spotid) "
        "DO UPDATE SET name=excluded.name, followers=excluded.followers, popularity=excluded.popularity",
        (data["id"], data["name"], data["followers"]["total"], data["popularity"])
    )

    # Replace the genre tags. FIX: parameterized inserts instead of manually
    # quote-escaping values into the SQL string (fragile, injection-prone).
    cur.execute("DELETE FROM mg.spotify_artist_tag WHERE spotid=%s", (data["id"],))
    if data["genres"]:
        cur.executemany(
            "INSERT INTO mg.spotify_artist_tag VALUES (%s,%s)",
            [(data["id"], genre) for genre in data["genres"]]
        )
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def get_albums(spotid):
    """Fetch and store the artist's album names; the raw response is archived.

    FIX: inserts are parameterized instead of built by string interpolation
    with manual quote-escaping.
    """
    data = silent_stdout(spotify.artist_albums, spotid, album_type="album,single,compilation")
    save_raw(spotid, "artist_albums", data)

    cur = conn.cursor()
    cur.execute("DELETE FROM mg.spotify_artist_album WHERE spotid=%s", (spotid,))
    if data["items"]:
        # De-duplicate album names, as before.
        album_names = set(album["name"] for album in data["items"])
        cur.executemany(
            "INSERT INTO mg.spotify_artist_album VALUES (%s,%s)",
            [(spotid, album) for album in album_names]
        )
    return list()
|
||||||
|
|
||||||
|
|
||||||
|
def get_tracks(spotid):
    """Fetch and store the artist's top tracks (name + preview URL).

    FIX: inserts are parameterized. The previous string-built SQL stored a
    NULL preview_url as the literal string 'None' and broke on quotes in
    track names or URLs.
    """
    data = silent_stdout(spotify.artist_top_tracks, spotid)
    save_raw(spotid, "artist_top_tracks", data)

    cur = conn.cursor()
    cur.execute("DELETE FROM mg.spotify_artist_track WHERE spotid=%s", (spotid,))

    # Keep only the first occurrence of each track name.
    unique_tracks = []
    done = set()
    for track in data["tracks"]:
        if track["name"] in done:
            continue
        unique_tracks.append((track["name"], track["preview_url"]))
        done.add(track["name"])

    if unique_tracks:
        cur.executemany(
            "INSERT INTO mg.spotify_artist_track (spotid, track, url) VALUES (%s,%s,%s)",
            [(spotid, name, url) for name, url in unique_tracks]
        )
|
||||||
|
|
||||||
|
|
||||||
|
def related(spotid):
    """Fetch, archive, and return the artists Spotify considers related."""
    response = silent_stdout(spotify.artist_related_artists, spotid)
    save_raw(spotid, "artist_related_artists", response)
    return response["artists"]
|
||||||
|
|
||||||
|
|
||||||
|
def save_artist_artist(id0, relations):
    """Store ranked related-artist edges (id0 -> related), ignoring duplicates.

    *index* preserves Spotify's relatedness ranking. FIX: values are passed
    as query parameters instead of being interpolated into the SQL string.
    """
    if not relations:
        return
    cur = conn.cursor()
    cur.executemany(
        "INSERT INTO mg.spotify_artist_artist (spotid0, spotid1, index) "
        "VALUES (%s,%s,%s) "
        "ON CONFLICT (spotid0, spotid1) "
        "DO NOTHING",
        [(id0, rel["id"], i) for i, rel in enumerate(relations)]
    )
|
||||||
|
|
||||||
|
|
||||||
|
def get_mbids_with_matching_name(name):
    """Return all artist MBIDs whose case/accent-folded name matches *name*."""
    cur = conn.cursor()
    cur.execute(
        "SELECT gid FROM artist "
        "WHERE asciifold_lower(name)=asciifold_lower(%s)",
        (name,)
    )
    return [mbid for (mbid,) in cur.fetchall()]
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_spotify_conflict(mbid, existing_spotid, new_spotid):
    """Decide which Spotify artist should stay mapped to *mbid*.

    Compares the stored album names of both Spotify artists against the
    MusicBrainz releases of *mbid* (all names accent/case-folded). The
    mapping is switched to *new_spotid* only when it shares strictly more
    album names with MusicBrainz than the current one does. When either
    artist has no stored albums, the existing mapping is kept.
    """
    cur = conn.cursor()

    def _folded_albums(spotid):
        # Folded album names we have on file for one Spotify artist.
        cur.execute(
            "SELECT asciifold_lower(album) FROM mg.spotify_artist_album WHERE spotid=%s",
            (spotid,)
        )
        return set(row[0] for row in cur.fetchall())

    new_albums = _folded_albums(new_spotid)
    if not new_albums:
        return

    existing_albums = _folded_albums(existing_spotid)
    if not existing_albums:
        return

    cur.execute(
        "SELECT DISTINCT asciifold_lower(release.name) FROM release "
        "INNER JOIN artist_credit_name cn ON cn.artist_credit = release.artist_credit "
        "INNER JOIN artist a on a.id = cn.artist "
        "WHERE a.gid=%s", (mbid,)
    )
    mb_albums = set(row[0] for row in cur.fetchall())

    if len(new_albums & mb_albums) > len(existing_albums & mb_albums):
        cur.execute("UPDATE mg.spotify_artist SET spotid = %s WHERE mbid=%s", (new_spotid, mbid))
|
||||||
|
|
||||||
|
|
||||||
|
def _release_count(cur, mbid):
    """Number of MusicBrainz releases credited to *mbid*."""
    cur.execute(
        "SELECT count(release.name) FROM release "
        "INNER JOIN artist_credit_name cn ON cn.artist_credit = release.artist_credit "
        "INNER JOIN artist a on a.id = cn.artist "
        "WHERE a.gid = %s ",
        (mbid,)
    )
    return cur.fetchone()[0]


def _release_names(cur, mbid):
    """Set of folded release names credited to *mbid*."""
    cur.execute(
        "SELECT asciifold_lower(release.name) FROM release "
        "INNER JOIN artist_credit_name cn ON cn.artist_credit = release.artist_credit "
        "INNER JOIN artist a on a.id = cn.artist "
        "WHERE a.gid = %s ",
        (mbid,)
    )
    return set(row[0] for row in cur.fetchall())


def resolve_mb_conflict(spotid, mbids):
    """Map *spotid* to the most plausible of several candidate mbids.

    When we have album names for the Spotify artist, prefer the mbid
    whose MusicBrainz releases share the most folded names with them;
    otherwise fall back to the mbid with the most releases. Ties resolve
    to the first candidate, as before.

    :param spotid: Spotify artist id.
    :param mbids: candidate MusicBrainz artist gids.
    """
    if not mbids:
        # Guard: the old code would have stored a NULL mbid mapping here.
        return

    cur = conn.cursor()
    cur.execute(
        "SELECT asciifold_lower(album) FROM mg.spotify_artist_album WHERE spotid=%s",
        (spotid,)
    )
    spot_albums = set(row[0] for row in cur.fetchall())

    if not spot_albums:
        # We can't base our conflict resolution on album names,
        # pick the one with the most releases
        best_match = max(mbids, key=lambda mbid: _release_count(cur, mbid))
    else:
        best_match = max(
            mbids,
            key=lambda mbid: len(_release_names(cur, mbid) & spot_albums)
        )

    save_spotid_to_mbid(spotid, best_match)
|
||||||
|
|
||||||
|
|
||||||
|
def save_spotid_to_mbid(spotid, mbid):
    """Record the spotid→mbid mapping, resolving clashes on mbid.

    If another Spotify artist is already mapped to *mbid*, defer to
    resolve_spotify_conflict to decide which one keeps the mapping.
    """
    cur = conn.cursor()
    cur.execute(
        "SELECT spotid FROM mg.spotify_artist WHERE mbid=%s",
        (mbid,)
    )
    existing = cur.fetchone()
    if existing is None:
        cur.execute(
            "INSERT INTO mg.spotify_artist (spotid, mbid) VALUES (%s,%s)",
            (spotid, mbid)
        )
    else:
        resolve_spotify_conflict(mbid, existing[0], spotid)
|
||||||
|
|
||||||
|
|
||||||
|
def search_artist(name):
    """Search Spotify for *name* and ingest every matching artist.

    For each result: saves the artist, fetches its albums and top tracks,
    links it to MusicBrainz when exactly one (or, via conflict
    resolution, several) artist names match, and records its
    related-artist edges.
    """
    quoted_name = "\"%s\"" % name

    data = silent_stdout(spotify.search, quoted_name, type="artist", limit=20)
    save_raw(name, "search", data)

    for artist in data["artists"]["items"]:
        if not save_artist(artist):
            continue

        spot_id = artist["id"]
        mbids = get_mbids_with_matching_name(artist["name"])

        get_albums(spot_id)
        get_tracks(spot_id)

        if len(mbids) > 1:
            resolve_mb_conflict(spot_id, mbids)
        elif len(mbids) == 1:
            save_spotid_to_mbid(spot_id, mbids[0])

        save_artist_artist(spot_id, related(spot_id))
|
||||||
|
|
||||||
|
|
||||||
|
def get_tasks(count=1):
    """Yield up to *count* artist names to crawl next.

    Orders by the spotify_artist_meta / spotify_raw_data timestamps with
    NULLS FIRST, so never-crawled artists come before stale ones.
    """
    cur = conn.cursor()
    cur.execute(
        "SELECT artist.name FROM artist "
        "LEFT JOIN mg.spotify_artist sa ON sa.mbid=gid "
        "LEFT JOIN mg.spotify_raw_data srd ON srd.query=artist.name AND endpoint='search' "
        "LEFT JOIN mg.spotify_artist_meta sam ON sa.spotid=sam.spotid "
        "ORDER BY sam.ts NULLS FIRST, srd.ts NULLS FIRST LIMIT %s",
        (count,)
    )
    yield from (artist_name for (artist_name,) in cur)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import argparse

    # CLI: --count controls how many artists are crawled in this run.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--count", type=int, default=1)
    cli_args = arg_parser.parse_args()

    conn = psycopg2.connect(config.connstr())
    spotify = spotipy.Spotify(
        client_credentials_manager=SpotifyClientCredentials(
            client_id=config.config["SPOTIFY_CLIENTID"],
            client_secret=config.config["SPOTIFY_SECRET"]
        )
    )

    # Crawl one artist per task, committing after each so progress is
    # kept if a later task fails.
    for artist_name in get_tasks(cli_args.count):
        search_artist(artist_name)
        conn.commit()
        print(artist_name)

    conn.close()
|
@ -1 +0,0 @@
|
|||||||
Subproject commit aa15a1b29e2fc7f03dafc9301c65e32cb82e4cb4
|
|
Loading…
x
Reference in New Issue
Block a user