diff --git a/generate_lastfm_tasks.py b/generate_lastfm_tasks.py index 1d8051c..444c761 100644 --- a/generate_lastfm_tasks.py +++ b/generate_lastfm_tasks.py @@ -2,7 +2,7 @@ import csv import json from multiprocessing.pool import ThreadPool -from task_tracker_drone.src.tt_drone.api import TaskTrackerApi, Worker +from task_tracker_drone.src.tt_drone.api import TaskTrackerApi, Worker, LOG_TRACE TT_API_URL = "https://tt.simon987.net/api" TT_PROJECT = 1 diff --git a/process_lastfm_data.py b/process_lastfm_data.py index af6b1c6..014297a 100644 --- a/process_lastfm_data.py +++ b/process_lastfm_data.py @@ -69,7 +69,7 @@ def patch(lastfm_data): with open("repo/lastfm_artist.csv", "w") as out: writer = csv.writer(out) writer.writerow([ - "id:ID(Artist)", "name", "year:short", ":LABEL", "listeners:int", "playcount:int" + "id:ID(Artist)", "name", "sortname", "year:short", "comment", ":LABEL", "listeners:int", "playcount:int" ]) with open("repo/artist.csv") as f: @@ -82,6 +82,8 @@ def patch(lastfm_data): row[1], row[2], row[3], + row[4], + row[5], artist_listeners.get(row[0], (0, 0))[0], artist_listeners.get(row[0], (0, 0))[1], ]) diff --git a/process_mb_dump.py b/process_mb_dump.py index 2828a4d..5c610b8 100644 --- a/process_mb_dump.py +++ b/process_mb_dump.py @@ -1,5 +1,6 @@ import os from collections import defaultdict +import re links = dict() link_types = dict() @@ -188,14 +189,24 @@ with open("repo/area.csv", "w") as out: out_artist = open("repo/artist.csv", "w") out_artist_area = open("repo/artist_area.csv", "w") -out_artist.write("id:ID(Artist),name,year:int,:LABEL\n") +out_artist.write("id:ID(Artist),name,sortname,year:int,comment,:LABEL\n") out_artist_area.write(":START_ID(Artist),:END_ID(Area)\n") +ASCII_RE = re.compile(r"[^a-zA-Z0-9.\-!?& ]") +ALPHANUM_RE = re.compile(r"[^\w.\-!?& ]") + for _, artist in artists.items(): + + sortname = ASCII_RE.sub("_", artist[2]).upper() + if sortname.replace("_", "").strip() == "": + sortname = ALPHANUM_RE.sub("_", artist[3]).upper() + out_artist.write(",".join(( artist[1], '"' + artist[2].replace("\"", "\"\"") + '"', + sortname, artist[4] if artist[4] != "\\N" else "0", + ('"' + artist[13].replace("\"", "\"\"") + '"') if artist[13] != "\\N" else "", "Artist" + (";Group\n" if artist[10] == "2" else "\n") ))) diff --git a/seed.cypher b/seed.cypher index 02606bf..9b1edce 100644 --- a/seed.cypher +++ b/seed.cypher @@ -1,3 +1,3 @@ CREATE INDEX ON :Artist(id); -CREATE INDEX ON :Artist(name); +CREATE INDEX ON :Artist(sortname); CREATE INDEX ON :Release(id);