music-graph-ui/patch_artists_with_lastfm.py
2019-04-07 11:25:50 -04:00

75 lines
2.2 KiB
Python

import csv
import json
import sqlite3
import sys
def patch(lastfm_data):
    """Merge Last.fm statistics into the artist CSV and export similarity edges.

    Every row of the ``lastfmdata`` table is expected to hold, in its ``data``
    column, a JSON document with the keys ``artist``, ``similar`` (a list of
    objects carrying ``mbid`` and ``match``), ``listeners`` and ``playcount``.

    Two files are written under ``repo/`` (relative to the current working
    directory):

    * ``repo/lastfm_artist.csv`` -- the first four columns of every data row
      of ``repo/artist.csv`` followed by that artist's listener and play
      counts (``0`` for both when no Last.fm document exists for the id).
    * ``repo/lastfm_artist_artist.csv`` -- one ``similar id,artist id,match``
      line per unordered pair of similar artists, restricted to pairs whose
      two ids both appear in ``repo/artist.csv``.

    Nothing is written when the table is empty.

    Args:
        lastfm_data: Path of the SQLite database containing ``lastfmdata``.

    Raises:
        sqlite3.Error: If the database cannot be queried.
        OSError: If one of the CSV files cannot be opened.
        KeyError: If a JSON document lacks one of the expected keys.
        IndexError: If a non-blank ``artist.csv`` row has fewer than four
            columns.
    """
    # Using the connection as a context manager only scopes a transaction; it
    # does not close the handle, so close it explicitly once the rows are read.
    conn = sqlite3.connect(lastfm_data)
    try:
        documents = conn.execute("SELECT data FROM lastfmdata").fetchall()
    finally:
        conn.close()

    if not documents:
        return

    edges = []
    seen_pairs = set()
    artist_stats = {}

    for record in documents:
        info = json.loads(record[0])
        artist_id = info["artist"]

        for similar in info["similar"]:
            similar_id = similar["mbid"]
            if similar_id is None:
                continue
            if (similar_id, artist_id) in seen_pairs:
                continue
            edges.append((similar_id, artist_id, similar["match"]))
            # Register both orientations so the reverse relation reported by
            # the other artist's document is not emitted a second time.
            seen_pairs.add((similar_id, artist_id))
            seen_pairs.add((artist_id, similar_id))

        artist_stats[artist_id] = (info["listeners"], info["playcount"])

    # Only needed while collecting edges; release it before the file work.
    del seen_pairs

    known_artists = set()

    # newline="" is what the csv module requires for files it reads or writes.
    with open("repo/lastfm_artist.csv", "w", newline="") as out:
        writer = csv.writer(out)
        # NOTE(review): this header names 5 columns while every data row
        # below carries 6 (four copied from artist.csv plus listeners and
        # playcount) -- confirm the intended schema with the importer.
        writer.writerow([
            "id:ID(Artist)", "name", ":LABEL", "listeners:int", "playcount:int"
        ])

        with open("repo/artist.csv", newline="") as source:
            reader = csv.reader(source)
            next(reader, None)  # Skip header; tolerate an empty file
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines.
                    continue
                listeners, playcount = artist_stats.get(row[0], (0, 0))
                writer.writerow([
                    row[0],
                    row[1],
                    row[2],
                    row[3],
                    listeners,
                    playcount,
                ])
                known_artists.add(row[0])

    with open("repo/lastfm_artist_artist.csv", "w") as out:
        out.write(",".join((
            ":START_ID(Artist)", ":END_ID(Artist)", "weight"
        )) + "\n")

        for edge in edges:
            # Keep only relations whose two endpoints exist in artist.csv.
            if edge[0] not in known_artists or edge[1] not in known_artists:
                continue
            # "match" may decode from JSON as a number; str() keeps join()
            # from raising TypeError while leaving string values untouched.
            out.write(",".join(map(str, edge)) + "\n")
if __name__ == "__main__":
    # Run only when executed as a script, so that importing this module does
    # not start a patch run as a side effect.
    if len(sys.argv) < 2:
        # Exit with a readable message instead of a bare IndexError.
        sys.exit("usage: %s <lastfm sqlite database>" % sys.argv[0])
    patch(sys.argv[1])