mirror of
https://github.com/simon987/music-graph-ui.git
synced 2025-04-18 01:26:43 +00:00
75 lines
2.2 KiB
Python
75 lines
2.2 KiB
Python
import csv
|
|
import json
|
|
import sqlite3
|
|
import sys
|
|
|
|
|
|
def _read_lastfm_rows(db_path):
    """Return the ``data`` column of every row in the ``lastfmdata`` table.

    The connection is closed explicitly: using a sqlite3 connection as a
    context manager only scopes a transaction and never closes the handle.
    """
    conn = sqlite3.connect(db_path)
    try:
        return [row[0] for row in conn.execute("SELECT data FROM lastfmdata")]
    finally:
        conn.close()


def _collect_similarity(payloads):
    """Decode the JSON payloads into similarity edges and per-artist stats.

    Returns ``(edges, stats)`` where ``edges`` is a list of
    ``(similar mbid, artist, match)`` tuples and ``stats`` maps each
    payload's ``"artist"`` value to ``(listeners, playcount)``.
    """
    edges = []
    seen_pairs = set()
    stats = {}

    for payload in payloads:
        entry = json.loads(payload)
        # presumably an artist MBID, since it is matched against the first
        # column of artist.csv later on -- verify against the scraper.
        artist = entry["artist"]

        for similar in entry["similar"]:
            mbid = similar["mbid"]
            if mbid is None or (mbid, artist) in seen_pairs:
                continue
            edges.append((mbid, artist, similar["match"]))
            # Register both orientations so the reverse edge (B -> A after
            # A -> B) is treated as a duplicate and dropped.
            seen_pairs.add((mbid, artist))
            seen_pairs.add((artist, mbid))

        stats[artist] = (entry["listeners"], entry["playcount"])

    return edges, stats


def _write_artist_nodes(stats):
    """Write ``repo/lastfm_artist.csv`` and return the set of artist ids seen.

    Every row of ``repo/artist.csv`` (after its header) is copied with the
    artist's listeners and playcount appended; artists without an entry in
    *stats* get ``0, 0``.
    """
    artists = set()

    # newline="" is what the csv module requires for files it reads/writes.
    with open("repo/lastfm_artist.csv", "w", newline="") as out:
        writer = csv.writer(out)
        # NOTE(review): this header names five columns, but each data row
        # below carries six values (four copied from artist.csv plus
        # listeners and playcount). Left unchanged -- confirm against the
        # schema of artist.csv.
        writer.writerow([
            "id:ID(Artist)", "name", ":LABEL", "listeners:int", "playcount:int"
        ])

        with open("repo/artist.csv", newline="") as f:
            reader = csv.reader(f)
            next(reader, None)  # Skip header (tolerates an empty file)

            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines; nothing to copy.
                    continue
                listeners, playcount = stats.get(row[0], (0, 0))
                writer.writerow([
                    row[0],
                    row[1],
                    row[2],
                    row[3],
                    listeners,
                    playcount,
                ])
                artists.add(row[0])

    return artists


def _write_similarity_edges(edges, artists):
    """Write ``repo/lastfm_artist_artist.csv`` for edges whose ends are both known."""
    with open("repo/lastfm_artist_artist.csv", "w") as out:
        out.write(",".join((
            ":START_ID(Artist)", ":END_ID(Artist)", "weight"
        )) + "\n")

        for edge in edges:
            if edge[0] not in artists or edge[1] not in artists:
                continue
            # str() each field: "match" may have been decoded as a JSON
            # number, which str.join would reject.
            out.write(",".join(map(str, edge)) + "\n")


def patch(lastfm_data):
    """Export Last.fm listener stats and artist similarity as CSV files.

    Reads the JSON documents stored in the ``data`` column of the
    ``lastfmdata`` table, then writes two files relative to the current
    working directory:

    * ``repo/lastfm_artist.csv`` -- the rows of ``repo/artist.csv`` with
      listeners and playcount appended.
    * ``repo/lastfm_artist_artist.csv`` -- one ``start,end,weight`` line per
      unordered pair of similar artists, restricted to artists present in
      ``repo/artist.csv``.

    Nothing is written when the table is empty.

    Args:
        lastfm_data: path of the SQLite database to read.

    Raises:
        sqlite3.Error: if the database or table cannot be read.
        OSError: if ``repo/artist.csv`` or an output file cannot be opened.
        KeyError: if a JSON document lacks one of the expected keys.
    """
    payloads = _read_lastfm_rows(lastfm_data)
    if not payloads:
        return

    edges, stats = _collect_similarity(payloads)
    artists = _write_artist_nodes(stats)
    _write_similarity_edges(edges, artists)
|
|
|
|
|
|
# Run the export only when this file is executed as a script, so that
# importing the module (e.g. to reuse patch()) does not read the database or
# write any files as a side effect.
if __name__ == "__main__":
    patch(sys.argv[1])  # argv[1] is handed to patch() as the SQLite database path
|