mirror of
https://github.com/simon987/music-graph-scripts.git
synced 2025-04-10 14:06:42 +00:00
Normalize tag weight
This commit is contained in:
parent
14327c62cc
commit
6460e98ce0
@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
export NEO4J_HOME="/home/drone/Downloads/neo4j-community-3.5.3"
|
||||
export NEO4J_HOME="/home/drone/Documents/neo4j"
|
||||
export REPOSITORY="http://localhost:9999"
|
||||
export DATABASE="graph.db"
|
||||
|
||||
|
@ -313,40 +313,59 @@ with open("in/tag") as f:
|
||||
out.write(cols[0] + ",\"" + cols[1].replace("\"", "\"\"") + "\"\n")
|
||||
|
||||
with open("repo/release_tag.csv", "w") as out:
|
||||
out.write(":START_ID(Release),:END_ID(Tag),weight:int\n")
|
||||
out.write(":START_ID(Release),:END_ID(Tag),weight:float\n")
|
||||
|
||||
# get max count
|
||||
max_count = 0
|
||||
with open("in/release_group_tag") as f:
|
||||
for line in f:
|
||||
cols = line.split("\t")
|
||||
max_count = max(max_count, int(cols[2]))
|
||||
max_count = max_count / 4
|
||||
|
||||
if int(cols[2]) <= 0:
|
||||
# Weight is linear in the tag count relative to max_count (a quarter of the highest count), clamped to [0.2, 1]
|
||||
with open("in/release_group_tag") as f:
|
||||
for line in f:
|
||||
cols = line.split("\t")
|
||||
count = int(cols[2])
|
||||
if count <= 0:
|
||||
continue
|
||||
|
||||
out.write(",".join((
|
||||
release_groups[cols[0]][1],
|
||||
cols[1],
|
||||
cols[2],
|
||||
str(max(min(count / max_count, 1), 0.2)),
|
||||
)) + "\n")
|
||||
|
||||
with open("repo/artist_tag.csv", "w") as out:
|
||||
out.write(":START_ID(Artist),:END_ID(Tag),weight:int\n")
|
||||
out.write(":START_ID(Artist),:END_ID(Tag),weight:float\n")
|
||||
|
||||
# get max count
|
||||
max_count = 0
|
||||
with open("in/artist_tag") as f:
|
||||
for line in f:
|
||||
cols = line.split("\t")
|
||||
max_count = max(max_count, int(cols[2]))
|
||||
max_count = max_count / 4
|
||||
|
||||
# Weight is linear in the tag count relative to max_count (a quarter of the highest count), clamped to [0.2, 1]
|
||||
with open("in/artist_tag") as f:
|
||||
for line in f:
|
||||
cols = line.split("\t")
|
||||
|
||||
if int(cols[2]) <= 0:
|
||||
count = int(cols[2])
|
||||
if count <= 0:
|
||||
continue
|
||||
|
||||
out.write(",".join((
|
||||
artists[cols[0]][1],
|
||||
cols[1],
|
||||
cols[2],
|
||||
str(max(min(count / max_count, 1), 0.2)),
|
||||
)) + "\n")
|
||||
|
||||
with open("repo/tag_tag.csv", "w") as out:
|
||||
out.write(":START_ID(Tag),:END_ID(Tag),weight:int\n")
|
||||
|
||||
# TODO: normalize weight so it's between [0,1]
|
||||
with open("in/tag_relation") as f:
|
||||
for line in f:
|
||||
cols = line.split("\t")
|
||||
|
@ -1,2 +1,3 @@
|
||||
CREATE INDEX ON :Artist(id);
|
||||
CREATE INDEX ON :Artist(name);
|
||||
CREATE INDEX ON :Release(id);
|
||||
|
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
export NEO4J_HOME="/home/drone/Downloads/neo4j-community-3.5.3"
|
||||
export NEO4J_HOME="/home/drone/Documents/neo4j"
|
||||
|
||||
cat seed.cypher | ${NEO4J_HOME}/bin/cypher-shell
|
||||
|
Loading…
x
Reference in New Issue
Block a user