mirror of
https://github.com/simon987/music-graph-scripts.git
synced 2025-04-19 10:26:43 +00:00
Normalize tag weight
This commit is contained in:
parent
14327c62cc
commit
6460e98ce0
@ -1,6 +1,6 @@
|
|||||||
#!/bin/bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
export NEO4J_HOME="/home/drone/Downloads/neo4j-community-3.5.3"
|
export NEO4J_HOME="/home/drone/Documents/neo4j"
|
||||||
export REPOSITORY="http://localhost:9999"
|
export REPOSITORY="http://localhost:9999"
|
||||||
export DATABASE="graph.db"
|
export DATABASE="graph.db"
|
||||||
|
|
||||||
|
@ -313,40 +313,59 @@ with open("in/tag") as f:
|
|||||||
out.write(cols[0] + ",\"" + cols[1].replace("\"", "\"\"") + "\"\n")
|
out.write(cols[0] + ",\"" + cols[1].replace("\"", "\"\"") + "\"\n")
|
||||||
|
|
||||||
with open("repo/release_tag.csv", "w") as out:
|
with open("repo/release_tag.csv", "w") as out:
|
||||||
out.write(":START_ID(Release),:END_ID(Tag),weight:int\n")
|
out.write(":START_ID(Release),:END_ID(Tag),weight:float\n")
|
||||||
|
|
||||||
|
# get max count
|
||||||
|
max_count = 0
|
||||||
with open("in/release_group_tag") as f:
|
with open("in/release_group_tag") as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
cols = line.split("\t")
|
cols = line.split("\t")
|
||||||
|
max_count = max(max_count, int(cols[2]))
|
||||||
|
max_count = max_count / 4
|
||||||
|
|
||||||
if int(cols[2]) <= 0:
|
# weight is linear
|
||||||
|
with open("in/release_group_tag") as f:
|
||||||
|
for line in f:
|
||||||
|
cols = line.split("\t")
|
||||||
|
count = int(cols[2])
|
||||||
|
if count <= 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
out.write(",".join((
|
out.write(",".join((
|
||||||
release_groups[cols[0]][1],
|
release_groups[cols[0]][1],
|
||||||
cols[1],
|
cols[1],
|
||||||
cols[2],
|
str(max(min(count / max_count, 1), 0.2)),
|
||||||
)) + "\n")
|
)) + "\n")
|
||||||
|
|
||||||
with open("repo/artist_tag.csv", "w") as out:
|
with open("repo/artist_tag.csv", "w") as out:
|
||||||
out.write(":START_ID(Artist),:END_ID(Tag),weight:int\n")
|
out.write(":START_ID(Artist),:END_ID(Tag),weight:float\n")
|
||||||
|
|
||||||
|
# get max count
|
||||||
|
max_count = 0
|
||||||
|
with open("in/artist_tag") as f:
|
||||||
|
for line in f:
|
||||||
|
cols = line.split("\t")
|
||||||
|
max_count = max(max_count, int(cols[2]))
|
||||||
|
max_count = max_count / 4
|
||||||
|
|
||||||
|
# Weight is linear
|
||||||
with open("in/artist_tag") as f:
|
with open("in/artist_tag") as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
cols = line.split("\t")
|
cols = line.split("\t")
|
||||||
|
|
||||||
if int(cols[2]) <= 0:
|
count = int(cols[2])
|
||||||
|
if count <= 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
out.write(",".join((
|
out.write(",".join((
|
||||||
artists[cols[0]][1],
|
artists[cols[0]][1],
|
||||||
cols[1],
|
cols[1],
|
||||||
cols[2],
|
str(max(min(count / max_count, 1), 0.2)),
|
||||||
)) + "\n")
|
)) + "\n")
|
||||||
|
|
||||||
with open("repo/tag_tag.csv", "w") as out:
|
with open("repo/tag_tag.csv", "w") as out:
|
||||||
out.write(":START_ID(Tag),:END_ID(Tag),weight:int\n")
|
out.write(":START_ID(Tag),:END_ID(Tag),weight:int\n")
|
||||||
|
|
||||||
|
# TODO: normalize weight so it's between [0,1]
|
||||||
with open("in/tag_relation") as f:
|
with open("in/tag_relation") as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
cols = line.split("\t")
|
cols = line.split("\t")
|
||||||
|
@ -1,2 +1,3 @@
|
|||||||
CREATE INDEX ON :Artist(id);
|
CREATE INDEX ON :Artist(id);
|
||||||
|
CREATE INDEX ON :Artist(name);
|
||||||
CREATE INDEX ON :Release(id);
|
CREATE INDEX ON :Release(id);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
export NEO4J_HOME="/home/drone/Downloads/neo4j-community-3.5.3"
|
export NEO4J_HOME="/home/drone/Documents/neo4j"
|
||||||
|
|
||||||
cat seed.cypher | ${NEO4J_HOME}/bin/cypher-shell
|
cat seed.cypher | ${NEO4J_HOME}/bin/cypher-shell
|
||||||
|
Loading…
x
Reference in New Issue
Block a user