Normalize tag weight

This commit is contained in:
simon 2019-05-25 08:51:53 -04:00
parent 14327c62cc
commit 6460e98ce0
5 changed files with 30 additions and 10 deletions

View File

@@ -1,6 +1,6 @@
#!/bin/bash
#!/usr/bin/env bash
export NEO4J_HOME="/home/drone/Downloads/neo4j-community-3.5.3"
export NEO4J_HOME="/home/drone/Documents/neo4j"
export REPOSITORY="http://localhost:9999"
export DATABASE="graph.db"

View File

@@ -313,40 +313,59 @@ with open("in/tag") as f:
out.write(cols[0] + ",\"" + cols[1].replace("\"", "\"\"") + "\"\n")
with open("repo/release_tag.csv", "w") as out:
out.write(":START_ID(Release),:END_ID(Tag),weight:int\n")
out.write(":START_ID(Release),:END_ID(Tag),weight:float\n")
# get max count
max_count = 0
with open("in/release_group_tag") as f:
for line in f:
cols = line.split("\t")
max_count = max(max_count, int(cols[2]))
max_count = max_count / 4
if int(cols[2]) <= 0:
# weight is linear
with open("in/release_group_tag") as f:
for line in f:
cols = line.split("\t")
count = int(cols[2])
if count <= 0:
continue
out.write(",".join((
release_groups[cols[0]][1],
cols[1],
cols[2],
str(max(min(count / max_count, 1), 0.2)),
)) + "\n")
with open("repo/artist_tag.csv", "w") as out:
out.write(":START_ID(Artist),:END_ID(Tag),weight:int\n")
out.write(":START_ID(Artist),:END_ID(Tag),weight:float\n")
# get max count
max_count = 0
with open("in/artist_tag") as f:
for line in f:
cols = line.split("\t")
max_count = max(max_count, int(cols[2]))
max_count = max_count / 4
# Weight is linear
with open("in/artist_tag") as f:
for line in f:
cols = line.split("\t")
if int(cols[2]) <= 0:
count = int(cols[2])
if count <= 0:
continue
out.write(",".join((
artists[cols[0]][1],
cols[1],
cols[2],
str(max(min(count / max_count, 1), 0.2)),
)) + "\n")
with open("repo/tag_tag.csv", "w") as out:
out.write(":START_ID(Tag),:END_ID(Tag),weight:int\n")
# TODO: normalize weight so it's between [0,1]
with open("in/tag_relation") as f:
for line in f:
cols = line.split("\t")

View File

@@ -1,2 +1,3 @@
CREATE INDEX ON :Artist(id);
CREATE INDEX ON :Artist(name);
CREATE INDEX ON :Release(id);

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env bash
export NEO4J_HOME="/home/drone/Downloads/neo4j-community-3.5.3"
export NEO4J_HOME="/home/drone/Documents/neo4j"
cat seed.cypher | ${NEO4J_HOME}/bin/cypher-shell