mirror of
https://github.com/simon987/yt-metadata.git
synced 2025-04-04 08:23:00 +00:00
Initial commit
This commit is contained in:
commit
33be058e49
252
import.py
Normal file
252
import.py
Normal file
@ -0,0 +1,252 @@
|
||||
import json
|
||||
import os
|
||||
import psycopg2
|
||||
|
||||
DB_STR = "dbname=yt-meta user=yt-meta"
|
||||
tags_cache = {}
|
||||
categories_cache = {}
|
||||
licenses_cache = {}
|
||||
|
||||
|
||||
def init_cache():
|
||||
|
||||
with psycopg2.connect(DB_STR) as conn:
|
||||
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Tags
|
||||
cursor.execute('SELECT id, name FROM tag')
|
||||
for tag in cursor.fetchall():
|
||||
tags_cache[tag[1]] = tag[0]
|
||||
|
||||
# Categories
|
||||
cursor.execute('SELECT id, name FROM category')
|
||||
for category in cursor.fetchall():
|
||||
categories_cache[category[1]] = category[0]
|
||||
|
||||
# License
|
||||
cursor.execute('SELECT id, name FROM license')
|
||||
for license in cursor.fetchall():
|
||||
licenses_cache[license[1]] = license[0]
|
||||
|
||||
|
||||
def create_tag(cursor, tag, video_id):
|
||||
tag_id = tags_cache.get(tag, None)
|
||||
if not tag_id:
|
||||
cursor.execute('INSERT INTO tag (name) VALUES (%s)', (tag, ))
|
||||
cursor.execute('SELECT LASTVAL()')
|
||||
|
||||
tag_id = cursor.fetchone()[0]
|
||||
tags_cache[tag] = tag_id
|
||||
print("Created tag '" + tag + "' with id " + str(tag_id))
|
||||
cursor.execute('INSERT INTO video_has_tag (video_id, tag_id) VALUES (%s,%s)', (video_id, tag_id))
|
||||
|
||||
|
||||
def create_category(cursor, category, video_id):
|
||||
|
||||
category_id = categories_cache.get(category, None)
|
||||
if not category_id:
|
||||
cursor.execute('INSERT INTO category (name) VALUES (%s)', (category, ))
|
||||
cursor.execute('SELECT LASTVAL()')
|
||||
|
||||
category_id = cursor.fetchone()[0]
|
||||
categories_cache[category] = category_id
|
||||
print("Created category '" + category + "' with id " + str(category_id))
|
||||
cursor.execute('INSERT INTO video_in_category (video_id, category_id) VALUES (%s,%s)', (video_id, category_id))
|
||||
|
||||
|
||||
def create_uploader(cursor, name, ul_id, url):
|
||||
|
||||
cursor.execute('INSERT INTO uploader (id, url, name) VALUES (%s,%s,%s) ON CONFLICT DO NOTHING',
|
||||
(ul_id, url, name))
|
||||
|
||||
|
||||
def create_license(cursor, name):
|
||||
|
||||
license_id = licenses_cache.get(name, None)
|
||||
if not license_id:
|
||||
cursor.execute('INSERT INTO license (name) VALUES (%s)', (name, ))
|
||||
cursor.execute('SELECT LASTVAL()')
|
||||
|
||||
license_id = cursor.fetchone()[0]
|
||||
licenses_cache[name] = license_id
|
||||
print("Created license '" + name + "' with id " + str(license_id))
|
||||
|
||||
|
||||
def create_video(cursor, **kwargs):
|
||||
|
||||
cursor.execute('INSERT INTO video (id, uploader_id, creator, upload_date, license_id, title, full_title,'
|
||||
' alt_title, file_name, description, annotation, webpage_url, view_count, like_count, '
|
||||
'dislike_count, display_id, duration, age_limit) '
|
||||
'VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) '
|
||||
'ON CONFLICT DO NOTHING ',
|
||||
(kwargs["id"],
|
||||
kwargs["uploader_id"], kwargs["creator"], kwargs["upload_date"],
|
||||
kwargs["license_id"],
|
||||
kwargs["title"], kwargs["full_title"], kwargs["alt_title"],
|
||||
kwargs["file_name"],
|
||||
kwargs["description"],
|
||||
kwargs["annotation"],
|
||||
kwargs["webpage_url"],
|
||||
kwargs["view_count"], kwargs["like_count"], kwargs["dislike_count"],
|
||||
kwargs["display_id"],
|
||||
kwargs["duration"],
|
||||
kwargs["age_limit"]
|
||||
))
|
||||
|
||||
print("Created video " + kwargs["id"])
|
||||
|
||||
|
||||
def create_subtitles(cursor, filename, lang, url, video_id):
|
||||
|
||||
with open(filename, "r") as f:
|
||||
data = f.read()
|
||||
|
||||
cursor.execute('INSERT INTO subtitles (language, url, data, video_id) VALUES (%s,%s,%s,%s) '
|
||||
'ON CONFLICT DO NOTHING',
|
||||
(lang, url, data, video_id))
|
||||
print("Create subtitles " + lang + " for " + video_id)
|
||||
|
||||
|
||||
def create_thumbnail(cursor, filename, url, tn_id, video_id):
|
||||
|
||||
if filename and os.path.exists(filename):
|
||||
with open(filename, "rb") as f:
|
||||
data = psycopg2.Binary(f.read())
|
||||
else:
|
||||
data = None
|
||||
|
||||
cursor.execute('INSERT INTO thumbnail (thumbnail_id, url, video_id, data) VALUES (%s,%s,%s,%s) '
|
||||
'ON CONFLICT DO NOTHING',
|
||||
(tn_id, url, video_id, data))
|
||||
print("Create thumbnail " + tn_id + " for " + video_id)
|
||||
|
||||
|
||||
def create_format(cursor, **kwargs):
|
||||
cursor.execute('INSERT INTO format (name, note, format_id, url, player_url, extension, audio_codec, video_codec, '
|
||||
'audio_bitrate, total_bitrate, file_size, quality, width, height, fps, video_id) '
|
||||
'VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) '
|
||||
'ON CONFLICT DO NOTHING ',
|
||||
(kwargs["name"],
|
||||
kwargs["note"],
|
||||
kwargs["format_id"],
|
||||
kwargs["url"],
|
||||
kwargs["player_url"],
|
||||
kwargs["extension"],
|
||||
kwargs["audio_codec"],
|
||||
kwargs["video_codec"],
|
||||
kwargs["audio_bitrate"],
|
||||
kwargs["total_bitrate"],
|
||||
kwargs["file_size"],
|
||||
kwargs["quality"],
|
||||
kwargs["width"], kwargs["height"],
|
||||
kwargs["fps"],
|
||||
kwargs["video_id"]
|
||||
))
|
||||
print("Create format " + kwargs["format_id"] + " for " + kwargs["video_id"])
|
||||
|
||||
|
||||
def create_chapter(cursor, video_id, start_time, end_time, title):
|
||||
|
||||
cursor.execute('INSERT INTO chatper (start_time, end_time, title, video_id) VALUES (%s,%s,%s,%s) '
|
||||
'ON CONFLICT DO NOTHING',
|
||||
(start_time, end_time, title, video_id))
|
||||
print("Created chapter for " + video_id)
|
||||
|
||||
|
||||
def import_json(filename):
|
||||
|
||||
directory = filename[:filename.rfind("/") + 1]
|
||||
|
||||
with open(filename, "r") as f:
|
||||
|
||||
meta = json.load(f)
|
||||
|
||||
with psycopg2.connect(DB_STR) as conn:
|
||||
|
||||
cursor = conn.cursor()
|
||||
|
||||
create_license(cursor, meta["license"])
|
||||
|
||||
create_uploader(cursor, meta["uploader"], meta["uploader_id"], meta["uploader_url"])
|
||||
|
||||
create_video(cursor,
|
||||
id=meta["id"],
|
||||
uploader_id=meta["uploader_id"],
|
||||
creator=meta["creator"],
|
||||
upload_date=meta["upload_date"],
|
||||
license_id=licenses_cache[meta["license"]],
|
||||
title=meta["title"],
|
||||
full_title=meta["fulltitle"],
|
||||
alt_title=meta["alt_title"],
|
||||
file_name=meta["_filename"],
|
||||
description=meta["description"],
|
||||
duration=meta["duration"],
|
||||
age_limit=meta["age_limit"],
|
||||
annotation=meta["annotations"],
|
||||
webpage_url=meta["webpage_url"],
|
||||
view_count=meta["view_count"],
|
||||
like_count=meta["like_count"],
|
||||
dislike_count=meta["dislike_count"],
|
||||
display_id=meta["display_id"])
|
||||
|
||||
for tag in meta["tags"]:
|
||||
create_tag(cursor, tag, meta["id"])
|
||||
|
||||
for category in meta["categories"]:
|
||||
create_category(cursor, category, meta["id"])
|
||||
|
||||
for sub in meta["subtitles"]:
|
||||
sub_filename = directory + meta["_filename"].replace(meta["ext"], sub + ".vtt")
|
||||
if os.path.exists(filename):
|
||||
create_subtitles(cursor, sub_filename, sub, meta["subtitles"][sub][0]["url"], meta["id"])
|
||||
|
||||
for tn in meta["thumbnails"]:
|
||||
# With the script, only the first (default) thumbnail is saved
|
||||
if tn["id"] == "0":
|
||||
tn_filename = directory + meta["_filename"].replace(meta["ext"], "jpg")
|
||||
else:
|
||||
tn_filename = None
|
||||
|
||||
create_thumbnail(cursor, tn_filename, tn["url"], tn["id"], meta["id"])
|
||||
|
||||
for frmt in meta["formats"]:
|
||||
create_format(cursor,
|
||||
name=frmt["format"],
|
||||
note=frmt.get("format_note", None),
|
||||
format_id=frmt["format_id"],
|
||||
url=frmt["url"],
|
||||
player_url=frmt.get("player_url", None),
|
||||
extension=frmt["ext"],
|
||||
audio_codec=frmt["acodec"],
|
||||
video_codec=frmt["vcodec"],
|
||||
audio_bitrate=frmt.get("abr", None),
|
||||
total_bitrate=frmt.get("tbr", None),
|
||||
file_size=frmt.get("filesize", None),
|
||||
quality=frmt.get("quality", None),
|
||||
width=frmt.get("width", None),
|
||||
height=frmt.get("height", None),
|
||||
fps=frmt.get("fps", None),
|
||||
video_id=meta["id"])
|
||||
|
||||
if meta["chapters"]:
|
||||
for chapter in meta["chapters"]:
|
||||
create_chapter(cursor, meta["id"], chapter["start_time"], chapter["end_time"], chapter["title"])
|
||||
|
||||
if meta["automatic_captions"]:
|
||||
print(meta["id"])
|
||||
quit()
|
||||
|
||||
|
||||
def import_recursive(root_path):
|
||||
|
||||
for root, dirs, files in os.walk(root_path):
|
||||
for file in files:
|
||||
ext = os.path.splitext(file)[1]
|
||||
if ext == ".json":
|
||||
filename = os.path.join(root, file)
|
||||
import_json(filename)
|
||||
|
||||
|
||||
init_cache()
|
||||
import_recursive("/home/simon/Downloads/yt/")
|
1
requirements.txt
Normal file
1
requirements.txt
Normal file
@ -0,0 +1 @@
|
||||
psycopg2
|
176
schema.sql
Normal file
176
schema.sql
Normal file
@ -0,0 +1,176 @@
|
||||
-- create database "yt-meta"
|
||||
-- ;
|
||||
|
||||
create table uploader
|
||||
(
|
||||
id text not null
|
||||
constraint uploader_id_pk
|
||||
primary key,
|
||||
url text not null,
|
||||
name text not null
|
||||
)
|
||||
;
|
||||
|
||||
create unique index uploader_id_uindex
|
||||
on uploader (id)
|
||||
;
|
||||
|
||||
create table category
|
||||
(
|
||||
id serial not null
|
||||
constraint category_pkey
|
||||
primary key,
|
||||
name text not null
|
||||
)
|
||||
;
|
||||
|
||||
create table tag
|
||||
(
|
||||
id serial not null
|
||||
constraint tag_pkey
|
||||
primary key,
|
||||
name text not null
|
||||
)
|
||||
;
|
||||
|
||||
create unique index tag_id_uindex
|
||||
on tag (id)
|
||||
;
|
||||
|
||||
create table license
|
||||
(
|
||||
id serial not null
|
||||
constraint license_pkey
|
||||
primary key,
|
||||
name text not null
|
||||
)
|
||||
;
|
||||
|
||||
create table video
|
||||
(
|
||||
id text not null
|
||||
constraint video_pkey
|
||||
primary key,
|
||||
uploader_id text not null
|
||||
constraint video_uploader_id_fk
|
||||
references uploader,
|
||||
creator text,
|
||||
upload_date date,
|
||||
license_id integer
|
||||
constraint video_license_id_fk
|
||||
references license,
|
||||
title text,
|
||||
full_title text,
|
||||
alt_title text,
|
||||
file_name text,
|
||||
description text,
|
||||
duration integer default 0,
|
||||
age_limit integer default 0,
|
||||
annotation text,
|
||||
webpage_url text,
|
||||
view_count integer,
|
||||
like_count integer,
|
||||
dislike_count integer,
|
||||
display_id text
|
||||
)
|
||||
;
|
||||
|
||||
create unique index video_id_uindex
|
||||
on video (id)
|
||||
;
|
||||
|
||||
create table format
|
||||
(
|
||||
name text,
|
||||
note text,
|
||||
format_id text not null,
|
||||
url text,
|
||||
player_url text,
|
||||
extension text,
|
||||
audio_codec text,
|
||||
video_codec text,
|
||||
audio_bitrate integer,
|
||||
total_bitrate integer,
|
||||
file_size bigint,
|
||||
quality integer,
|
||||
width integer,
|
||||
height integer,
|
||||
fps integer,
|
||||
video_id text not null
|
||||
constraint format_video_id_fk
|
||||
references video,
|
||||
constraint format_format_id_video_id_pk
|
||||
primary key (format_id, video_id)
|
||||
)
|
||||
;
|
||||
|
||||
create table thumbnail
|
||||
(
|
||||
thumbnail_id text not null,
|
||||
url text,
|
||||
video_id text not null
|
||||
constraint thumbnail_video_id_fk
|
||||
references video,
|
||||
data bytea,
|
||||
constraint thumbnail_thumbnail_id_video_id_pk
|
||||
primary key (thumbnail_id, video_id)
|
||||
)
|
||||
;
|
||||
|
||||
create table video_in_category
|
||||
(
|
||||
video_id text not null
|
||||
constraint video_in_category_video_id_fk
|
||||
references video,
|
||||
category_id integer not null
|
||||
constraint video_in_category_category_id_fk
|
||||
references category
|
||||
)
|
||||
;
|
||||
|
||||
create table video_has_tag
|
||||
(
|
||||
video_id text not null
|
||||
constraint video_has_tag_video_id_fk
|
||||
references video,
|
||||
tag_id integer not null
|
||||
constraint video_has_tag_tag_id_fk
|
||||
references tag
|
||||
)
|
||||
;
|
||||
|
||||
create unique index license_id_uindex
|
||||
on license (id)
|
||||
;
|
||||
|
||||
create table subtitles
|
||||
(
|
||||
language text not null,
|
||||
url text,
|
||||
data text,
|
||||
video_id text not null
|
||||
constraint subtitles_video_id_fk
|
||||
references video,
|
||||
constraint subtitles_language_video_id_pk
|
||||
primary key (language, video_id)
|
||||
)
|
||||
;
|
||||
|
||||
create table chatper
|
||||
(
|
||||
id serial not null
|
||||
constraint chatper_pkey
|
||||
primary key,
|
||||
start_time integer not null,
|
||||
end_time integer not null,
|
||||
title text,
|
||||
video_id text not null
|
||||
constraint chatper_video_id_fk
|
||||
references video
|
||||
)
|
||||
;
|
||||
|
||||
create unique index chatper_id_uindex
|
||||
on chatper (id)
|
||||
;
|
||||
|
Loading…
x
Reference in New Issue
Block a user