mirror of
https://github.com/simon987/od-database.git
synced 2025-04-18 18:06:44 +00:00
Changed from mime to extension for graph and added script to clear invalid websites
This commit is contained in:
parent
819e2fbddb
commit
bb872a9248
9
clean_invalid_websites.py
Normal file
9
clean_invalid_websites.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
"""Delete every website whose indexed files add up to less than a size limit.

Run as a script to purge such websites from ``db.sqlite3``.  Importing this
module no longer touches the database; call :func:`main` explicitly instead.
"""

# Size limit passed to ``Database.get_websites_smaller``: websites whose
# files sum to less than this value are removed.
MAX_INVALID_SIZE = 10000000


def main(db=None, max_size=MAX_INVALID_SIZE):
    """Clear and delete all websites smaller than *max_size*.

    :param db: object exposing ``get_websites_smaller``, ``clear_website``
        and ``delete_website``; when omitted, ``Database("db.sqlite3")`` is
        opened.
    :param max_size: size limit forwarded to ``db.get_websites_smaller``.
    :return: list of the website ids that were deleted, in processing order.
    """
    if db is None:
        # Imported lazily so the module can be imported (and main() driven
        # with another handle) without the project database module.
        from database import Database

        db = Database("db.sqlite3")

    deleted = []
    # get_websites_smaller returns rows; the id is the first column.
    for row in db.get_websites_smaller(max_size):
        website_id = row[0]
        # Same order as before: clear the website first, then delete it.
        db.clear_website(website_id)
        db.delete_website(website_id)
        print("Deleted " + str(website_id))
        deleted.append(website_id)
    return deleted


if __name__ == "__main__":
    main()
|
26
database.py
26
database.py
@ -73,15 +73,15 @@ class Database:
|
|||||||
cursor.execute("SELECT LAST_INSERT_ROWID()")
|
cursor.execute("SELECT LAST_INSERT_ROWID()")
|
||||||
website_paths[file.path] = cursor.fetchone()[0]
|
website_paths[file.path] = cursor.fetchone()[0]
|
||||||
|
|
||||||
# Then MimeTypes
|
# Then FileTypes
|
||||||
mimetypes = dict()
|
mimetypes = dict()
|
||||||
cursor.execute("SELECT * FROM MimeType")
|
cursor.execute("SELECT * FROM FileType")
|
||||||
db_mimetypes = cursor.fetchall()
|
db_mimetypes = cursor.fetchall()
|
||||||
for db_mimetype in db_mimetypes:
|
for db_mimetype in db_mimetypes:
|
||||||
mimetypes[db_mimetype[1]] = db_mimetype[0]
|
mimetypes[db_mimetype[1]] = db_mimetype[0]
|
||||||
for file in files:
|
for file in files:
|
||||||
if file.mime not in mimetypes:
|
if file.mime not in mimetypes:
|
||||||
cursor.execute("INSERT INTO MimeType (mime) VALUES (?)", (file.mime, ))
|
cursor.execute("INSERT INTO FileType (mime) VALUES (?)", (file.mime, ))
|
||||||
cursor.execute("SELECT LAST_INSERT_ROWID()")
|
cursor.execute("SELECT LAST_INSERT_ROWID()")
|
||||||
mimetypes[file.mime] = cursor.fetchone()[0]
|
mimetypes[file.mime] = cursor.fetchone()[0]
|
||||||
|
|
||||||
@ -103,7 +103,8 @@ class Database:
|
|||||||
|
|
||||||
with open(json_file, "r") as f:
|
with open(json_file, "r") as f:
|
||||||
try:
|
try:
|
||||||
self.insert_files([File(website_id, x["path"], x["mime"], x["name"], x["size"]) for x in json.load(f)])
|
self.insert_files([File(website_id, x["path"], os.path.splitext(x["name"])[1].lower(), x["name"], x["size"])
|
||||||
|
for x in json.load(f)])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
print("Couldn't read json file!")
|
print("Couldn't read json file!")
|
||||||
@ -218,11 +219,11 @@ class Database:
|
|||||||
"WHERE File.path_id IN (SELECT id FROM WebsitePath WHERE website_id = ?)", (website_id, ))
|
"WHERE File.path_id IN (SELECT id FROM WebsitePath WHERE website_id = ?)", (website_id, ))
|
||||||
file_sum, file_count = cursor.fetchone()
|
file_sum, file_count = cursor.fetchone()
|
||||||
|
|
||||||
cursor.execute("SELECT SUM(File.size) as total_size, COUNT(File.id), MimeType.mime FROM File "
|
cursor.execute("SELECT SUM(File.size) as total_size, COUNT(File.id), FileType.mime FROM File "
|
||||||
"INNER JOIN MimeType ON MimeType.id = File.mime_id "
|
"INNER JOIN FileType ON FileType.id = File.mime_id "
|
||||||
"INNER JOIN WebsitePath Path on File.path_id = Path.id "
|
"INNER JOIN WebsitePath Path on File.path_id = Path.id "
|
||||||
"WHERE Path.website_id = ? "
|
"WHERE Path.website_id = ? "
|
||||||
"GROUP BY MimeType.id ORDER BY total_size DESC", (website_id, ))
|
"GROUP BY FileType.id ORDER BY total_size DESC", (website_id, ))
|
||||||
db_mime_stats = cursor.fetchall()
|
db_mime_stats = cursor.fetchall()
|
||||||
|
|
||||||
cursor.execute("SELECT Website.url, Website.last_modified FROM Website WHERE id = ?", (website_id, ))
|
cursor.execute("SELECT Website.url, Website.last_modified FROM Website WHERE id = ?", (website_id, ))
|
||||||
@ -287,6 +288,17 @@ class Database:
|
|||||||
cursor.execute("SELECT Website.id FROM Website WHERE last_modified < ?", (date, ))
|
cursor.execute("SELECT Website.id FROM Website WHERE last_modified < ?", (date, ))
|
||||||
return [x[0] for x in cursor.fetchall()]
|
return [x[0] for x in cursor.fetchall()]
|
||||||
|
|
||||||
|
def get_websites_smaller(self, size: int):
    """Return the id rows of websites whose files sum to less than *size*.

    The result is the raw ``fetchall()`` output: a list of one-element
    tuples, each holding a ``Website.id``.  Only websites that have at least
    one ``File`` row (reached through ``WebsitePath``) can appear, because
    both joins are inner joins.
    """
    query = (
        "SELECT Website.id FROM Website "
        "INNER JOIN WebsitePath Path on Website.id = Path.website_id "
        "INNER JOIN File F on Path.id = F.path_id "
        "GROUP BY Website.id HAVING SUM(F.size) < ?"
    )
    params = (size, )

    with sqlite3.connect(self.db_path) as connection:
        # Connection.execute creates the cursor and runs the statement.
        rows = connection.execute(query, params).fetchall()
    return rows
|
||||||
|
|
||||||
def delete_website(self, website_id):
|
def delete_website(self, website_id):
|
||||||
|
|
||||||
with sqlite3.connect(self.db_path) as conn:
|
with sqlite3.connect(self.db_path) as conn:
|
||||||
|
@ -2,4 +2,4 @@ SELECT Website.url, WebsitePath.path, File.name, File.size, MT.mime
|
|||||||
FROM File
|
FROM File
|
||||||
INNER JOIN WebsitePath on File.path_id = WebsitePath.id
|
INNER JOIN WebsitePath on File.path_id = WebsitePath.id
|
||||||
INNER JOIN Website on WebsitePath.website_id = Website.id
|
INNER JOIN Website on WebsitePath.website_id = Website.id
|
||||||
INNER JOIN MimeType MT on File.mime_id = MT.id;
|
INNER JOIN FileType MT on File.mime_id = MT.id;
|
||||||
|
@ -16,7 +16,7 @@ CREATE TABLE WebsitePath (
|
|||||||
FOREIGN KEY (website_id) REFERENCES Website(id)
|
FOREIGN KEY (website_id) REFERENCES Website(id)
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE MimeType (
|
CREATE TABLE FileType (
|
||||||
id INTEGER PRIMARY KEY NOT NULL,
|
id INTEGER PRIMARY KEY NOT NULL,
|
||||||
mime TEXT
|
mime TEXT
|
||||||
);
|
);
|
||||||
@ -29,7 +29,7 @@ CREATE TABLE File (
|
|||||||
size INTEGER,
|
size INTEGER,
|
||||||
|
|
||||||
FOREIGN KEY (path_id) REFERENCES WebsitePath(id),
|
FOREIGN KEY (path_id) REFERENCES WebsitePath(id),
|
||||||
FOREIGN KEY (mime_id) REFERENCES MimeType(id)
|
FOREIGN KEY (mime_id) REFERENCES FileType(id)
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE Queue (
|
CREATE TABLE Queue (
|
||||||
|
2
static/js/jquery.min.js
vendored
2
static/js/jquery.min.js
vendored
File diff suppressed because one or more lines are too long
1
task.py
1
task.py
@ -57,6 +57,7 @@ class TaskManager:
|
|||||||
print("Imported in SQLite3")
|
print("Imported in SQLite3")
|
||||||
|
|
||||||
if post_id:
|
if post_id:
|
||||||
|
# TODO check should_comment()
|
||||||
stats = self.db.get_website_stats(website.id)
|
stats = self.db.get_website_stats(website.id)
|
||||||
comment = self.reddit_bot.get_comment(stats, website.id)
|
comment = self.reddit_bot.get_comment(stats, website.id)
|
||||||
print(comment)
|
print(comment)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user