mirror of
https://github.com/simon987/od-database.git
synced 2025-04-16 17:06:46 +00:00
Changed from mime to extension for graph and added script to clear invalid websites
This commit is contained in:
parent
819e2fbddb
commit
bb872a9248
9
clean_invalid_websites.py
Normal file
9
clean_invalid_websites.py
Normal file
@ -0,0 +1,9 @@
|
||||
from database import Database
|
||||
|
||||
|
||||
# Maintenance script: remove every website whose indexed files add up to
# less than 10,000,000 bytes. The website's indexed data is cleared first,
# then the website record itself is deleted.
db = Database("db.sqlite3")

websites_to_delete = db.get_websites_smaller(10000000)

# Each row returned by get_websites_smaller() carries the website id in its
# first column.
for row in websites_to_delete:
    website_id = row[0]
    db.clear_website(website_id)
    db.delete_website(website_id)
    print(f"Deleted {website_id}")
|
26
database.py
26
database.py
@ -73,15 +73,15 @@ class Database:
|
||||
cursor.execute("SELECT LAST_INSERT_ROWID()")
|
||||
website_paths[file.path] = cursor.fetchone()[0]
|
||||
|
||||
# Then MimeTypes
|
||||
# Then FileTypes
|
||||
mimetypes = dict()
|
||||
cursor.execute("SELECT * FROM MimeType")
|
||||
cursor.execute("SELECT * FROM FileType")
|
||||
db_mimetypes = cursor.fetchall()
|
||||
for db_mimetype in db_mimetypes:
|
||||
mimetypes[db_mimetype[1]] = db_mimetype[0]
|
||||
for file in files:
|
||||
if file.mime not in mimetypes:
|
||||
cursor.execute("INSERT INTO MimeType (mime) VALUES (?)", (file.mime, ))
|
||||
cursor.execute("INSERT INTO FileType (mime) VALUES (?)", (file.mime, ))
|
||||
cursor.execute("SELECT LAST_INSERT_ROWID()")
|
||||
mimetypes[file.mime] = cursor.fetchone()[0]
|
||||
|
||||
@ -103,7 +103,8 @@ class Database:
|
||||
|
||||
with open(json_file, "r") as f:
|
||||
try:
|
||||
self.insert_files([File(website_id, x["path"], x["mime"], x["name"], x["size"]) for x in json.load(f)])
|
||||
self.insert_files([File(website_id, x["path"], os.path.splitext(x["name"])[1].lower(), x["name"], x["size"])
|
||||
for x in json.load(f)])
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print("Couldn't read json file!")
|
||||
@ -218,11 +219,11 @@ class Database:
|
||||
"WHERE File.path_id IN (SELECT id FROM WebsitePath WHERE website_id = ?)", (website_id, ))
|
||||
file_sum, file_count = cursor.fetchone()
|
||||
|
||||
cursor.execute("SELECT SUM(File.size) as total_size, COUNT(File.id), MimeType.mime FROM File "
|
||||
"INNER JOIN MimeType ON MimeType.id = File.mime_id "
|
||||
cursor.execute("SELECT SUM(File.size) as total_size, COUNT(File.id), FileType.mime FROM File "
|
||||
"INNER JOIN FileType ON FileType.id = File.mime_id "
|
||||
"INNER JOIN WebsitePath Path on File.path_id = Path.id "
|
||||
"WHERE Path.website_id = ? "
|
||||
"GROUP BY MimeType.id ORDER BY total_size DESC", (website_id, ))
|
||||
"GROUP BY FileType.id ORDER BY total_size DESC", (website_id, ))
|
||||
db_mime_stats = cursor.fetchall()
|
||||
|
||||
cursor.execute("SELECT Website.url, Website.last_modified FROM Website WHERE id = ?", (website_id, ))
|
||||
@ -287,6 +288,17 @@ class Database:
|
||||
cursor.execute("SELECT Website.id FROM Website WHERE last_modified < ?", (date, ))
|
||||
return [x[0] for x in cursor.fetchall()]
|
||||
|
||||
def get_websites_smaller(self, size: int):
    """Return the ids of websites whose files total less than *size*.

    The total is the sum of ``File.size`` over every file reachable through
    the website's ``WebsitePath`` rows.

    Args:
        size: Exclusive upper bound on the summed file size.

    Returns:
        A list of one-element tuples ``(website_id,)``, exactly as produced
        by ``cursor.fetchall()``.

    Note:
        Because the query uses INNER JOINs, a website with no paths or no
        files produces no joined rows and is therefore never returned, even
        though its total size is effectively zero.
    """

    # sqlite3's connection context manager only commits/rolls back; it does
    # not close the connection. Close it explicitly so repeated calls do not
    # leak open database handles.
    conn = sqlite3.connect(self.db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT Website.id FROM Website "
                       "INNER JOIN WebsitePath Path on Website.id = Path.website_id "
                       "INNER JOIN File F on Path.id = F.path_id "
                       "GROUP BY Website.id HAVING SUM(F.size) < ?", (size, ))
        return cursor.fetchall()
    finally:
        conn.close()
|
||||
|
||||
def delete_website(self, website_id):
|
||||
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
|
@ -2,4 +2,4 @@ SELECT Website.url, WebsitePath.path, File.name, File.size, MT.mime
|
||||
FROM File
|
||||
INNER JOIN WebsitePath on File.path_id = WebsitePath.id
|
||||
INNER JOIN Website on WebsitePath.website_id = Website.id
|
||||
INNER JOIN MimeType MT on File.mime_id = MT.id;
|
||||
INNER JOIN FileType MT on File.mime_id = MT.id;
|
||||
|
@ -16,7 +16,7 @@ CREATE TABLE WebsitePath (
|
||||
FOREIGN KEY (website_id) REFERENCES Website(id)
|
||||
);
|
||||
|
||||
CREATE TABLE MimeType (
|
||||
CREATE TABLE FileType (
|
||||
id INTEGER PRIMARY KEY NOT NULL,
|
||||
mime TEXT
|
||||
);
|
||||
@ -29,7 +29,7 @@ CREATE TABLE File (
|
||||
size INTEGER,
|
||||
|
||||
FOREIGN KEY (path_id) REFERENCES WebsitePath(id),
|
||||
FOREIGN KEY (mime_id) REFERENCES MimeType(id)
|
||||
FOREIGN KEY (mime_id) REFERENCES FileType(id)
|
||||
);
|
||||
|
||||
CREATE TABLE Queue (
|
||||
|
2
static/js/jquery.min.js
vendored
2
static/js/jquery.min.js
vendored
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user