diff --git a/app.py b/app.py index 7e6012e..289f094 100644 --- a/app.py +++ b/app.py @@ -112,6 +112,55 @@ def websites(): return render_template("websites.html", websites=db.get_websites(100, page)) +@app.route("/website/delete_empty") +def admin_delete_empty_website(): + """Delete websites with no associated files that are not queued""" + + if "username" in session: + + current_tasks = taskDispatcher.get_queued_tasks() + taskDispatcher.get_current_tasks() + queued_websites = [task.website_id for task in current_tasks] + all_websites = db.get_all_websites() + non_queued_websites = list(set(all_websites).difference(queued_websites)) + + empty_websites = searchEngine.are_empty(non_queued_websites) + + for website in empty_websites: + db.delete_website(website) + + flash("Deleted: " + repr(list(empty_websites)), "success") + return redirect("/dashboard") + + else: + abort(403) + + +@app.route("/website//clear") +def admin_clear_website(website_id): + + if "username" in session: + + searchEngine.delete_docs(website_id) + flash("Cleared all documents associated with this website", "success") + return redirect("/website/" + str(website_id)) + else: + abort(403) + + +@app.route("/website//delete") +def admin_delete_website(website_id): + + if "username" in session: + + searchEngine.delete_docs(website_id) + db.delete_website(website_id) + flash("Deleted website " + str(website_id), "success") + return redirect("/website/") + + else: + abort(403) + + @app.route("/search") def search(): diff --git a/blacklist.txt b/blacklist.txt index b45e907..1c98596 100644 --- a/blacklist.txt +++ b/blacklist.txt @@ -5,4 +5,5 @@ http://mirror.math.princeton.edu https://www.dropbox.com https://oss.jfrog.org http://skyarchive.info -https://skyarchive.info \ No newline at end of file +https://skyarchive.info +http://ftp.ubuntu.com/ \ No newline at end of file diff --git a/database.py b/database.py index 7b8249a..864cd15 100644 --- a/database.py +++ b/database.py @@ -114,16 +114,6 @@ class Database: website_id = cursor.fetchone() return website_id[0] if website_id else None - def website_has_been_scanned(self, url): - """Check if a website has at least 1 file""" - # TODO: Check with SearchEngine - print("FIXME: website_has_been_scanned") - - def clear_website(self, website_id): - """Remove all files from a website and update its last_updated date""" - # TODO: Check with SearchEngine - print("FIXME: clear_website") - def get_websites_older(self, delta: datetime.timedelta): """Get websites last updated before a given date""" date = datetime.datetime.utcnow() - delta @@ -158,7 +148,7 @@ class Database: with sqlite3.connect(self.db_path) as conn: cursor = conn.cursor() - hashed_pw = bcrypt.hashpw(password.encode(), bcrypt.gensalt(14)) + hashed_pw = bcrypt.hashpw(password.encode(), bcrypt.gensalt(12)) cursor.execute("INSERT INTO Admin (username, password) VALUES (?,?)", (username, hashed_pw)) conn.commit() diff --git a/od_util.py b/od_util.py index ce6aafd..6193a3a 100644 --- a/od_util.py +++ b/od_util.py @@ -40,7 +40,8 @@ category_map = { 'wiz': 'application', 'wsdl': 'application', 'xlb': 'application', 'xls': 'application', 'xpdl': 'application', 'xsl': 'application', 'torrent': 'application', 'rpm': 'application', 'deb': 'application', - 'atr': 'application', + 'atr': 'application', 'class': 'application', 'ttf': 'application', + 'img': 'application', 'msi': 'application', 'run': 'application', # Text category 'java': 'text', 'cpp': 'text', 'rb': 'text', 'bat': 'text', 'latex': 'text', 'xml': 'text', @@ -51,7 +52,10 @@ category_map = { 'h': 'text', 'tsv': 'text', 'rtx': 'text', 'sgm': 'text', 'sgml': 'text', 'txt': 'text', 'vcf': 'text', 'pdf': 'text', 'epub': 'text', - 'srt': 'text', + 'srt': 'text', 'inc': 'text', 'php': 'text', + 'cbz': 'text', 'docx': 'text', 'mobi': 'text', + 'chm': 'text', 'xlsx': "text", 'djvu': 'text', + 'rtf': 'text', 'log': 'text', 'md': 'text', # Video category '3g2': 'video', '3gp': 'video', 'asf': 'video', 'asx': 'video', 'avi': 'video', 'flv': 'video', @@ -60,13 +64,16 @@ category_map = { 'm3u': 'video', 'm3u8': 'video', 'movie': 'video', 'mp4': 'video', 'mpa': 'video', 'mpe': 'video', 'mpeg': 'video', 'mpg': 'video', 'mkv': 'video', - 'wmv': 'video', 'm4s': 'video', + 'wmv': 'video', 'm4s': 'video', 'ogv': 'video', + 'm4b': 'video', 'm4v': 'video', # Audio category 'wav': 'audio', 'snd': 'audio', 'mp2': 'audio', 'aif': 'audio', 'iff': 'audio', 'm4a': 'audio', 'mid': 'audio', 'midi': 'audio', 'mp3': 'audio', 'wma': 'audio', 'ra': 'audio', 'aifc': 'audio', 'aiff': 'audio', 'au': 'audio', 'flac': 'audio', + 'ogg': 'audio', 'oga': 'audio', 'mka': 'video', + 'ac3': 'audio', # Image category 'bmp': 'image', 'gif': 'image', 'jpg': 'image', 'xwd': 'image', 'tif': 'image', 'tiff': 'image', @@ -107,7 +114,7 @@ category_map = { 'xp3': 'archive', 'yz1': 'archive', 'zip': 'archive', 'zipx': 'archive', 'zoo': 'archive', 'zpaq': 'archive', 'zz': 'archive', 'xpi': 'archive', 'tgz': 'archive', - 'tbz': 'archive', + 'tbz': 'archive', 'tar': 'archive', 'bz': 'archive', } colors = { diff --git a/search/search.py b/search/search.py index 7e1746b..a0758a9 100644 --- a/search/search.py +++ b/search/search.py @@ -352,3 +352,36 @@ class ElasticSearchEngine(SearchEngine): "match_all": {} } }, scroll="5m", client=self.es, index=self.index_name) + + def are_empty(self, websites): + + result = self.es.search(body={ + "query": { + "bool": { + "filter": { + "terms": { + "website_id": websites + }, + } + } + }, + "aggs": { + "websites": { + "terms": { + "field": "website_id", + "size": 100000, + "min_doc_count": 1 + } + } + }, + "size": 0 + }, index=self.index_name) + + non_empty_websites = [bucket["key"] for bucket in result["aggregations"]["websites"]["buckets"]] + + for website in websites: + if website not in non_empty_websites: + yield website + + + diff --git a/static/js/report.js b/static/js/report.js index c6d5612..eb9317c 100644 --- a/static/js/report.js +++ b/static/js/report.js @@ -126,7 +126,7 @@ function drawChart(rData) { for (let ext in rData["ext_stats"]) { - dataSetSize.push(rData["ext_stats"][ext][0]); + dataSetSize.push(Math.max(rData["ext_stats"][ext][0], 0)); dataSetCount.push(rData["ext_stats"][ext][1]); labels.push(rData["ext_stats"][ext][2] + " x" + rData["ext_stats"][ext][1] + " (" + humanFileSize(rData["ext_stats"][ext][0]) + ")"); @@ -252,7 +252,7 @@ category_map = { 'h': 'text', 'tsv': 'text', 'rtx': 'text', 'sgm': 'text', 'sgml': 'text', 'txt': 'text', 'vcf': 'text', 'pdf': 'text', 'epub': 'text', - 'srt': 'text', 'cbr': 'text', + 'srt': 'text', 'cbr': 'text', 'inc': 'text', //Video category '3g2': 'video', '3gp': 'video', 'asf': 'video', 'asx': 'video', 'avi': 'video', 'flv': 'video', @@ -261,7 +261,8 @@ category_map = { 'm3u': 'video', 'm3u8': 'video', 'movie': 'video', 'mp4': 'video', 'mpa': 'video', 'mpe': 'video', 'mpeg': 'video', 'mpg': 'video', 'mkv': 'video', - 'wmv': 'video', 'm4s': 'video', + 'wmv': 'video', 'm4s': 'video', 'm4v': 'video', + 'mp4a': 'video', // Audio category 'wav': 'audio', 'snd': 'audio', 'mp2': 'audio', 'aif': 'audio', 'iff': 'audio', 'm4a': 'audio', diff --git a/templates/dashboard.html b/templates/dashboard.html index ee6129d..0d3361f 100644 --- a/templates/dashboard.html +++ b/templates/dashboard.html @@ -43,9 +43,11 @@ +
+

Misc actions

-

TODO:

+ Delete websites with no associated files that are not queued
Logout diff --git a/templates/website.html b/templates/website.html index 1d1134b..89c7297 100644 --- a/templates/website.html +++ b/templates/website.html @@ -40,6 +40,10 @@
Link list Summary (JSON) + {% if "username" in session %} + Clear + Delete + {% endif %}