diff --git a/app.py b/app.py
index f1dcf0b..c19482d 100644
--- a/app.py
+++ b/app.py
@@ -57,6 +57,7 @@ def stats_page():
 @app.route("/stats/json_chart")
 def stats_json():
     stats = searchEngine.get_global_stats()
+    db.join_website_on_stats(stats)
     return Response(json.dumps(stats), mimetype="application/json")
 
 
diff --git a/database.py b/database.py
index 78076c1..88ec7f5 100644
--- a/database.py
+++ b/database.py
@@ -234,6 +234,19 @@ class Database:
 
                 yield doc
 
+    def join_website_on_stats(self, stats):
+        """Swap the website id of each "website_scatter" entry for its website.
+
+        Every entry is a mutable list whose first item is a website id; that
+        item is overwritten in place with the value ``get_all_websites()``
+        holds for the id, or "[DELETED]" when the id is unknown. Stats
+        without a "website_scatter" key are left untouched.
+        """
+        websites = self.get_all_websites()
+
+        for website in stats.get("website_scatter", ()):
+            website[0] = websites.get(website[0], "[DELETED]")
+
     def add_blacklist_website(self, url):
 
         with sqlite3.connect(self.db_path) as conn:
diff --git a/search/search.py b/search/search.py
index a0758a9..2ef5319 100644
--- a/search/search.py
+++ b/search/search.py
@@ -320,6 +320,32 @@ class ElasticSearchEngine(SearchEngine):
             "size": 0
         }, index=self.index_name)
 
+        website_scatter = self.es.search(body={
+            "query": {
+                "bool": {
+                    "must_not": {
+                        "term": {"size": -1},
+                    }
+                }
+            },
+            "aggs": {
+                "websites": {
+                    "terms": {
+                        "field": "website_id",
+                        "size": 300  # TODO: Figure out what size is appropriate
+                    },
+                    "aggs": {
+                        "size": {
+                            "sum": {
+                                "field": "size"
+                            }
+                        }
+                    }
+                }
+            },
+            "size": 0
+        }, index=self.index_name)
+
         es_stats = self.es.indices.stats(self.index_name)
 
         stats = dict()
@@ -341,6 +367,8 @@ class ElasticSearchEngine(SearchEngine):
                                    for b in size_and_date_histogram["aggregations"]["sizes"]["buckets"]]
         stats["dates_histogram"] = [(b["key_as_string"], b["doc_count"])
                                     for b in size_and_date_histogram["aggregations"]["dates"]["buckets"]]
+        stats["website_scatter"] = [[b["key"], b["doc_count"], b["size"]["value"]]
+                                    for b in website_scatter["aggregations"]["websites"]["buckets"]]
         stats["base_url"] = "entire database"
 
         return stats
diff --git a/static/js/report.js b/static/js/report.js
index eb9317c..6616b02 100644
--- a/static/js/report.js
+++ b/static/js/report.js
@@ -1,3 +1,89 @@
+/**
+ * Draw a log-log scatter chart (file count vs. total size, one point per
+ * website) on the canvas whose id is "websiteScatter".
+ *
+ * @param {Object} rData Stats payload; rData["website_scatter"] is a list of
+ *     [label, fileCount, totalSize] triples.
+ */
+function drawWebsiteScatter(rData) {
+    let dataSet = [];
+    let labels = [];
+
+    for (const website of rData["website_scatter"]) {
+        dataSet.push({x: website[1], y: website[2]});
+        labels.push(website[0] + " " + website[1] + " files, " + humanFileSize(website[2]))
+    }
+
+    let ctx = document.getElementById('websiteScatter').getContext('2d');
+    new Chart(ctx, {
+        type: 'scatter',
+        data: {
+            datasets: [{
+                data: dataSet,
+                borderWidth: 1,
+                borderColor: "#E94700",
+                backgroundColor: "rgba(233, 71, 0, 0.6)"
+            }],
+            labels: labels
+        },
+        options: {
+            title: {
+                display: true,
+                text: "Top " + labels.length + " websites",
+                fontColor: "#c6c6c6",
+                fontSize: 16,
+                fontFamily: "Lato,'Helvetica Neue',Arial,Helvetica,sans-serif"
+            },
+            legend: {
+                display: false
+            },
+            scales: {
+                xAxes: [
+                    {
+                        type: "logarithmic",
+                        ticks: {
+                            callback: function (value, index, values) {
+                                // Label only exact powers of ten.
+                                let log10 = Math.log10(value);
+
+                                if (Number.isInteger(log10)) {
+                                    return value;
+                                }
+                            }
+                        },
+                        scaleLabel: {
+                            labelString: "File count",
+                            display: true
+                        }
+                    }
+                ],
+                yAxes: [
+                    {
+                        type: "logarithmic",
+                        ticks: {
+                            callback: function (value, index, values) {
+                                // Label only exact powers of ten.
+                                let log10 = Math.log10(value);
+
+                                if (Number.isInteger(log10)) {
+                                    return humanFileSize(value);
+                                }
+                            }
+                        }
+                    }
+                ]
+            },
+            tooltips: {
+                callbacks: {
+                    label: function(tooltipItem, data) {
+                        return data.labels[tooltipItem.index];
+                    }
+                }
+            }
+        },
+    });
+}
+
 function drawSizeHistogram(rData) {
 
     let labels = [];
@@ -20,9 +106,7 @@ function drawSizeHistogram(rData) {
                 borderColor: "#E94700",
                 backgroundColor: "rgba(233, 71, 0, 0.6)"
             }],
-            labels: labels,
-            title: "test"
-
+            labels: labels
         },
         options: {
             title: {
diff --git a/templates/stats.html b/templates/stats.html
index a15232e..0795d00 100644
--- a/templates/stats.html
+++ b/templates/stats.html
@@ -19,6 +19,10 @@