Added top websites scatter graph

This commit is contained in:
Simon 2018-06-20 12:21:34 -04:00
parent 7400bdc2a9
commit cf51bb381c
5 changed files with 128 additions and 3 deletions

1
app.py
View File

@ -57,6 +57,7 @@ def stats_page():
@app.route("/stats/json_chart")
def stats_json():
    """Return the global search-engine statistics as a JSON response."""
    global_stats = searchEngine.get_global_stats()

    # The call's return value is not used: the same ``global_stats`` object
    # is what gets serialized below.
    db.join_website_on_stats(global_stats)

    payload = json.dumps(global_stats)
    return Response(payload, mimetype="application/json")

View File

@ -234,6 +234,13 @@ class Database:
yield doc
def join_website_on_stats(self, stats):
    """Swap the first element of every ``stats["website_scatter"]`` row.

    Each row's first element is used as a lookup key into the mapping
    returned by ``self.get_all_websites()`` and is overwritten in place with
    the value found there, or with ``"[DELETED]"`` when the key is absent.
    The remaining elements of each row are left untouched. Nothing is
    returned; ``stats`` itself is modified.
    """
    known_websites = self.get_all_websites()

    for row in stats["website_scatter"]:
        website_key = row[0]
        row[0] = known_websites.get(website_key, "[DELETED]")
def add_blacklist_website(self, url):
with sqlite3.connect(self.db_path) as conn:

View File

@ -320,6 +320,32 @@ class ElasticSearchEngine(SearchEngine):
"size": 0
}, index=self.index_name)
website_scatter = self.es.search(body={
"query": {
"bool": {
"must_not": {
"term": {"size": -1},
}
}
},
"aggs": {
"websites": {
"terms": {
"field": "website_id",
"size": 300 # TODO: Figure out what size is appropriate
},
"aggs": {
"size": {
"sum": {
"field": "size"
}
}
}
}
},
"size": 0
}, index=self.index_name)
es_stats = self.es.indices.stats(self.index_name)
stats = dict()
@ -341,6 +367,8 @@ class ElasticSearchEngine(SearchEngine):
for b in size_and_date_histogram["aggregations"]["sizes"]["buckets"]]
stats["dates_histogram"] = [(b["key_as_string"], b["doc_count"])
for b in size_and_date_histogram["aggregations"]["dates"]["buckets"]]
stats["website_scatter"] = [[b["key"], b["doc_count"], b["size"]["value"]]
for b in website_scatter["aggregations"]["websites"]["buckets"]]
stats["base_url"] = "entire database"
return stats

View File

@ -1,3 +1,89 @@
/**
 * Render the websites scatter chart into the `websiteScatter` canvas.
 *
 * Every entry of `rData["website_scatter"]` is read as a three-element
 * array: element 0 is used as the display name, element 1 as the x value
 * (axis labelled "File count") and element 2 as the y value (formatted with
 * `humanFileSize`). Both axes are logarithmic.
 *
 * @param {Object} rData stats payload containing a `website_scatter` array
 */
function drawWebsiteScatter(rData) {

    let dataSet = [];
    let labels = [];

    // for...of visits the array elements themselves, in order, instead of
    // enumerating string keys the way for...in does.
    for (const website of rData["website_scatter"]) {
        dataSet.push({x: website[1], y: website[2]});
        labels.push(website[0] + " " + website[1] + " files, " + humanFileSize(website[2]));
    }

    let ctx = document.getElementById('websiteScatter').getContext('2d');

    new Chart(ctx, {
        type: 'scatter',
        data: {
            datasets: [{
                data: dataSet,
                borderWidth: 1,
                borderColor: "#E94700",
                backgroundColor: "rgba(233, 71, 0, 0.6)"
            }],
            // Same order as dataSet, so a point's index selects its label.
            labels: labels
        },
        options: {
            title: {
                display: true,
                text: "Top " + labels.length + " websites",
                fontColor: "#c6c6c6",
                fontSize: 16,
                fontFamily: "Lato,'Helvetica Neue',Arial,Helvetica,sans-serif"
            },
            legend: {
                display: false
            },
            scales: {
                xAxes: [
                    {
                        type: "logarithmic",
                        ticks: {
                            // Return a label only for exact powers of ten;
                            // every other tick yields undefined.
                            callback: function (value) {
                                if (Number.isInteger(Math.log10(value))) {
                                    return value;
                                }
                            }
                        },
                        scaleLabel: {
                            labelString: "File count",
                            display: true
                        }
                    }
                ],
                yAxes: [
                    {
                        type: "logarithmic",
                        ticks: {
                            // Same power-of-ten filter as the x axis, with
                            // the value rendered through humanFileSize.
                            callback: function (value) {
                                if (Number.isInteger(Math.log10(value))) {
                                    return humanFileSize(value);
                                }
                            }
                        }
                    }
                ]
            },
            tooltips: {
                callbacks: {
                    // Show the pre-built label of the hovered point. The
                    // debug console.log calls were removed: they ran on
                    // every tooltip render.
                    label: function (tooltipItem, data) {
                        return data.labels[tooltipItem.index];
                    }
                }
            }
        },
    });
}
function drawSizeHistogram(rData) {
let labels = [];
@ -20,9 +106,7 @@ function drawSizeHistogram(rData) {
borderColor: "#E94700",
backgroundColor: "rgba(233, 71, 0, 0.6)"
}],
labels: labels,
title: "test"
labels: labels
},
options: {
title: {

View File

@ -19,6 +19,10 @@
<div id="chart-wrapper" style="margin-bottom: 1em">
<canvas id="dateHistogram"></canvas>
</div>
<div id="chart-wrapper" style="margin-bottom: 1em">
<canvas id="websiteScatter"></canvas>
</div>
<h4>Database stats</h4>
<table class="table table-striped">
@ -130,6 +134,7 @@
drawChart(rData);
drawSizeHistogram(rData);
drawDateHistogram(rData);
drawWebsiteScatter(rData);
fillDatabaseTable(rData);
document.getElementById("loading-text").innerHTML = "";