Crawl stats: time format + sorting (#10)

* Nicer stats
* Fix right align
* No leading day zeros
* Fix right-align padding
This commit is contained in:
terorie 2018-12-14 15:30:06 +01:00 committed by Simon Fortier
parent e89eb6e3e0
commit 1ac3b97d7e
4 changed files with 34 additions and 13 deletions

13
app.py
View File

@ -61,6 +61,19 @@ def datetime_format(value, format='%Y-%m-%d %H:%M:%S'):
return time.strftime(format, time.gmtime(value))
@app.template_filter("duration_format")
def duration_format(value):
    """Format a duration given in seconds as ``HH:MM:SS`` or ``DD:HH:MM:SS``.

    Every field is zero-padded to at least two digits. The day field is only
    present when the duration is one day or longer. Fractional seconds are
    truncated after ``datetime.timedelta`` has rounded the value to
    microseconds.

    :param value: duration in seconds (int or float); may be negative.
    :return: the formatted duration, prefixed with ``-`` when negative.
    """
    delay = datetime.timedelta(seconds=value)

    # Work on the magnitude: a negative timedelta is normalised by Python to
    # "-1 day, 23:59:59", which would otherwise leak into the output.
    negative = delay < datetime.timedelta(0)
    delay = abs(delay)

    # Build the fields from the timedelta attributes instead of parsing
    # str(delay): the string form uses "1 day, " (singular) for exactly one
    # day, which the previous " days, " replacement did not handle.
    hours, remainder = divmod(delay.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    parts = [hours, minutes, seconds]
    if delay.days > 0:
        parts.insert(0, delay.days)

    # Do not emit "-00:00:00" for negative sub-second values.
    sign = "-" if negative and (delay.days or delay.seconds) else ""
    return sign + ":".join("%02d" % part for part in parts)
@app.template_filter("from_timestamp")
def from_timestamp(value):
    """Template filter: turn a POSIX timestamp into a ``datetime.datetime``.

    The conversion is delegated to ``datetime.datetime.fromtimestamp`` without
    a ``tz`` argument.

    :param value: timestamp in seconds since the epoch.
    :return: the corresponding ``datetime.datetime`` object.
    """
    moment = datetime.datetime.fromtimestamp(value)
    return moment

View File

@ -334,18 +334,21 @@ class Database:
return [tasks.TaskResult(r[1], r[2], r[3], r[4], r[0], str(r[5])) for r in cursor.fetchall()]
def get_stats_by_crawler(self):
    """Aggregate crawl-log statistics per crawler.

    Only crawlers returned by ``self.get_tokens()`` that have at least one
    result in ``self.get_crawl_logs()`` are reported.

    :return: a list of ``(crawler_name, info)`` tuples sorted by
        ``info["file_count"]`` in descending order, where ``info`` is a dict
        with the keys ``file_count``, ``time``, ``task_count``, ``time_avg``
        and ``file_count_avg``.
    """
    # Group the results by server name in a single pass so the log is not
    # rescanned several times for every crawler.
    results_by_server = {}
    for result in self.get_crawl_logs():
        results_by_server.setdefault(result.server_name, []).append(result)

    stats = []
    for crawler in self.get_tokens():
        results = results_by_server.get(crawler.name)
        if not results:
            # Crawlers without any completed task are left out of the stats.
            continue

        task_count = len(results)
        file_count = sum(result.file_count for result in results)
        total_time = sum((result.end_time - result.start_time) for result in results)
        info = {
            "file_count": file_count,
            "time": total_time,
            "task_count": task_count,
            "time_avg": total_time / task_count,
            "file_count_avg": file_count / task_count,
        }
        stats.append((crawler.name, info))

    # Most productive crawlers first.
    stats.sort(key=lambda entry: entry[1]["file_count"], reverse=True)
    return stats

View File

@ -19,6 +19,10 @@ a {
/* Table cells: 2px vertical padding, no horizontal padding. */
.table td {
padding: 2px 0;
}
/* Cells carrying numeric values: align the text to the end edge of the cell
   and keep 1em of padding on the right. */
.td-numeric {
text-align: end;
padding-right: 1em;
}
.bg-application {
background: #8FB847;

View File

@ -83,14 +83,15 @@
</tr>
</thead>
<tbody>
{# One row per crawler; crawl_server_stats is a list of (server, info) pairs. #}
{% for server, info in crawl_server_stats %}
    <tr>
        <td><b>{{ server }}</b></td>
        <td class="td-numeric">{{ info.task_count }}</td>
        <td class="td-numeric">{{ info.time | duration_format() }}</td>
        <td class="td-numeric">{{ info.time_avg | duration_format() }}</td>
        <td class="td-numeric">{{ info.file_count }}</td>
        <td class="td-numeric">{{ "%.2f" % info.file_count_avg }}</td>
    </tr>
{% endfor %}
</tbody>