From 1ac3b97d7ebbedff614822bb29d609d182df77ab Mon Sep 17 00:00:00 2001 From: terorie Date: Fri, 14 Dec 2018 15:30:06 +0100 Subject: [PATCH] Crawl stats: time format + sorting (#10) * Nicer stats * Fix right align * No leading day zeros * Fix right-align padding --- app.py | 13 +++++++++++++ database.py | 17 ++++++++++------- static/css/main.css | 4 ++++ templates/stats.html | 13 +++++++------ 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/app.py b/app.py index dae9c1a..8eee6b5 100644 --- a/app.py +++ b/app.py @@ -61,6 +61,19 @@ def datetime_format(value, format='%Y-%m-%d %H:%M:%S'): return time.strftime(format, time.gmtime(value)) +@app.template_filter("duration_format") +def duration_format(value): + delay = datetime.timedelta(seconds=value) + if (delay.days > 0): + out = str(delay).replace(" days, ", ":") + else: + out = str(delay) + out_ar = out.split(':') + out_ar = ["%02d" % (int(float(x))) for x in out_ar] + out = ":".join(out_ar) + return out + + @app.template_filter("from_timestamp") def from_timestamp(value): return datetime.datetime.fromtimestamp(value) diff --git a/database.py b/database.py index f3b7f2a..aaa1e65 100644 --- a/database.py +++ b/database.py @@ -334,18 +334,21 @@ class Database: return [tasks.TaskResult(r[1], r[2], r[3], r[4], r[0], str(r[5])) for r in cursor.fetchall()] def get_stats_by_crawler(self): - stats = dict() + stats = [] task_results = self.get_crawl_logs() for crawler in self.get_tokens(): task_count = sum(1 for result in task_results if result.server_name == crawler.name) if task_count > 0: - stats[crawler.name] = dict() - stats[crawler.name]["file_count"] = sum(result.file_count for result in task_results if result.server_name == crawler.name) - stats[crawler.name]["time"] = sum((result.end_time - result.start_time) for result in task_results if result.server_name == crawler.name) - stats[crawler.name]["task_count"] = task_count - stats[crawler.name]["time_avg"] = stats[crawler.name]["time"] / task_count - stats[crawler.name]["file_count_avg"] = stats[crawler.name]["file_count"] / task_count + info = dict() + info["file_count"] = sum(result.file_count for result in task_results if result.server_name == crawler.name) + info["time"] = sum((result.end_time - result.start_time) for result in task_results if result.server_name == crawler.name) + info["task_count"] = task_count + info["time_avg"] = info["time"] / task_count + info["file_count_avg"] = info["file_count"] / task_count + stats.append((crawler.name, info)) + + stats.sort(key=lambda t: t[1]["file_count"], reverse=True) return stats diff --git a/static/css/main.css b/static/css/main.css index a49b321..3db1cc5 100644 --- a/static/css/main.css +++ b/static/css/main.css @@ -19,6 +19,10 @@ a { .table td { padding: 2px 0; } +.td-numeric { + text-align: end; + padding-right: 1em; +} .bg-application { background: #8FB847; diff --git a/templates/stats.html b/templates/stats.html index 26af6bb..73139dd 100644 --- a/templates/stats.html +++ b/templates/stats.html @@ -83,14 +83,15 @@ - {% for server in crawl_server_stats %} + {% for entry in crawl_server_stats %} + {% set server, info = entry %} {{ server }} - {{ crawl_server_stats[server].task_count }} - {{ crawl_server_stats[server].time|round(2) }}s - {{ crawl_server_stats[server].time_avg|round(2) }}s - {{ crawl_server_stats[server].file_count }} - {{ crawl_server_stats[server].file_count_avg | round(2) }} + {{ info.task_count }} + {{ info.time | duration_format() }} + {{ info.time_avg | duration_format() }} + {{ info.file_count }} + {{ "%.2f" % info.file_count_avg }} {% endfor %}