mirror of
https://github.com/simon987/od-database.git
synced 2025-04-19 10:26:44 +00:00
Crawl stats: time format + sorting (#10)
* Nicer stats * Fix right align * No leading day zeros * Fix right-align padding
This commit is contained in:
parent
e89eb6e3e0
commit
1ac3b97d7e
13
app.py
13
app.py
@ -61,6 +61,19 @@ def datetime_format(value, format='%Y-%m-%d %H:%M:%S'):
|
||||
return time.strftime(format, time.gmtime(value))
|
||||
|
||||
|
||||
@app.template_filter("duration_format")
|
||||
def duration_format(value):
    """Format a duration in seconds as a zero-padded, colon-separated string.

    Durations shorter than one day are rendered as ``HH:MM:SS``. Durations of
    one day or more gain a leading day field: ``DD:HH:MM:SS``. Fractional
    seconds are dropped. Negative durations are rendered as the magnitude
    prefixed with ``-``.

    :param value: duration in seconds (int or float)
    :return: the formatted duration string
    """
    delay = datetime.timedelta(seconds=value)

    # timedelta normalizes negative values to a negative day count plus a
    # positive remainder (e.g. "-1 day, 23:59:59"), so format the magnitude
    # and re-attach the sign afterwards.
    negative = delay < datetime.timedelta(0)
    if negative:
        delay = -delay

    # Build the fields from the numeric attributes rather than parsing
    # str(delay): the string form uses the singular "1 day, " for exactly one
    # day, which a replace on " days, " misses and which then fails to parse.
    hours, remainder = divmod(delay.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    fields = [hours, minutes, seconds]
    if delay.days > 0:
        fields.insert(0, delay.days)

    # Avoid printing "-00:00:00" for sub-second negative values.
    sign = "-" if negative and any(fields) else ""
    return sign + ":".join("%02d" % field for field in fields)
|
||||
|
||||
|
||||
@app.template_filter("from_timestamp")
|
||||
def from_timestamp(value):
    """Convert a POSIX timestamp into a ``datetime.datetime``.

    The conversion is delegated to ``datetime.datetime.fromtimestamp`` with no
    timezone argument.

    :param value: timestamp in seconds
    :return: the corresponding ``datetime.datetime`` object
    """
    to_datetime = datetime.datetime.fromtimestamp
    return to_datetime(value)
|
||||
|
17
database.py
17
database.py
@ -334,18 +334,21 @@ class Database:
|
||||
return [tasks.TaskResult(r[1], r[2], r[3], r[4], r[0], str(r[5])) for r in cursor.fetchall()]
|
||||
|
||||
def get_stats_by_crawler(self):
    """Aggregate crawl-log statistics per crawler.

    Crawl logs from ``self.get_crawl_logs()`` are grouped by ``server_name``
    and matched against the names of the tokens from ``self.get_tokens()``.
    Crawlers with no logged task are omitted.

    :return: list of ``(crawler_name, info)`` tuples sorted by total
        ``file_count`` in descending order, where ``info`` is a dict with the
        keys ``file_count``, ``time``, ``task_count``, ``time_avg`` and
        ``file_count_avg``
    """
    # Group the logs once instead of rescanning the whole list several times
    # for every crawler.
    results_by_server = {}
    for result in self.get_crawl_logs():
        results_by_server.setdefault(result.server_name, []).append(result)

    stats = []
    for crawler in self.get_tokens():
        results = results_by_server.get(crawler.name)
        if not results:
            # No task logged for this crawler; skipping it also avoids a
            # division by zero in the averages below.
            continue

        task_count = len(results)
        file_count = sum(result.file_count for result in results)
        total_time = sum((result.end_time - result.start_time) for result in results)

        info = {
            "file_count": file_count,
            "time": total_time,
            "task_count": task_count,
            "time_avg": total_time / task_count,
            "file_count_avg": file_count / task_count,
        }
        stats.append((crawler.name, info))

    # Crawlers that indexed the most files come first.
    stats.sort(key=lambda t: t[1]["file_count"], reverse=True)

    return stats
|
||||
|
||||
|
@ -19,6 +19,10 @@ a {
|
||||
.table td {
|
||||
padding: 2px 0;
|
||||
}
|
||||
.td-numeric {
|
||||
text-align: end;
|
||||
padding-right: 1em;
|
||||
}
|
||||
|
||||
.bg-application {
|
||||
background: #8FB847;
|
||||
|
@ -83,14 +83,15 @@
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for server in crawl_server_stats %}
|
||||
{% for entry in crawl_server_stats %}
|
||||
{% set server, info = entry %}
|
||||
<tr>
|
||||
<td><b>{{ server }}</b></td>
|
||||
<td>{{ crawl_server_stats[server].task_count }}</td>
|
||||
<td>{{ crawl_server_stats[server].time|round(2) }}s</td>
|
||||
<td>{{ crawl_server_stats[server].time_avg|round(2) }}s</td>
|
||||
<td>{{ crawl_server_stats[server].file_count }}</td>
|
||||
<td>{{ crawl_server_stats[server].file_count_avg | round(2) }}</td>
|
||||
<td class="td-numeric">{{ info.task_count }}</td>
|
||||
<td class="td-numeric">{{ info.time | duration_format() }}</td>
|
||||
<td class="td-numeric">{{ info.time_avg | duration_format() }}</td>
|
||||
<td class="td-numeric">{{ info.file_count }}</td>
|
||||
<td class="td-numeric">{{ "%.2f" % info.file_count_avg }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
|
Loading…
x
Reference in New Issue
Block a user