mirror of
https://github.com/simon987/od-database.git
synced 2025-04-19 18:36:44 +00:00
Crawl stats: time format + sorting (#10)
* Nicer stats * Fix right align * No leading day zeros * Fix right-align padding
This commit is contained in:
parent
e89eb6e3e0
commit
1ac3b97d7e
13
app.py
13
app.py
@ -61,6 +61,19 @@ def datetime_format(value, format='%Y-%m-%d %H:%M:%S'):
|
|||||||
return time.strftime(format, time.gmtime(value))
|
return time.strftime(format, time.gmtime(value))
|
||||||
|
|
||||||
|
|
||||||
|
@app.template_filter("duration_format")
def duration_format(value):
    """Format a duration given in seconds as zero-padded ``HH:MM:SS``.

    Durations of one day or more get a leading day field and are rendered
    as ``DD:HH:MM:SS``. Fractional seconds are dropped. Negative durations
    are rendered as the absolute value prefixed with ``-``.

    :param value: duration in seconds (int or float)
    :return: the formatted duration string
    """
    delay = datetime.timedelta(seconds=value)

    sign = ""
    if delay < datetime.timedelta(0):
        sign = "-"
        delay = -delay

    # Build the fields from the timedelta attributes rather than parsing
    # str(delay): the string form is "1 day, ..." (singular) for exactly one
    # day and "-1 day, ..." for negative values, neither of which can be
    # split on ':' and converted to numbers.
    hours, remainder = divmod(delay.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)

    fields = [hours, minutes, seconds]
    if delay.days > 0:
        fields.insert(0, delay.days)

    return sign + ":".join("%02d" % field for field in fields)
|
||||||
|
|
||||||
|
|
||||||
@app.template_filter("from_timestamp")
|
@app.template_filter("from_timestamp")
|
||||||
def from_timestamp(value):
|
def from_timestamp(value):
|
||||||
return datetime.datetime.fromtimestamp(value)
|
return datetime.datetime.fromtimestamp(value)
|
||||||
|
17
database.py
17
database.py
@ -334,18 +334,21 @@ class Database:
|
|||||||
return [tasks.TaskResult(r[1], r[2], r[3], r[4], r[0], str(r[5])) for r in cursor.fetchall()]
|
return [tasks.TaskResult(r[1], r[2], r[3], r[4], r[0], str(r[5])) for r in cursor.fetchall()]
|
||||||
|
|
||||||
def get_stats_by_crawler(self):
    """Aggregate the crawl log into per-crawler statistics.

    Only crawlers returned by ``self.get_tokens()`` that have at least one
    entry in ``self.get_crawl_logs()`` are reported.

    :return: list of ``(crawler_name, info)`` tuples sorted by
        ``info["file_count"]`` in descending order. ``info`` is a dict with
        the keys ``file_count``, ``time``, ``task_count``, ``time_avg`` and
        ``file_count_avg``; ``time`` is the sum of ``end_time - start_time``
        over the crawler's task results.
    """
    # Group the log by server name in a single pass so the whole log is not
    # rescanned several times for every crawler.
    # NOTE(review): assumes server_name values are hashable (presumably
    # strings) - confirm against tasks.TaskResult.
    results_by_server = {}
    for result in self.get_crawl_logs():
        results_by_server.setdefault(result.server_name, []).append(result)

    stats = []
    for crawler in self.get_tokens():
        results = results_by_server.get(crawler.name)
        if not results:
            # Crawlers without any logged task are left out of the stats.
            continue

        task_count = len(results)
        info = dict()
        info["file_count"] = sum(result.file_count for result in results)
        info["time"] = sum((result.end_time - result.start_time) for result in results)
        info["task_count"] = task_count
        info["time_avg"] = info["time"] / task_count
        info["file_count_avg"] = info["file_count"] / task_count
        stats.append((crawler.name, info))

    # Busiest crawler first; the sort is stable, so ties keep token order.
    stats.sort(key=lambda t: t[1]["file_count"], reverse=True)

    return stats
|
||||||
|
|
||||||
|
@ -19,6 +19,10 @@ a {
|
|||||||
.table td {
|
.table td {
|
||||||
padding: 2px 0;
|
padding: 2px 0;
|
||||||
}
|
}
|
||||||
|
.td-numeric {
|
||||||
|
text-align: end;
|
||||||
|
padding-right: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
.bg-application {
|
.bg-application {
|
||||||
background: #8FB847;
|
background: #8FB847;
|
||||||
|
@ -83,14 +83,15 @@
|
|||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{% for server in crawl_server_stats %}
|
{% for entry in crawl_server_stats %}
|
||||||
|
{% set server, info = entry %}
|
||||||
<tr>
|
<tr>
|
||||||
<td><b>{{ server }}</b></td>
|
<td><b>{{ server }}</b></td>
|
||||||
<td>{{ crawl_server_stats[server].task_count }}</td>
|
<td class="td-numeric">{{ info.task_count }}</td>
|
||||||
<td>{{ crawl_server_stats[server].time|round(2) }}s</td>
|
<td class="td-numeric">{{ info.time | duration_format() }}</td>
|
||||||
<td>{{ crawl_server_stats[server].time_avg|round(2) }}s</td>
|
<td class="td-numeric">{{ info.time_avg | duration_format() }}</td>
|
||||||
<td>{{ crawl_server_stats[server].file_count }}</td>
|
<td class="td-numeric">{{ info.file_count }}</td>
|
||||||
<td>{{ crawl_server_stats[server].file_count_avg | round(2) }}</td>
|
<td class="td-numeric">{{ "%.2f" % info.file_count_avg }}</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</tbody>
|
</tbody>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user