mirror of
https://github.com/simon987/od-database.git
synced 2025-04-18 01:46:46 +00:00
Added crawl logs page
This commit is contained in:
parent
99d64b658b
commit
400abc9a3c
20
app.py
20
app.py
@ -26,11 +26,17 @@ taskDispatcher = TaskDispatcher()
|
||||
searchEngine = ElasticSearchEngine("od-database")
|
||||
|
||||
|
||||
@app.template_filter("datetime_format")
|
||||
@app.template_filter("date_format")
def date_format(value, format='%Y-%m-%d'):
    """Jinja filter ``date_format``: render a Unix timestamp as a UTC date.

    The function carries the same name as the filter it registers, so it is
    no longer rebound at module level by the ``datetime_format`` filter
    function that is defined further down in this module.

    :param value: seconds since the epoch, as accepted by ``time.gmtime``.
    :param format: ``time.strftime`` format string (date only by default).
    :return: the formatted UTC date string.
    """
    return time.strftime(format, time.gmtime(value))
|
||||
|
||||
|
||||
@app.template_filter("datetime_format")
def datetime_format(value, format='%Y-%m-%d %H:%M:%S'):
    """Jinja filter ``datetime_format``: render a Unix timestamp as UTC date and time.

    :param value: seconds since the epoch, or ``None`` when no timestamp is
        available.
    :param format: ``time.strftime`` format string.
    :return: the formatted UTC string, or an empty string when ``value`` is
        ``None``.
    """
    if value is None:
        # time.gmtime(None) means "now"; a missing timestamp must not be
        # displayed as the current time.
        return ""
    return time.strftime(format, time.gmtime(value))
|
||||
|
||||
|
||||
|
||||
@app.route("/dl")
|
||||
def downloads():
|
||||
|
||||
@ -293,6 +299,18 @@ def admin_del_token():
|
||||
return abort(403)
|
||||
|
||||
|
||||
@app.route("/logs", methods=["GET"])
def admin_crawl_logs():
    """Render the crawl logs page for a logged-in user.

    Responds with HTTP 403 when the session holds no ``username``.

    :return: the rendered ``crawl_logs.html`` template, fed with the task
        logs returned by ``taskDispatcher.get_task_logs_by_server()``.
    """
    if "username" not in session:
        return abort(403)

    # The former debug print of the whole result set was dropped: it wrote
    # every task result to stdout on each page view.
    results = taskDispatcher.get_task_logs_by_server()
    return render_template("crawl_logs.html", logs=results)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if config.USE_SSL:
|
||||
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
|
||||
|
@ -5,12 +5,13 @@ import sqlite3
|
||||
|
||||
class TaskResult:
|
||||
|
||||
def __init__(self, status_code=None, file_count=0, start_time=0, end_time=0, website_id=0):
|
||||
def __init__(self, status_code=None, file_count=0, start_time=0, end_time=0, website_id=0, indexed_time=0):
|
||||
self.status_code = status_code
|
||||
self.file_count = file_count
|
||||
self.start_time = start_time
|
||||
self.end_time = end_time
|
||||
self.website_id = website_id
|
||||
self.indexed_time = indexed_time
|
||||
|
||||
def to_json(self):
|
||||
return {
|
||||
@ -18,7 +19,8 @@ class TaskResult:
|
||||
"file_count": self.file_count,
|
||||
"start_time": self.start_time,
|
||||
"end_time": self.end_time,
|
||||
"website_id": self.website_id
|
||||
"website_id": self.website_id,
|
||||
"indexed_time": self.indexed_time
|
||||
}
|
||||
|
||||
|
||||
@ -126,3 +128,12 @@ class TaskManagerDatabase:
|
||||
conn.commit()
|
||||
|
||||
return [TaskResult(r[0], r[1], r[2], r[3], r[4]) for r in db_result]
|
||||
|
||||
def get_all_results(self):
    """Return every row of the ``TaskResult`` table, ordered by ascending id.

    ``start_time``, ``end_time`` and (when set) ``indexed_time`` are turned
    into epoch seconds with ``.timestamp()``; a NULL ``indexed_time`` is
    passed on as ``None``.
    NOTE(review): this relies on ``detect_types`` converting the TIMESTAMP
    columns to datetime objects - confirm against the schema in use.

    :return: list of ``TaskResult`` objects.
    """
    # Local import so this method does not depend on the module's import block.
    from contextlib import closing

    query = ("SELECT website_id, status_code, file_count, start_time, end_time, indexed_time "
             "FROM TaskResult ORDER BY id ASC")

    # sqlite3's own connection context manager only commits or rolls back;
    # closing() makes sure the connection itself is released.
    with closing(sqlite3.connect(self.db_path,
                                 detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)) as conn:
        cursor = conn.cursor()
        cursor.execute(query)
        rows = cursor.fetchall()

    return [
        TaskResult(
            status_code=status_code,
            file_count=file_count,
            start_time=start_time.timestamp(),
            end_time=end_time.timestamp(),
            website_id=website_id,
            indexed_time=indexed_time.timestamp() if indexed_time else None,
        )
        for website_id, status_code, file_count, start_time, end_time, indexed_time in rows
    ]
|
||||
|
@ -72,5 +72,13 @@ def get_file_list(website_id):
|
||||
return abort(404)
|
||||
|
||||
|
||||
@app.route("/task/logs/")
@auth.login_required
def get_task_logs():
    """Serve every stored task result as a JSON array (login required)."""
    serialized_results = []
    for task_result in tm.get_all_results():
        serialized_results.append(task_result.to_json())

    return json.dumps(serialized_results)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(port=5001, host="0.0.0.0")
|
||||
|
@ -13,7 +13,7 @@ CREATE TABLE TaskResult (
|
||||
website_id INT,
|
||||
status_code TEXT,
|
||||
file_count INT,
|
||||
start_time INT,
|
||||
end_time INT,
|
||||
indexed_time INT DEFAULT NULL
|
||||
start_time TIMESTAMP,
|
||||
end_time TIMESTAMP,
|
||||
indexed_time TIMESTAMP DEFAULT NULL
|
||||
);
|
@ -32,6 +32,9 @@ class TaskManager:
|
||||
def get_non_indexed_results(self):
    """Delegate to the task database and return its non-indexed results."""
    database = self.db
    return database.get_non_indexed_results()
|
||||
|
||||
def get_all_results(self):
    """Delegate to the task database and return all of its task results."""
    database = self.db
    return database.get_all_results()
|
||||
|
||||
def execute_queued_task(self):
|
||||
|
||||
if len(self.current_tasks) <= self.max_processes:
|
||||
|
19
task.py
19
task.py
@ -70,6 +70,16 @@ class CrawlServer:
|
||||
except ConnectionError:
|
||||
return ""
|
||||
|
||||
def fetch_crawl_logs(self):
    """Fetch the task results recorded by this crawl server.

    Requests ``<self.url>/task/logs/`` and builds one ``TaskResult`` per
    entry of the returned JSON array.

    :return: list of ``TaskResult``; an empty list when the server cannot be
        reached, answers with a non-200 status, or sends a body that is not
        valid JSON.
    """
    try:
        response = requests.get(self.url + "/task/logs/", headers=CrawlServer.headers)
    except ConnectionError:
        return []

    # A rejected or failed request carries no log payload; stay best-effort
    # like the connection-error path instead of failing while parsing.
    if response.status_code != 200:
        return []

    try:
        entries = json.loads(response.text)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        return []

    # The response and the loop variable have distinct names; the original
    # reused ``r`` for both.
    return [
        TaskResult(entry["status_code"], entry["file_count"], entry["start_time"],
                   entry["end_time"], entry["website_id"], entry["indexed_time"])
        for entry in entries
    ]
|
||||
|
||||
|
||||
class TaskDispatcher:
|
||||
|
||||
@ -119,4 +129,13 @@ class TaskDispatcher:
|
||||
|
||||
return current_tasks
|
||||
|
||||
def get_task_logs_by_server(self) -> dict:
    """Collect the crawl logs of every crawl server in ``self.crawl_servers``.

    :return: dict mapping each server's ``url`` to the value returned by its
        ``fetch_crawl_logs()``.
    """
    return {
        crawl_server.url: crawl_server.fetch_crawl_logs()
        for crawl_server in self.crawl_servers
    }
|
||||
|
||||
|
||||
|
38
templates/crawl_logs.html
Normal file
38
templates/crawl_logs.html
Normal file
@ -0,0 +1,38 @@
|
||||
{% extends "layout.html" %}
{% set title = "Crawl logs - OD-Database" %}

{% block body %}
    <div class="container-fluid">

        <table class="table table-striped">
            <thead>
            <tr>
                <th>Server</th>
                <th>Website</th>
                <th>Status code</th>
                <th>File count</th>
                <th>Start</th>
                <th>End</th>
                <th>Index</th>
            </tr>
            </thead>

            <tbody>
            {# One row per task result, grouped by the crawl server that reported it #}
            {% for server, task_results in logs.items() %}
                {% for task_result in task_results %}
                    <tr>
                        <td>{{ server }}</td>
                        <td><a href="/website/{{ task_result.website_id }}/">#{{ task_result.website_id }}</a></td>
                        <td>{{ task_result.status_code }}</td>
                        <td>{{ task_result.file_count }}</td>
                        <td>{{ task_result.start_time | datetime_format }}</td>
                        <td>{{ task_result.end_time | datetime_format }}</td>
                        {# indexed_time may be missing; leave the cell empty instead of formatting it #}
                        <td>{{ task_result.indexed_time | datetime_format if task_result.indexed_time is not none else "" }}</td>
                    </tr>
                {% endfor %}
            {% endfor %}
            </tbody>
        </table>

    </div>
{% endblock body %}
|
@ -25,7 +25,7 @@
|
||||
<tr>
|
||||
<td><a href="/get_export">out.csv.xz</a></td>
|
||||
<td>{{ export_file_stats.st_size |filesizeformat }}</td>
|
||||
<td>{{ export_file_stats.st_mtime|datetime_format }}</td>
|
||||
<td>{{ export_file_stats.st_mtime|date_format }}</td>
|
||||
</tr>
|
||||
{% endif %}
|
||||
</tbody>
|
||||
|
@ -77,7 +77,7 @@
|
||||
{# File size & date #}
|
||||
<td style="white-space: nowrap; vertical-align: top; text-align: right; font-size: 14px">
|
||||
<div>{{ src["size"] | filesizeformat if src["size"] >= 0 else "?" }}</div>
|
||||
<code>{{ src["mtime"] | datetime_format }}</code>
|
||||
<code>{{ src["mtime"] | date_format }}</code>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
|
Loading…
x
Reference in New Issue
Block a user