Task logs now stored on main server

This commit is contained in:
Simon
2018-06-24 20:32:02 -04:00
parent 059d9fd366
commit 5fd00f22af
10 changed files with 77 additions and 98 deletions

View File

@@ -5,13 +5,15 @@ import sqlite3
class TaskResult:
# NOTE(review): this span appears to come from a rendered diff without +/- markers.
# The single-line signature directly below looks like the pre-change version and the
# two-line signature after it like its replacement — confirm against the repository.
def __init__(self, status_code=None, file_count=0, start_time=0, end_time=0, website_id=0, indexed_time=0):
# Constructor variant that adds a trailing ``server_name`` keyword (default "").
# Every parameter has a default, so callers may omit any of them.
def __init__(self, status_code=None, file_count=0, start_time=0,
end_time=0, website_id=0, indexed_time=0, server_name=""):
# Each argument is stored unchanged on the instance under the same name.
self.status_code = status_code
self.file_count = file_count
self.start_time = start_time
self.end_time = end_time
self.website_id = website_id
self.indexed_time = indexed_time
# Only the two-line signature above supplies ``server_name``.
self.server_name = server_name
def to_json(self):
return {
@@ -139,11 +141,3 @@ class TaskManagerDatabase:
return [TaskResult(r[0], r[1], r[2], r[3], r[4]) for r in db_result]
def get_all_results(self):
    """Load every row of the ``TaskResult`` table, ordered by ascending id.

    The start, end and indexed time columns are converted with
    ``.timestamp()``; a falsy ``indexed_time`` is passed on as ``None``.

    Returns:
        list: One ``TaskResult`` per stored row.
    """
    # NOTE(review): detect_types presumably makes sqlite3 hand back datetime
    # objects for the time columns — confirm against the table schema.
    parse_flags = sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES
    query = ("SELECT website_id, status_code, file_count, start_time, end_time, indexed_time "
             "FROM TaskResult ORDER BY id ASC")

    with sqlite3.connect(self.db_path, detect_types=parse_flags) as conn:
        cursor = conn.cursor()
        cursor.execute(query)

        results = []
        for website_id, status_code, file_count, started, ended, indexed in cursor.fetchall():
            started_ts = started.timestamp()
            ended_ts = ended.timestamp()
            indexed_ts = indexed.timestamp() if indexed else None
            results.append(
                TaskResult(status_code, file_count, started_ts, ended_ts, website_id, indexed_ts)
            )
        return results

View File

@@ -98,12 +98,5 @@ def pop_queued_tasks():
return Response(json_str, mimetype="application/json")
@app.route("/stats/")
@auth.login_required
def get_stats():
    """Return the task manager's statistics serialised as a JSON response."""
    stats = tm.get_stats()
    body = json.dumps(stats)
    return Response(body, mimetype="application/json")
# Script entry point: start the app on the configured crawl-server port,
# bound to 0.0.0.0 (all IPv4 interfaces).
# NOTE(review): ssl_context="adhoc" presumably asks the server to generate a
# throwaway self-signed certificate at startup — confirm against the framework docs.
if __name__ == "__main__":
app.run(port=config.CRAWL_SERVER_PORT, host="0.0.0.0", ssl_context="adhoc")

View File

@@ -36,9 +36,6 @@ class TaskManager:
def get_non_indexed_results(self):
    """Delegate to the backing database's ``get_non_indexed_results``."""
    database = self.db
    return database.get_non_indexed_results()
def get_all_results(self):
    """Delegate to the backing database's ``get_all_results``."""
    database = self.db
    return database.get_all_results()
def execute_queued_task(self):
if len(self.current_tasks) <= self.max_processes:
@@ -103,20 +100,4 @@ class TaskManager:
if task.website_id == task_result.website_id:
del current_tasks[i]
def get_stats(self):
    """Summarise the task results returned by ``self.get_all_results()``.

    Returns:
        dict: Empty when there are no task results. Otherwise it holds
        ``task_count``, ``task_time`` (sum of ``end_time - start_time``),
        ``task_time_avg``, ``task_file_count`` (sum of ``file_count``) and
        ``task_file_count_avg``.
    """
    task_results = self.get_all_results()

    # Guard clause: with no results there is nothing to average, and the
    # divisions below would raise ZeroDivisionError.
    if not task_results:
        return {}

    task_count = len(task_results)
    total_time = sum(task.end_time - task.start_time for task in task_results)
    total_files = sum(task.file_count for task in task_results)

    return {
        "task_count": task_count,
        "task_time": total_time,
        "task_time_avg": total_time / task_count,
        "task_file_count": total_files,
        "task_file_count_avg": total_files / task_count,
    }