From 6c912ea8c5fd8a903e6a573681d084052c73dc24 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 12 Jun 2018 14:16:05 -0400 Subject: [PATCH] Completed tasks are now fetched by the TaskDispatcher --- crawl_server/database.py | 8 +++++--- task.py | 12 ++++++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/crawl_server/database.py b/crawl_server/database.py index 1fb84ef..dfff4d1 100644 --- a/crawl_server/database.py +++ b/crawl_server/database.py @@ -116,9 +116,11 @@ class TaskManagerDatabase: cursor = conn.cursor() cursor.execute("SELECT status_code, file_count, start_time, end_time, website_id" - " FROM TaskResult WHERE indexed_time != NULL") + " FROM TaskResult WHERE indexed_time IS NULL") db_result = cursor.fetchall() + print(len(db_result)) - cursor.execute("UPDATE TaskResult SET indexed_time = CURRENT_TIMESTAMP") + cursor.execute("UPDATE TaskResult SET indexed_time = CURRENT_TIMESTAMP WHERE indexed_time IS NULL") + conn.commit() - return [TaskResult(r[0], r[1], r[2], r[3], r[4]) for r in db_result] + return [TaskResult(r[0], r[1], r[2], r[3], r[4]) for r in db_result] diff --git a/task.py b/task.py index 46a517c..4f9fc5f 100644 --- a/task.py +++ b/task.py @@ -26,7 +26,9 @@ class CrawlServer: def get_completed_tasks(self) -> list: r = requests.get(self.url + "/task/completed") - return [] + return [ + TaskResult(r["status_code"], r["file_count"], r["start_time"], r["end_time"], r["website_id"]) + for r in json.loads(r.text)] def get_queued_tasks(self) -> list: @@ -62,7 +64,13 @@ class TaskDispatcher: ] def check_completed_tasks(self): - return self._get_available_crawl_server().get_completed_tasks() + completed_tasks = [] + + for server in self.crawl_servers: + completed_tasks.extend(server.get_completed_tasks()) + + if completed_tasks: + print(str(len(completed_tasks)) + " completed tasks. Will index immediately") def dispatch_task(self, task: Task): self._get_available_crawl_server().queue_task(task)