Completed tasks are now fetched by the TaskDispatcher

This commit is contained in:
Simon 2018-06-12 14:16:05 -04:00
parent d61fd75890
commit 6c912ea8c5
2 changed files with 15 additions and 5 deletions

View File

@ -116,9 +116,11 @@ class TaskManagerDatabase:
cursor = conn.cursor()
cursor.execute("SELECT status_code, file_count, start_time, end_time, website_id"
" FROM TaskResult WHERE indexed_time != NULL")
" FROM TaskResult WHERE indexed_time IS NULL")
db_result = cursor.fetchall()
print(len(db_result))
cursor.execute("UPDATE TaskResult SET indexed_time = CURRENT_TIMESTAMP")
cursor.execute("UPDATE TaskResult SET indexed_time = CURRENT_TIMESTAMP WHERE indexed_time IS NULL")
conn.commit()
return [TaskResult(r[0], r[1], r[2], r[3], r[4]) for r in db_result]
return [TaskResult(r[0], r[1], r[2], r[3], r[4]) for r in db_result]

12
task.py
View File

@ -26,7 +26,9 @@ class CrawlServer:
def get_completed_tasks(self) -> list:
r = requests.get(self.url + "/task/completed")
return []
return [
TaskResult(r["status_code"], r["file_count"], r["start_time"], r["end_time"], r["website_id"])
for r in json.loads(r.text)]
def get_queued_tasks(self) -> list:
@ -62,7 +64,13 @@ class TaskDispatcher:
]
def check_completed_tasks(self):
return self._get_available_crawl_server().get_completed_tasks()
completed_tasks = []
for server in self.crawl_servers:
completed_tasks.extend(server.get_completed_tasks())
if completed_tasks:
print(str(len(completed_tasks)) + " completed tasks. Will index immediately")
def dispatch_task(self, task: Task):
self._get_available_crawl_server().queue_task(task)