diff --git a/app.py b/app.py index a898063..5ec4cb7 100644 --- a/app.py +++ b/app.py @@ -562,7 +562,7 @@ def api_complete_task(): taskManager.complete_task(filename, task, task_result, name) - if os.path.exists(filename): + if filename and os.path.exists(filename): os.remove(filename) # TODO: handle callback here diff --git a/crawl_server/task_manager.py b/crawl_server/task_manager.py index 6a3d07f..aa5f71d 100644 --- a/crawl_server/task_manager.py +++ b/crawl_server/task_manager.py @@ -51,13 +51,16 @@ class TaskManager: } filename = "./crawled/" + str(task_result.website_id) + ".json" - files = { - "file_list": open(filename) - } + if os.path.exists(filename): + files = { + "file_list": open(filename) + } + else: + files = None r = requests.post(config.SERVER_URL + "/task/complete", data=payload, files=files) - print("RESPONSE: " + r.text) + logger.info("RESPONSE: " + r.text) if os.path.exists(filename): os.remove(filename) @@ -89,17 +92,16 @@ class TaskManager: result.start_time = datetime.utcnow().timestamp() result.website_id = task.website_id - print("Starting task " + task.url) + logger.info("Starting task " + task.url) crawler = RemoteDirectoryCrawler(task.url, config.CRAWL_SERVER_THREADS) crawl_result = crawler.crawl_directory("./crawled/" + str(task.website_id) + ".json") - del crawler result.file_count = crawl_result.file_count result.status_code = crawl_result.status_code result.end_time = datetime.utcnow().timestamp() - print("End task " + task.url) + logger.info("End task " + task.url) return result, current_tasks @@ -113,9 +115,9 @@ class TaskManager: task_result, current_tasks = result - print("Task completed, sending result to server") - print("Status code: " + task_result.status_code) - print("File count: " + str(task_result.file_count)) + logger.info("Task completed, sending result to server") + logger.info("Status code: " + task_result.status_code) + logger.info("File count: " + str(task_result.file_count)) TaskManager.push_result(task_result) diff --git a/tasks.py b/tasks.py index e940404..d0057e5 100644 --- a/tasks.py +++ b/tasks.py @@ -63,9 +63,9 @@ class TaskManager: def complete_task(self, file_list, task, task_result, crawler_name): - if file_list: - self.search.delete_docs(task_result.website_id) + self.search.delete_docs(task_result.website_id) + if file_list: def iter_lines(): with open(file_list, "r") as f: