From f452d0f8b2456e9161beb63b14b62730c3253b43 Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 14 Jul 2018 20:41:20 -0400 Subject: [PATCH] file lists now deleted after indexing --- crawl_server/remote_http.py | 2 +- crawl_server/task_manager.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/crawl_server/remote_http.py b/crawl_server/remote_http.py index 5539ec1..c3a3083 100644 --- a/crawl_server/remote_http.py +++ b/crawl_server/remote_http.py @@ -119,7 +119,7 @@ class HttpDirectory(RemoteDirectory): if self._isdir(anchor): directory = File( - name=anchor.href, + name=anchor.href, # todo handle external links here mtime=0, size=0, path=path, diff --git a/crawl_server/task_manager.py b/crawl_server/task_manager.py index d11e98b..19ec425 100644 --- a/crawl_server/task_manager.py +++ b/crawl_server/task_manager.py @@ -1,4 +1,5 @@ from crawl_server import logger +import os from tasks import TaskResult, Task import config import requests @@ -49,14 +50,18 @@ class TaskManager: "result": json.dumps(task_result.to_json()) } + filename = "./crawled/" + str(task_result.website_id) + ".json" files = { - "file_list": open("./crawled/" + str(task_result.website_id) + ".json") + "file_list": open(filename) } r = requests.post(config.SERVER_URL + "/task/complete", data=payload, files=files) logger.info("RESPONSE: " + r.text) + if os.path.exists(filename): + os.remove(filename) + except Exception as e: raise e