From 004ade8935b8bca4092fc53f46788548c8d2df45 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 20 Jul 2018 10:35:17 -0400 Subject: [PATCH] Misc bug fixes --- crawl_server/task_manager.py | 24 ++++++++++++++---------- search/search.py | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/crawl_server/task_manager.py b/crawl_server/task_manager.py index 39cb2fc..e95e9b5 100644 --- a/crawl_server/task_manager.py +++ b/crawl_server/task_manager.py @@ -47,22 +47,26 @@ class TaskManager: logger.info("Uploading file list in small chunks") filename = "./crawled/" + str(task_result.website_id) + ".json" - CHUNK_SIZE = 1000000 * 10 + CHUNK_SIZE = 500000 * 10 # 5Mb if os.path.exists(filename): with open(filename) as f: chunk = f.read(CHUNK_SIZE) while chunk: - payload = { - "token": config.API_TOKEN, - "website_id": task_result.website_id - } + try: + payload = { + "token": config.API_TOKEN, + "website_id": task_result.website_id + } - files = { - "file_list": chunk - } + files = { + "file_list": chunk + } - r = requests.post(config.SERVER_URL + "/task/upload", data=payload, files=files) - logger.info("RESPONSE: " + r.text) + r = requests.post(config.SERVER_URL + "/task/upload", data=payload, files=files) + logger.info("RESPONSE: " + r.text) + except Exception as e: + logger.error("Exception while sending file_list chunk: " + str(e)) + pass chunk = f.read(CHUNK_SIZE) payload = { diff --git a/search/search.py b/search/search.py index f537718..d73689d 100644 --- a/search/search.py +++ b/search/search.py @@ -418,7 +418,7 @@ class ElasticSearchEngine(SearchEngine): "query": { "match_all": {} } - }, scroll="5m", client=self.es, index=self.index_name) + }, scroll="1m", client=self.es, index=self.index_name, request_timeout=60) def are_empty(self, websites): result = self.es.search(body={