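Misc bug fixes: halve the file-list upload chunk size, log and skip failed chunk uploads, shorten the Elasticsearch scroll window to 1m and add a 60s request timeout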

This commit is contained in:
Simon 2018-07-20 10:35:17 -04:00
parent df5b01dc83
commit 004ade8935
2 changed files with 15 additions and 11 deletions

View File

@ -47,11 +47,12 @@ class TaskManager:
logger.info("Uploading file list in small chunks") logger.info("Uploading file list in small chunks")
filename = "./crawled/" + str(task_result.website_id) + ".json" filename = "./crawled/" + str(task_result.website_id) + ".json"
CHUNK_SIZE = 1000000 * 10 CHUNK_SIZE = 500000 * 10 # 5Mb
if os.path.exists(filename): if os.path.exists(filename):
with open(filename) as f: with open(filename) as f:
chunk = f.read(CHUNK_SIZE) chunk = f.read(CHUNK_SIZE)
while chunk: while chunk:
try:
payload = { payload = {
"token": config.API_TOKEN, "token": config.API_TOKEN,
"website_id": task_result.website_id "website_id": task_result.website_id
@ -63,6 +64,9 @@ class TaskManager:
r = requests.post(config.SERVER_URL + "/task/upload", data=payload, files=files) r = requests.post(config.SERVER_URL + "/task/upload", data=payload, files=files)
logger.info("RESPONSE: " + r.text) logger.info("RESPONSE: " + r.text)
except Exception as e:
logger.error("Exception while sending file_list chunk: " + str(e))
pass
chunk = f.read(CHUNK_SIZE) chunk = f.read(CHUNK_SIZE)
payload = { payload = {

View File

@ -418,7 +418,7 @@ class ElasticSearchEngine(SearchEngine):
"query": { "query": {
"match_all": {} "match_all": {}
} }
}, scroll="5m", client=self.es, index=self.index_name) }, scroll="1m", client=self.es, index=self.index_name, request_timeout=60)
def are_empty(self, websites): def are_empty(self, websites):
result = self.es.search(body={ result = self.es.search(body={