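Misc bug fixes: upload the file list in smaller chunks with per-chunk error handling; shorten the Elasticsearch scroll window and add a request timeout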

This commit is contained in:
Simon 2018-07-20 10:35:17 -04:00
parent df5b01dc83
commit 004ade8935
2 changed files with 15 additions and 11 deletions

View File

@ -47,22 +47,26 @@ class TaskManager:
logger.info("Uploading file list in small chunks") logger.info("Uploading file list in small chunks")
filename = "./crawled/" + str(task_result.website_id) + ".json" filename = "./crawled/" + str(task_result.website_id) + ".json"
CHUNK_SIZE = 1000000 * 10 CHUNK_SIZE = 500000 * 10 # 5Mb
if os.path.exists(filename): if os.path.exists(filename):
with open(filename) as f: with open(filename) as f:
chunk = f.read(CHUNK_SIZE) chunk = f.read(CHUNK_SIZE)
while chunk: while chunk:
payload = { try:
"token": config.API_TOKEN, payload = {
"website_id": task_result.website_id "token": config.API_TOKEN,
} "website_id": task_result.website_id
}
files = { files = {
"file_list": chunk "file_list": chunk
} }
r = requests.post(config.SERVER_URL + "/task/upload", data=payload, files=files) r = requests.post(config.SERVER_URL + "/task/upload", data=payload, files=files)
logger.info("RESPONSE: " + r.text) logger.info("RESPONSE: " + r.text)
except Exception as e:
logger.error("Exception while sending file_list chunk: " + str(e))
pass
chunk = f.read(CHUNK_SIZE) chunk = f.read(CHUNK_SIZE)
payload = { payload = {

View File

@ -418,7 +418,7 @@ class ElasticSearchEngine(SearchEngine):
"query": { "query": {
"match_all": {} "match_all": {}
} }
}, scroll="5m", client=self.es, index=self.index_name) }, scroll="1m", client=self.es, index=self.index_name, request_timeout=60)
def are_empty(self, websites): def are_empty(self, websites):
result = self.es.search(body={ result = self.es.search(body={