mirror of
https://github.com/simon987/od-database.git
synced 2025-04-19 10:26:44 +00:00
Misc bug fixes
This commit is contained in:
parent
df5b01dc83
commit
004ade8935
@ -47,22 +47,26 @@ class TaskManager:
|
|||||||
|
|
||||||
logger.info("Uploading file list in small chunks")
|
logger.info("Uploading file list in small chunks")
|
||||||
filename = "./crawled/" + str(task_result.website_id) + ".json"
|
filename = "./crawled/" + str(task_result.website_id) + ".json"
|
||||||
CHUNK_SIZE = 1000000 * 10
|
CHUNK_SIZE = 500000 * 10 # 5Mb
|
||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
with open(filename) as f:
|
with open(filename) as f:
|
||||||
chunk = f.read(CHUNK_SIZE)
|
chunk = f.read(CHUNK_SIZE)
|
||||||
while chunk:
|
while chunk:
|
||||||
payload = {
|
try:
|
||||||
"token": config.API_TOKEN,
|
payload = {
|
||||||
"website_id": task_result.website_id
|
"token": config.API_TOKEN,
|
||||||
}
|
"website_id": task_result.website_id
|
||||||
|
}
|
||||||
|
|
||||||
files = {
|
files = {
|
||||||
"file_list": chunk
|
"file_list": chunk
|
||||||
}
|
}
|
||||||
|
|
||||||
r = requests.post(config.SERVER_URL + "/task/upload", data=payload, files=files)
|
r = requests.post(config.SERVER_URL + "/task/upload", data=payload, files=files)
|
||||||
logger.info("RESPONSE: " + r.text)
|
logger.info("RESPONSE: " + r.text)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Exception while sending file_list chunk: " + str(e))
|
||||||
|
pass
|
||||||
chunk = f.read(CHUNK_SIZE)
|
chunk = f.read(CHUNK_SIZE)
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
|
@ -418,7 +418,7 @@ class ElasticSearchEngine(SearchEngine):
|
|||||||
"query": {
|
"query": {
|
||||||
"match_all": {}
|
"match_all": {}
|
||||||
}
|
}
|
||||||
}, scroll="5m", client=self.es, index=self.index_name)
|
}, scroll="1m", client=self.es, index=self.index_name, request_timeout=60)
|
||||||
|
|
||||||
def are_empty(self, websites):
|
def are_empty(self, websites):
|
||||||
result = self.es.search(body={
|
result = self.es.search(body={
|
||||||
|
Loading…
x
Reference in New Issue
Block a user