Crawler no longer crashes when website has no files

Simon 2018-07-15 10:46:48 -04:00
parent e18ded7ac1
commit 112400886e
3 changed files with 15 additions and 13 deletions

app.py

@@ -562,7 +562,7 @@ def api_complete_task():
 taskManager.complete_task(filename, task, task_result, name)
-if os.path.exists(filename):
+if filename and os.path.exists(filename):
     os.remove(filename)
 # TODO: handle callback here
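Note on the app.py change: filename can be falsy when a crawl produces no file list, and os.path.exists(None) raises TypeError in Python 3 rather than returning False, so the truthiness check has to come first. A minimal sketch of the pattern (the helper name is illustrative, not from the codebase):

import os

def remove_if_present(filename):
    # `and` short-circuits before os.path.exists ever sees a None path.
    if filename and os.path.exists(filename):
        os.remove(filename)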


@@ -51,13 +51,16 @@ class TaskManager:
 }
 filename = "./crawled/" + str(task_result.website_id) + ".json"
+if os.path.exists(filename):
     files = {
         "file_list": open(filename)
     }
+else:
+    files = None
 r = requests.post(config.SERVER_URL + "/task/complete", data=payload, files=files)
-print("RESPONSE: " + r.text)
+logger.info("RESPONSE: " + r.text)
+if os.path.exists(filename):
     os.remove(filename)
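The guard above means requests may now be called with files=None, which requests handles by sending a plain form-encoded POST instead of a multipart upload. A hedged sketch of the resulting upload path (url, payload, and filename are stand-ins, not the project's names):

import os
import requests

def post_result(url, payload, filename):
    # Attach the crawl output only when it exists on disk.
    files = {"file_list": open(filename, "rb")} if os.path.exists(filename) else None
    r = requests.post(url, data=payload, files=files)
    if files:
        files["file_list"].close()
    if os.path.exists(filename):
        os.remove(filename)
    return r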
@@ -89,17 +92,16 @@ class TaskManager:
 result.start_time = datetime.utcnow().timestamp()
 result.website_id = task.website_id
-print("Starting task " + task.url)
+logger.info("Starting task " + task.url)
 crawler = RemoteDirectoryCrawler(task.url, config.CRAWL_SERVER_THREADS)
 crawl_result = crawler.crawl_directory("./crawled/" + str(task.website_id) + ".json")
 del crawler
 result.file_count = crawl_result.file_count
 result.status_code = crawl_result.status_code
 result.end_time = datetime.utcnow().timestamp()
-print("End task " + task.url)
+logger.info("End task " + task.url)
 return result, current_tasks
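The print-to-logger.info swaps throughout this file assume a module-level logger; its configuration is not shown in the diff. A minimal sketch of what that setup could look like (the logger name and format are assumptions):

import logging

logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger("crawl_server")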
@@ -113,9 +115,9 @@ class TaskManager:
 task_result, current_tasks = result
-print("Task completed, sending result to server")
-print("Status code: " + task_result.status_code)
-print("File count: " + str(task_result.file_count))
+logger.info("Task completed, sending result to server")
+logger.info("Status code: " + task_result.status_code)
+logger.info("File count: " + str(task_result.file_count))
 TaskManager.push_result(task_result)
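As a stylistic aside, logging also supports lazy %-style arguments, which skips building the message string when the level is filtered out; the commit keeps plain concatenation, so this is an alternative, not what it does:

import logging

logger = logging.getLogger("crawl_server")

def log_completion(status_code, file_count):
    # Formatting is deferred until the record is actually emitted.
    logger.info("Status code: %s", status_code)
    logger.info("File count: %d", file_count)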


@@ -63,9 +63,9 @@ class TaskManager:
 def complete_task(self, file_list, task, task_result, crawler_name):
+    if file_list:
         self.search.delete_docs(task_result.website_id)
-    if file_list:
         def iter_lines():
             with open(file_list, "r") as f:
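Moving the if file_list: guard above delete_docs means an empty crawl no longer wipes a website's previously indexed documents. A hedged sketch of the resulting control flow; the body of iter_lines beyond the open call, and the exact search backend behind self.search, are assumptions:

def complete_task(self, file_list, task, task_result, crawler_name):
    if file_list:
        # Clear the old documents only when a replacement list arrived.
        self.search.delete_docs(task_result.website_id)

        def iter_lines():
            # Stream the crawled file line by line instead of
            # loading it into memory all at once.
            with open(file_list, "r") as f:
                for line in f:
                    yield line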