mirror of
https://github.com/simon987/od-database.git
synced 2025-04-19 18:36:44 +00:00
Crawler no longer crashes when website has no files
This commit is contained in:
parent
e18ded7ac1
commit
112400886e
2
app.py
2
app.py
@ -562,7 +562,7 @@ def api_complete_task():
|
||||
|
||||
taskManager.complete_task(filename, task, task_result, name)
|
||||
|
||||
if os.path.exists(filename):
|
||||
if filename and os.path.exists(filename):
|
||||
os.remove(filename)
|
||||
|
||||
# TODO: handle callback here
|
||||
|
@ -51,13 +51,16 @@ class TaskManager:
|
||||
}
|
||||
|
||||
filename = "./crawled/" + str(task_result.website_id) + ".json"
|
||||
if os.path.exists(filename):
|
||||
files = {
|
||||
"file_list": open(filename)
|
||||
}
|
||||
else:
|
||||
files = None
|
||||
|
||||
r = requests.post(config.SERVER_URL + "/task/complete", data=payload, files=files)
|
||||
|
||||
print("RESPONSE: " + r.text)
|
||||
logger.info("RESPONSE: " + r.text)
|
||||
|
||||
if os.path.exists(filename):
|
||||
os.remove(filename)
|
||||
@ -89,17 +92,16 @@ class TaskManager:
|
||||
result.start_time = datetime.utcnow().timestamp()
|
||||
result.website_id = task.website_id
|
||||
|
||||
print("Starting task " + task.url)
|
||||
logger.info("Starting task " + task.url)
|
||||
|
||||
crawler = RemoteDirectoryCrawler(task.url, config.CRAWL_SERVER_THREADS)
|
||||
crawl_result = crawler.crawl_directory("./crawled/" + str(task.website_id) + ".json")
|
||||
del crawler
|
||||
|
||||
result.file_count = crawl_result.file_count
|
||||
result.status_code = crawl_result.status_code
|
||||
|
||||
result.end_time = datetime.utcnow().timestamp()
|
||||
print("End task " + task.url)
|
||||
logger.info("End task " + task.url)
|
||||
|
||||
return result, current_tasks
|
||||
|
||||
@ -113,9 +115,9 @@ class TaskManager:
|
||||
|
||||
task_result, current_tasks = result
|
||||
|
||||
print("Task completed, sending result to server")
|
||||
print("Status code: " + task_result.status_code)
|
||||
print("File count: " + str(task_result.file_count))
|
||||
logger.info("Task completed, sending result to server")
|
||||
logger.info("Status code: " + task_result.status_code)
|
||||
logger.info("File count: " + str(task_result.file_count))
|
||||
|
||||
TaskManager.push_result(task_result)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user