File lists are now deleted after indexing

This commit is contained in:
Simon 2018-07-14 20:41:20 -04:00
parent 51a47b3628
commit f452d0f8b2
2 changed files with 7 additions and 2 deletions

View File

@ -119,7 +119,7 @@ class HttpDirectory(RemoteDirectory):
if self._isdir(anchor):
directory = File(
name=anchor.href,
name=anchor.href, # todo handle external links here
mtime=0,
size=0,
path=path,

View File

@ -1,4 +1,5 @@
from crawl_server import logger
import os
from tasks import TaskResult, Task
import config
import requests
@ -49,14 +50,18 @@ class TaskManager:
"result": json.dumps(task_result.to_json())
}
filename = "./crawled/" + str(task_result.website_id) + ".json"
files = {
"file_list": open("./crawled/" + str(task_result.website_id) + ".json")
"file_list": open(filename)
}
r = requests.post(config.SERVER_URL + "/task/complete", data=payload, files=files)
logger.info("RESPONSE: " + r.text)
if os.path.exists(filename):
os.remove(filename)
except Exception as e:
raise e