file lists now deleted after indexing

Simon committed on 2018-07-14 20:41:20 -04:00
parent 51a47b3628
commit f452d0f8b2
2 changed files with 7 additions and 2 deletions


@@ -119,7 +119,7 @@ class HttpDirectory(RemoteDirectory):
         if self._isdir(anchor):
             directory = File(
-                name=anchor.href,
+                name=anchor.href,  # todo handle external links here
                 mtime=0,
                 size=0,
                 path=path,


@@ -1,4 +1,5 @@
 from crawl_server import logger
+import os
 from tasks import TaskResult, Task
 import config
 import requests
@@ -49,14 +50,18 @@ class TaskManager:
                 "result": json.dumps(task_result.to_json())
             }
+            filename = "./crawled/" + str(task_result.website_id) + ".json"
             files = {
-                "file_list": open("./crawled/" + str(task_result.website_id) + ".json")
+                "file_list": open(filename)
             }
             r = requests.post(config.SERVER_URL + "/task/complete", data=payload, files=files)
             logger.info("RESPONSE: " + r.text)
+            if os.path.exists(filename):
+                os.remove(filename)
         except Exception as e:
             raise e
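The change above posts the crawl result file to the server and then removes it from disk, so completed file lists no longer accumulate in ./crawled/. Below is a minimal, self-contained sketch of that upload-then-delete pattern; the upload_and_cleanup helper, the server URL, and the payload argument are illustrative assumptions, not code from the project.

    import os
    import requests

    def upload_and_cleanup(filename, payload, server_url="http://example.com"):
        # Hypothetical helper showing the commit's upload-then-delete pattern:
        # open the crawl result, POST it to the server, then delete it from
        # disk so finished file lists do not pile up in ./crawled/.
        with open(filename, "rb") as f:
            r = requests.post(server_url + "/task/complete",
                              data=payload,
                              files={"file_list": f})
        print("RESPONSE: " + r.text)

        # Remove the file list now that the server has indexed it.
        if os.path.exists(filename):
            os.remove(filename)
        return r

One difference from the committed diff: the sketch uses a with block, so the upload handle is closed before the file is removed, whereas the committed code leaves the handle returned by open() unclosed. That distinction can matter on platforms where an open file cannot be deleted.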