File upload is made in small chunks

This commit is contained in:
Simon 2018-07-17 17:52:17 -04:00
parent 73afebec28
commit 898ffcf410
2 changed files with 46 additions and 19 deletions

33
app.py
View File

@ -562,15 +562,7 @@ def api_complete_task():
if task:
if "file_list" in request.files:
file = request.files['file_list']
filename = "./tmp/" + str(task_result.website_id) + ".json"
print("Saving temp file " + filename + " ...")
file.save(filename)
print("Done")
else:
filename = None
taskManager.complete_task(filename, task, task_result, name)
if filename and os.path.exists(filename):
@ -585,6 +577,31 @@ def api_complete_task():
return "No such task"
@app.route("/api/task/upload", methods=["POST"])
def api_upload():
    """Receive one chunk of a crawl result file and append it to the temp file.

    Form fields:
        token: API token, validated with ``db.check_api_token``.
        website_id: identifier used to name the temp file
            ``./tmp/<website_id>.json``.
    Files:
        file_list: the chunk to store. The first chunk creates the temp
            file; later chunks are appended to it.

    Returns:
        ``"ok"`` for an authenticated request (also when no ``file_list``
        part was sent), a 403 response when the token is rejected, or a 400
        response when a chunk is sent with a missing or unsafe ``website_id``.
    """
    token = request.form.get("token")
    website_id = request.form.get("website_id")
    name = db.check_api_token(token)

    if not name:
        return abort(403)

    if "file_list" in request.files:
        # website_id comes straight from the client and becomes part of a
        # filesystem path: refuse empty values and anything that could step
        # out of ./tmp/ (path separators, NUL byte).
        if not website_id or any(ch in website_id for ch in ("/", "\\", "\0")):
            return abort(400)

        file = request.files['file_list']
        filename = "./tmp/" + str(website_id) + ".json"

        if os.path.exists(filename):
            # A previous chunk already created the file: append this one.
            print("Appending chunk to existing file...")
            with open(filename, "ab") as f:
                f.write(file.stream.read())
        else:
            print("Saving temp file " + filename + " ...")
            file.save(filename)
            print("Done")

    return "ok"
@app.route("/api/website/by_url", methods=["GET"])
def api_website_by_url():
token = request.args.get("token")

View File

@ -45,21 +45,31 @@ class TaskManager:
try:
logger.info("Uploading file list in small chunks")
filename = "./crawled/" + str(task_result.website_id) + ".json"
CHUNK_SIZE = 1000000 * 10
with open(filename) as f:
chunk = f.read(CHUNK_SIZE)
while chunk:
payload = {
"token": config.API_TOKEN,
"website_id": task_result.website_id
}
files = {
"file_list": chunk
}
r = requests.post(config.SERVER_URL + "/task/upload", data=payload, files=files)
logger.info("RESPONSE: " + r.text)
chunk = f.read(CHUNK_SIZE)
payload = {
"token": config.API_TOKEN,
"result": json.dumps(task_result.to_json())
}
filename = "./crawled/" + str(task_result.website_id) + ".json"
if os.path.exists(filename):
files = {
"file_list": open(filename)
}
else:
files = None
r = requests.post(config.SERVER_URL + "/task/complete", data=payload, files=files)
r = requests.post(config.SERVER_URL + "/task/complete", data=payload)
logger.info("RESPONSE: " + r.text)
if os.path.exists(filename):