mirror of
https://github.com/simon987/od-database.git
synced 2025-04-20 02:46:45 +00:00
File upload is made in small chunks
This commit is contained in:
parent
73afebec28
commit
898ffcf410
35
app.py
35
app.py
@ -562,15 +562,7 @@ def api_complete_task():
|
|||||||
|
|
||||||
if task:
|
if task:
|
||||||
|
|
||||||
if "file_list" in request.files:
|
filename = "./tmp/" + str(task_result.website_id) + ".json"
|
||||||
file = request.files['file_list']
|
|
||||||
filename = "./tmp/" + str(task_result.website_id) + ".json"
|
|
||||||
print("Saving temp file " + filename + " ...")
|
|
||||||
file.save(filename)
|
|
||||||
print("Done")
|
|
||||||
else:
|
|
||||||
filename = None
|
|
||||||
|
|
||||||
taskManager.complete_task(filename, task, task_result, name)
|
taskManager.complete_task(filename, task, task_result, name)
|
||||||
|
|
||||||
if filename and os.path.exists(filename):
|
if filename and os.path.exists(filename):
|
||||||
@ -585,6 +577,31 @@ def api_complete_task():
|
|||||||
return "No such task"
|
return "No such task"
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/task/upload", methods=["POST"])
def api_upload():
    """Receive one chunk of a crawl's file list and append it to a temp file.

    Expects a multipart POST with the form fields ``token`` and
    ``website_id`` and a file part named ``file_list``. Successive chunks
    for the same ``website_id`` are appended, in arrival order, to
    ``./tmp/<website_id>.json``.

    Returns:
        ``"ok"`` once the chunk has been written.

    Aborts with:
        403 if ``db.check_api_token`` returns a falsy value for the token.
        400 if ``website_id`` is missing or not purely numeric, or if the
        ``file_list`` part is absent.
    """
    token = request.form.get("token")
    website_id = request.form.get("website_id")
    name = db.check_api_token(token)

    if not name:
        return abort(403)

    # website_id comes from the client and is embedded in a filesystem
    # path; restricting it to digits rules out separators and "..", so the
    # write cannot escape ./tmp/.
    if website_id is None or not website_id.isdigit():
        return abort(400)

    # Without this guard the view would fall through and return None,
    # which Flask reports as a server error instead of a client error.
    if "file_list" not in request.files:
        return abort(400)

    file = request.files['file_list']
    filename = "./tmp/" + website_id + ".json"

    if os.path.exists(filename):
        print("Appending chunk to existing file...")
    else:
        print("Saving temp file " + filename + " ...")

    # Append mode creates the file when it does not exist yet, so the first
    # chunk and every later chunk share one code path. Saving into the open
    # handle streams the upload instead of loading the whole chunk in memory.
    with open(filename, "ab") as f:
        file.save(f)
    print("Done")

    return "ok"
|
||||||
|
|
||||||
|
|
||||||
@app.route("/api/website/by_url", methods=["GET"])
|
@app.route("/api/website/by_url", methods=["GET"])
|
||||||
def api_website_by_url():
|
def api_website_by_url():
|
||||||
token = request.args.get("token")
|
token = request.args.get("token")
|
||||||
|
@ -45,21 +45,31 @@ class TaskManager:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
|
logger.info("Uploading file list in small chunks")
|
||||||
|
filename = "./crawled/" + str(task_result.website_id) + ".json"
|
||||||
|
CHUNK_SIZE = 1000000 * 10
|
||||||
|
with open(filename) as f:
|
||||||
|
chunk = f.read(CHUNK_SIZE)
|
||||||
|
while chunk:
|
||||||
|
payload = {
|
||||||
|
"token": config.API_TOKEN,
|
||||||
|
"website_id": task_result.website_id
|
||||||
|
}
|
||||||
|
|
||||||
|
files = {
|
||||||
|
"file_list": chunk
|
||||||
|
}
|
||||||
|
|
||||||
|
r = requests.post(config.SERVER_URL + "/task/upload", data=payload, files=files)
|
||||||
|
logger.info("RESPONSE: " + r.text)
|
||||||
|
chunk = f.read(CHUNK_SIZE)
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"token": config.API_TOKEN,
|
"token": config.API_TOKEN,
|
||||||
"result": json.dumps(task_result.to_json())
|
"result": json.dumps(task_result.to_json())
|
||||||
}
|
}
|
||||||
|
|
||||||
filename = "./crawled/" + str(task_result.website_id) + ".json"
|
r = requests.post(config.SERVER_URL + "/task/complete", data=payload)
|
||||||
if os.path.exists(filename):
|
|
||||||
files = {
|
|
||||||
"file_list": open(filename)
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
files = None
|
|
||||||
|
|
||||||
r = requests.post(config.SERVER_URL + "/task/complete", data=payload, files=files)
|
|
||||||
|
|
||||||
logger.info("RESPONSE: " + r.text)
|
logger.info("RESPONSE: " + r.text)
|
||||||
|
|
||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user