Use task_tracker for task tracking

This commit is contained in:
simon987
2019-03-24 20:21:43 -04:00
parent 00e3fd7340
commit 4ffe805b8d
29 changed files with 196 additions and 317 deletions

129
api.py
View File

@@ -1,87 +1,15 @@
import json
import os
from threading import Lock
from uuid import uuid4
from flask import request, abort, Response, send_file, session
from flask import request, abort, send_file, session
import common as oddb
import captcha
from callbacks import PostCrawlCallbackFactory
from database import Task, Website
import common as oddb
from database import Website
from search.search import InvalidQueryException
from tasks import TaskResult
uploadLock = Lock()
def setup_api(app):
@app.route("/api/task/complete", methods=["POST"])
def api_complete_task():
    """Record a task result posted by an authenticated client.

    Form fields read:
        token:  API token, checked with ``oddb.db.check_api_token``.
        result: JSON object with the keys ``status_code``, ``file_count``,
                ``start_time``, ``end_time`` and ``website_id``.

    Returns a plain-text status message. Aborts with 403 when the token
    is rejected and with 400 when ``result`` is missing, is not valid
    JSON, or lacks one of the required keys.
    """
    # TODO: task_tracker
    token = request.form.get("token")
    name = oddb.db.check_api_token(token)
    if not name:
        return abort(403)

    # Validate the payload up front: json.loads(None) raises TypeError,
    # malformed JSON raises ValueError and an absent key raises KeyError.
    # All of these are client errors, not server errors.
    try:
        tr = json.loads(request.form.get("result"))
        fields = [
            tr[key]
            for key in ("status_code", "file_count", "start_time", "end_time", "website_id")
        ]
    except (TypeError, ValueError, KeyError):
        return abort(400)

    oddb.logger.debug("Task result: " + str(tr))
    task_result = TaskResult(*fields)

    oddb.logger.info("Task for " + str(task_result.website_id) + " completed by " + name)
    task = oddb.db.complete_task(task_result.website_id, name)

    if not task:
        oddb.logger.error("ERROR: " + name + " indicated that task for " + str(task_result.website_id) +
                          " was completed but there is no such task in the database.")
        return "No such task"

    # The uploaded file list is optional; pass None when nothing was uploaded.
    filename = "./tmp/" + str(task_result.website_id) + ".json"
    if not os.path.exists(filename):
        filename = None
    oddb.taskManager.complete_task(filename, task, task_result, name)

    # Re-check existence before deleting: the file may already be gone
    # once complete_task has returned.
    if filename and os.path.exists(filename):
        os.remove(filename)

    # Handle task callback
    callback = PostCrawlCallbackFactory.get_callback(task)
    if callback:
        callback.run(task_result, oddb.search)

    return "Successfully logged task result and indexed files"
@app.route("/api/task/upload", methods=["POST"])
def api_upload():
    """Append an uploaded chunk to the temporary result file of a website.

    Form fields read:
        token:      API token, checked with ``oddb.db.check_api_token``.
        website_id: identifier used to name the file ``./tmp/<website_id>.json``.
    Files read:
        file_list:  chunk appended to that file (optional).

    Returns ``"ok"`` for an authenticated request. Aborts with 403 when
    the token is rejected and with 400 when a chunk is supplied together
    with a missing or path-like ``website_id``.
    """
    token = request.form.get("token")
    name = oddb.db.check_api_token(token)
    if not name:
        return abort(403)

    website_id = request.form.get("website_id")
    oddb.logger.debug("Result part upload for '" + str(website_id) + "' by " + name)

    if "file_list" in request.files:
        # website_id comes straight from the request and becomes part of
        # a filesystem path: refuse anything that is absent or that
        # contains a directory component (e.g. "../../etc/x").
        if not website_id or os.path.basename(website_id) != website_id:
            return abort(400)

        file = request.files['file_list']
        filename = "./tmp/" + website_id + ".json"

        # Read the whole chunk into memory first so that a request that
        # fails mid-transfer cannot leave a partially written file.
        buf = file.stream.read()

        # Append (creating the file if needed) only once everything was
        # read successfully; the lock serialises writers in this process.
        with uploadLock, open(filename, "a+b") as f:
            f.write(buf)

        oddb.logger.debug("Written chunk to file")

    return "ok"
@app.route("/api/website/by_url", methods=["GET"])
def api_website_by_url():
@@ -126,52 +54,6 @@ def setup_api(app):
else:
return abort(403)
@app.route("/api/task/force_enqueue", methods=["POST"])
def api_task_enqueue():
    """Queue a task described by the JSON request body.

    JSON keys read: ``token``, ``website_id``, ``url``, ``priority``,
    ``callback_type`` and ``callback_args``.

    Returns an empty body on success. Aborts with 400 when the body is
    not a JSON object or a required key is missing, and with 403 when
    the token is rejected.
    """
    payload = request.json

    # payload may be None or a non-dict JSON value, in which case
    # subscripting raises TypeError rather than KeyError.
    try:
        token = payload["token"]
    except (KeyError, TypeError):
        return abort(400)

    name = oddb.db.check_api_token(token)
    if not name:
        return abort(403)

    # Collect the task fields before building the Task so that a missing
    # key is reported as a client error instead of an unhandled exception.
    try:
        website_id = payload["website_id"]
        url = payload["url"]
        priority = payload["priority"]
        callback_type = payload["callback_type"]
        callback_args = payload["callback_args"]
    except (KeyError, TypeError):
        return abort(400)

    task = Task(website_id, url, priority, callback_type, json.dumps(callback_args))

    oddb.logger.info("API force enqueue by " + name + "\n(" + str(task.to_json()) + ")")
    oddb.taskManager.queue_task(task)
    return ""
@app.route("/api/task/try_enqueue", methods=["POST"])
def api_task_try_enqueue():
    """Ask ``oddb.try_enqueue`` to enqueue a URL and report the outcome.

    Form fields read:
        token: API token, checked with ``oddb.db.check_api_token``.
        url:   URL handed to ``oddb.try_enqueue``.

    Returns a JSON string with the keys ``message`` and ``result``.
    Aborts with 403 when the token is rejected and with 400 when the
    ``url`` field is absent.
    """
    token = request.form.get("token")
    name = oddb.db.check_api_token(token)
    if not name:
        return abort(403)

    url = request.form.get("url")
    if url is None:
        # Without this guard None reaches try_enqueue and the string
        # concatenation in the log call below raises TypeError.
        return abort(400)

    # TODO: task_tracker
    message, result = oddb.try_enqueue(url)

    oddb.logger.info("API try enqueue '" + url + "' by " + name + " (" + message + ")")

    return json.dumps({
        "message": message,
        "result": result
    })
@app.route("/api/website/random")
def api_random_website():
token = request.json["token"]
@@ -215,9 +97,10 @@ def setup_api(app):
@app.route("/cap", methods=["GET"])
def cap():
    """Generate a captcha and send its image file to the client.

    A fresh UUID string is stored in the session under ``"cap"`` and
    used as the key under which the captcha word is saved.
    """
    word = captcha.make_captcha()

    # Build the id as a string in one step; the previous intermediate
    # UUID object was overwritten immediately and never used.
    cap_id = str(uuid4())
    session["cap"] = cap_id

    # NOTE(review): both stores are written here. The sessionStore line
    # looks like the pre-Redis variant of this code; confirm whether it
    # can be dropped.
    oddb.sessionStore[cap_id] = word
    oddb.redis.set(cap_id, word)

    return send_file(captcha.get_path(word), cache_timeout=0)