mirror of
https://github.com/simon987/od-database.git
synced 2025-04-18 01:46:46 +00:00
261 lines
8.7 KiB
Python
261 lines
8.7 KiB
Python
import json
|
|
import os
|
|
from threading import Lock
|
|
|
|
from flask import request, abort, Response, send_file, session
|
|
|
|
import common as oddb
|
|
import captcha
|
|
from callbacks import PostCrawlCallbackFactory
|
|
from database import Task, Website
|
|
from search.search import InvalidQueryException
|
|
from tasks import TaskResult
|
|
|
|
# Held by api_upload while it appends an uploaded chunk to its file under ./tmp.
uploadLock = Lock()
|
|
|
|
|
|
_WEBSITE_ID_DIGITS = "0123456789"


def _is_safe_website_id(website_id):
    """Return True when *website_id* is a non-empty string of ASCII digits.

    Upload file names are built from this client-supplied value, so anything
    else (``None``, ``"../x"``, ...) must never reach the file system.
    NOTE(review): assumes website ids are plain non-negative integers - confirm.
    """
    if not isinstance(website_id, str) or not website_id:
        return False
    return all(ch in _WEBSITE_ID_DIGITS for ch in website_id)


def _json_object_or_400():
    """Return the request's JSON body as a dict, aborting with 400 otherwise.

    ``silent=True`` makes a missing, non-JSON or malformed body come back as
    ``None`` instead of raising, so every bad body ends up as the same 400.
    """
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        abort(400)
    return body


def setup_api(app):
    """Register the ``/api/...`` routes and the ``/cap`` captcha route on *app*.

    Every ``/api/...`` view resolves its caller through
    ``oddb.db.check_api_token`` and answers 403 when no name comes back.
    Malformed or incomplete requests are answered with 400.

    :param app: object exposing Flask's ``route(rule, **options)`` decorator
    """

    @app.route("/api/task/get", methods=["POST"])
    def api_get_task():
        """Hand a task to the calling crawler, creating one if none is queued."""
        token = request.form.get("token")
        name = oddb.db.check_api_token(token)
        # .get() already yields None for an absent field, so the comparison
        # alone covers the "field missing" case.
        accept_ftp = request.form.get("accept") == "ftp"

        if not name:
            return abort(403)

        task = oddb.db.pop_task(name, accept_ftp)
        oddb.logger.debug("API get task from " + name)

        if task:
            oddb.logger.info("Assigning task " + str(task.to_json()) + " to " + name)
        else:
            oddb.logger.info("No queued tasks, creating a new one")

            try:
                task = oddb.db.make_task_for_oldest()
            except Exception:
                # Not a bare except: SystemExit/KeyboardInterrupt must propagate.
                oddb.logger.error("Couldn't create new task")
                abort(404)

        return Response(str(task), mimetype="application/json")

    @app.route("/api/task/cancel", methods=["POST"])
    def api_cancel_task():
        """Delete the task of the website named by the ``website_id`` form field."""
        token = request.form.get("token")
        name = oddb.db.check_api_token(token)

        if not name:
            return abort(403)

        website_id = request.form.get("website_id")
        if not website_id:
            return abort(400)

        oddb.logger.debug("API task cancel for " + str(website_id) + " by " + name)
        oddb.db.delete_task(website_id)
        return Response("cancelled task")

    @app.route("/api/task/complete", methods=["POST"])
    def api_complete_task():
        """Record a crawl result sent as JSON in the ``result`` form field.

        When the database knows the task, the uploaded ``./tmp/<id>.json``
        file (if any) is passed to the task manager and then removed, and the
        task's callback (if any) is run.
        """
        token = request.form.get("token")
        name = oddb.db.check_api_token(token)

        if not name:
            return abort(403)

        try:
            # json.loads(None) raises TypeError, malformed JSON raises ValueError.
            tr = json.loads(request.form.get("result"))
        except (TypeError, ValueError):
            return abort(400)
        oddb.logger.debug("Task result: " + str(tr))

        try:
            # Pull the fields out before building TaskResult so that only
            # "field missing / body is not an object" is reported as 400.
            result_args = [tr[key] for key in ("status_code", "file_count", "start_time",
                                               "end_time", "website_id")]
        except (KeyError, TypeError):
            return abort(400)
        task_result = TaskResult(*result_args)

        oddb.logger.info("Task for " + str(task_result.website_id) + " completed by " + name)
        task = oddb.db.complete_task(task_result.website_id, name)

        if not task:
            oddb.logger.error("ERROR: " + name + " indicated that task for " + str(task_result.website_id) +
                              " was completed but there is no such task in the database.")
            return "No such task"

        filename = "./tmp/" + str(task_result.website_id) + ".json"
        if not os.path.exists(filename):
            # Nothing was uploaded for this website.
            filename = None
        oddb.taskManager.complete_task(filename, task, task_result, name)

        if filename and os.path.exists(filename):
            os.remove(filename)

        # Handle task callback
        callback = PostCrawlCallbackFactory.get_callback(task)
        if callback:
            callback.run(task_result, oddb.search)

        return "Successfully logged task result and indexed files"

    @app.route("/api/task/upload", methods=["POST"])
    def api_upload():
        """Append the uploaded ``file_list`` chunk to ``./tmp/<website_id>.json``."""
        token = request.form.get("token")
        name = oddb.db.check_api_token(token)

        if not name:
            return abort(403)

        website_id = request.form.get("website_id")
        oddb.logger.debug("Result part upload for '" + str(website_id) + "' by " + name)

        if "file_list" in request.files:
            # website_id becomes part of a path: refuse anything but digits.
            if not _is_safe_website_id(website_id):
                return abort(400)

            upload = request.files['file_list']
            filename = "./tmp/" + website_id + ".json"

            # Read the whole chunk into memory first so that a request that
            # fails midway cannot leave a partially written file behind.
            buf = upload.stream.read()

            # Append (creating the file if needed) only once everything was
            # read successfully.
            with uploadLock:
                with open(filename, "a+b") as f:
                    f.write(buf)

            oddb.logger.debug("Written chunk to file")

        # NOTE(review): a request without "file_list" is acknowledged without
        # writing anything - confirm this is the intended contract.
        return "ok"

    @app.route("/api/website/by_url", methods=["GET"])
    def api_website_by_url():
        """Return the id of the website registered for ``url``, or 404."""
        token = request.args.get("token")
        name = oddb.db.check_api_token(token)

        if not name:
            return abort(403)

        url = request.args.get("url")
        if url is None:
            return abort(400)

        website = oddb.db.get_website_by_url(url)
        oddb.logger.info("API get website by url '" + url + "' by " + name)
        if website:
            return str(website.id)
        return abort(404)

    @app.route("/api/website/blacklisted", methods=["GET"])
    def api_website_is_blacklisted():
        """Return ``str(oddb.db.is_blacklisted(url))`` for the ``url`` argument."""
        token = request.args.get("token")
        url = request.args.get("url")
        name = oddb.db.check_api_token(token)

        if not name:
            return abort(403)
        if url is None:
            return abort(400)

        oddb.logger.info("API get website is blacklisted '" + url + "' by " + name)
        return str(oddb.db.is_blacklisted(url))

    @app.route("/api/website/add", methods=["GET"])
    def api_add_website():
        """Insert a website for ``url`` and return its new id as text."""
        token = request.args.get("token")
        url = request.args.get("url")

        name = oddb.db.check_api_token(token)
        if not name:
            return abort(403)
        if url is None:
            # Reject before inserting so no URL-less website row is created.
            return abort(400)

        # remote_addr may be None, hence str() before concatenating.
        origin = str(request.remote_addr) + "_" + request.headers.get("X-Forwarded-For", "")
        website_id = oddb.db.insert_website(Website(url, origin, "API_CLIENT_" + name))
        oddb.logger.info("API add website '" + url + "' by " + name + "(" + str(website_id) + ")")
        return str(website_id)

    @app.route("/api/task/force_enqueue", methods=["POST"])
    def api_task_enqueue():
        """Queue a task built from the JSON body, without further checks."""
        body = _json_object_or_400()
        try:
            token = body["token"]
        except KeyError:
            return abort(400)

        name = oddb.db.check_api_token(token)
        if not name:
            return abort(403)

        try:
            task_args = (
                body["website_id"],
                body["url"],
                body["priority"],
                body["callback_type"],
                json.dumps(body["callback_args"]),
            )
        except KeyError:
            return abort(400)
        task = Task(*task_args)

        oddb.logger.info("API force enqueue by " + name + "\n(" + str(task.to_json()) + ")")

        oddb.taskManager.queue_task(task)
        return ""

    @app.route("/api/task/try_enqueue", methods=["POST"])
    def api_task_try_enqueue():
        """Pass ``url`` to ``oddb.try_enqueue`` and return its message/result as JSON."""
        token = request.form.get("token")
        name = oddb.db.check_api_token(token)

        if not name:
            return abort(403)

        url = request.form.get("url")
        if url is None:
            return abort(400)

        message, result = oddb.try_enqueue(url)

        oddb.logger.info("API try enqueue '" + url + "' by " + name + " (" + message + ")")

        return json.dumps({
            "message": message,
            "result": result
        })

    @app.route("/api/website/random")
    def api_random_website():
        """Return a random website id as text.

        The token is read from the JSON body as before; because this route
        only answers GET, a ``token`` query argument is accepted as well.
        """
        body = request.get_json(silent=True)
        if isinstance(body, dict):
            token = body.get("token")
        else:
            token = request.args.get("token")
        name = oddb.db.check_api_token(token)

        if not name:
            return abort(403)

        oddb.logger.info("API get random website by " + name)
        return str(oddb.db.get_random_website_id())

    @app.route("/api/search", methods=["POST"])
    def api_search():
        """Run a search described by the JSON body and return the hits as JSON.

        An ``InvalidQueryException`` is reported by returning its message as
        the response body.
        """
        body = _json_object_or_400()
        name = oddb.db.check_api_token(body.get("token"))

        if not name:
            return abort(403)

        try:
            # Collect the arguments up front so that a KeyError raised inside
            # the search engine itself is not mistaken for a missing field.
            query = body["query"]
            search_args = (
                query,
                body["page"], body["per_page"],
                body["sort_order"],
                body["extensions"],
                body["size_min"], body["size_max"],
                body["match_all"],
                body["fields"],
                body["date_min"], body["date_max"],
            )
        except KeyError:
            return abort(400)

        try:
            hits = oddb.searchEngine.search(*search_args)

            hits = oddb.db.join_website_on_search_result(hits)
            oddb.logger.info("API search '" + str(query) + "' by " + name)
            return json.dumps(hits)

        except InvalidQueryException as e:
            oddb.logger.info("API search failed: " + str(e))
            return str(e)

    @app.route("/cap", methods=["GET"])
    def cap():
        """Create a captcha, remember its word in the session and send its file."""
        word = captcha.make_captcha()
        session["cap"] = word

        # NOTE(review): newer Flask releases renamed cache_timeout to max_age -
        # confirm against the Flask version this project pins.
        return send_file(captcha.get_path(word), cache_timeout=0)
|
|
|