mirror of
https://github.com/simon987/od-database.git
synced 2025-12-16 08:09:04 +00:00
Logging and bugfix for http crawler
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from flask import Flask, request, abort, Response, send_file
|
||||
from flask_httpauth import HTTPTokenAuth
|
||||
import json
|
||||
from crawl_server import logger
|
||||
from crawl_server.task_manager import TaskManager, Task
|
||||
import os
|
||||
import config
|
||||
@@ -35,11 +36,13 @@ def task_put():
|
||||
priority = request.json["priority"]
|
||||
callback_type = request.json["callback_type"]
|
||||
callback_args = request.json["callback_args"]
|
||||
except KeyError:
|
||||
except KeyError as e:
|
||||
logger.error("Invalid task put request from " + request.remote_addr + " missing key: " + str(e))
|
||||
return abort(400)
|
||||
|
||||
task = Task(website_id, url, priority, callback_type, callback_args)
|
||||
tm.put_task(task)
|
||||
logger.info("Submitted new task to queue: " + str(task.to_json()))
|
||||
return '{"ok": "true"}'
|
||||
|
||||
return abort(400)
|
||||
@@ -49,6 +52,7 @@ def task_put():
|
||||
@auth.login_required
|
||||
def get_completed_tasks():
|
||||
json_str = json.dumps([result.to_json() for result in tm.get_non_indexed_results()])
|
||||
logger.debug("Webserver has requested list of newly completed tasks from " + request.remote_addr)
|
||||
return Response(json_str, mimetype="application/json")
|
||||
|
||||
|
||||
@@ -57,6 +61,7 @@ def get_completed_tasks():
|
||||
def get_current_tasks():
|
||||
|
||||
current_tasks = tm.get_current_tasks()
|
||||
logger.debug("Webserver has requested list of current tasks from " + request.remote_addr)
|
||||
return json.dumps([t.to_json() for t in current_tasks])
|
||||
|
||||
|
||||
@@ -66,8 +71,10 @@ def get_file_list(website_id):
|
||||
|
||||
file_name = "./crawled/" + str(website_id) + ".json"
|
||||
if os.path.exists(file_name):
|
||||
logger.info("Webserver requested file list of website with id" + str(website_id))
|
||||
return send_file(file_name)
|
||||
else:
|
||||
logger.error("Webserver requested file list of non-existent or empty website with id: " + str(website_id))
|
||||
return abort(404)
|
||||
|
||||
|
||||
@@ -77,24 +84,19 @@ def free_file_list(website_id):
|
||||
file_name = "./crawled/" + str(website_id) + ".json"
|
||||
if os.path.exists(file_name):
|
||||
os.remove(file_name)
|
||||
logger.debug("Webserver indicated that the files for the website with id " +
|
||||
str(website_id) + " are safe to delete")
|
||||
return '{"ok": "true"}'
|
||||
else:
|
||||
return abort(404)
|
||||
|
||||
|
||||
@app.route("/task/logs/")
|
||||
@auth.login_required
|
||||
def get_task_logs():
|
||||
|
||||
json_str = json.dumps([result.to_json() for result in tm.get_all_results()])
|
||||
return Response(json_str, mimetype="application/json")
|
||||
|
||||
|
||||
@app.route("/task/pop_all")
|
||||
@auth.login_required
|
||||
def pop_queued_tasks():
|
||||
|
||||
json_str = json.dumps([task.to_json() for task in tm.pop_tasks()])
|
||||
logger.info("Webserver poped all queued tasks")
|
||||
return Response(json_str, mimetype="application/json")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user