mirror of https://github.com/simon987/od-database.git
synced 2025-04-20 02:46:45 +00:00

Commit 7f857d641f: Change ES settings, big refactor, removed recaptcha
This commit is contained in:
parent d905c3efd5
@@ -27,10 +27,7 @@ Create `/config.py` and fill out the parameters. Sample config:
 CAPTCHA_LOGIN = False
 CAPTCHA_SUBMIT = False
 CAPTCHA_SEARCH = False
-CAPTCHA_SITE_KEY = ""
-CAPTCHA_SECRET_KEY = ""
-CAPTCHA_S_SITE_KEY = ""
-CAPTCHA_S_SECRET_KEY = ""
+CAPTCHA_EVERY = 10
 
 # Flask secret key for sessions
 FLASK_SECRET = ""
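For reference, a minimal sketch of what `/config.py` might look like after this change. The values are placeholders; `RESULTS_PER_PAGE` is inferred from its use as `config.RESULTS_PER_PAGE` elsewhere in this diff, the rest mirrors the sample above:

# config.py (hypothetical values)
CAPTCHA_LOGIN = False
CAPTCHA_SUBMIT = False
CAPTCHA_SEARCH = False
# With the home-made captcha, one solved challenge stays valid for the next N pages
CAPTCHA_EVERY = 10

# Flask secret key for sessions
FLASK_SECRET = "change-me"

# Allowed page sizes for search results (assumed shape, based on usage)
RESULTS_PER_PAGE = (25, 50, 100)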
@@ -3,12 +3,3 @@ from logging import FileHandler, StreamHandler
 
 import sys
 
-logger = logging.getLogger("default")
-logger.setLevel(logging.DEBUG)
-
-formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
-file_handler = FileHandler("oddb.log")
-file_handler.setFormatter(formatter)
-logger.addHandler(file_handler)
-logger.addHandler(StreamHandler(sys.stdout))
-
api.py (new file, 265 lines)
@@ -0,0 +1,265 @@
import json
import os
from threading import Lock

from flask import request, abort, Response, send_file, session

import common as oddb
import captcha
from callbacks import PostCrawlCallbackFactory
from database import Task, Website
from search.search import InvalidQueryException
from tasks import TaskResult

uploadLock = Lock()


def setup_api(app):
    @app.route("/api/task/get", methods=["POST"])
    def api_get_task():
        token = request.form.get("token")
        name = oddb.db.check_api_token(token)
        accept_ftp = request.form.get("accept") == "ftp" if "accept" in request.form else False

        if name:
            task = oddb.db.pop_task(name, accept_ftp)
            oddb.logger.debug("API get task from " + name)

            if task:
                oddb.logger.info("Assigning task " + str(task.to_json()) + " to " + name)
            else:
                oddb.logger.info("No queued tasks, creating a new one")

                try:
                    website_id = oddb.db.get_oldest_website_id()
                    website = oddb.db.get_website_by_id(website_id)
                    task = Task(website_id, website.url)
                    oddb.db.put_task(task)

                    task = oddb.db.pop_task(name, accept_ftp)
                except:
                    oddb.logger.error("Couldn't create new task")
                    abort(404)

            return Response(str(task), mimetype="application/json")
        else:
            return abort(403)

    @app.route("/api/task/cancel", methods=["POST"])
    def api_cancel_task():
        token = request.form.get("token")
        name = oddb.db.check_api_token(token)

        if name:
            website_id = request.form.get("website_id") if "website_id" in request.form else None
            if website_id:
                oddb.logger.debug("API task cancel for " + str(website_id) + " by " + name)
                oddb.db.delete_task(website_id)
                return Response("cancelled task")
            else:
                abort(400)
        else:
            abort(403)

    @app.route("/api/task/complete", methods=["POST"])
    def api_complete_task():
        token = request.form.get("token")
        name = oddb.db.check_api_token(token)

        if name:
            tr = json.loads(request.form.get("result"))
            oddb.logger.debug("Task result: " + str(tr))
            task_result = TaskResult(tr["status_code"], tr["file_count"], tr["start_time"], tr["end_time"],
                                     tr["website_id"])

            oddb.logger.info("Task for " + str(task_result.website_id) + " completed by " + name)
            task = oddb.db.complete_task(task_result.website_id, name)

            if task:

                filename = "./tmp/" + str(task_result.website_id) + ".json"
                if not os.path.exists(filename):
                    filename = None
                oddb.taskManager.complete_task(filename, task, task_result, name)

                if filename and os.path.exists(filename):
                    os.remove(filename)

                # Handle task callback
                callback = PostCrawlCallbackFactory.get_callback(task)
                if callback:
                    callback.run(task_result, oddb.search)

                return "Successfully logged task result and indexed files"

            else:
                oddb.logger.error("ERROR: " + name + " indicated that task for " + str(task_result.website_id) +
                                  " was completed but there is no such task in the database.")
                return "No such task"
        return abort(403)

    @app.route("/api/task/upload", methods=["POST"])
    def api_upload():
        token = request.form.get("token")
        name = oddb.db.check_api_token(token)

        if name:
            website_id = request.form.get("website_id")
            oddb.logger.debug("Result part upload for '" + str(website_id) + "' by " + name)

            if "file_list" in request.files:
                file = request.files['file_list']

                filename = "./tmp/" + str(website_id) + ".json"

                # Read the file into memory cuz if the request fails
                # no file is corrupted.
                buf = file.stream.read()

                # Write to file (create if not exists) when
                # everything read successfully.
                with uploadLock:
                    with open(filename, "a+b") as f:
                        f.write(buf)

                oddb.logger.debug("Written chunk to file")
            return "ok"
        else:
            return abort(403)

    @app.route("/api/website/by_url", methods=["GET"])
    def api_website_by_url():
        token = request.args.get("token")
        name = oddb.db.check_api_token(token)

        if name:
            url = request.args.get("url")
            website = oddb.db.get_website_by_url(url)
            oddb.logger.info("API get website by url '" + url + "' by " + name)
            if website:
                return str(website.id)
            return abort(404)
        else:
            return abort(403)

    @app.route("/api/website/blacklisted", methods=["GET"])
    def api_website_is_blacklisted():
        token = request.args.get("token")
        url = request.args.get("url")
        name = oddb.db.check_api_token(token)

        if name:
            oddb.logger.info("API get website is blacklisted '" + url + "' by " + name)
            return str(oddb.db.is_blacklisted(url))
        else:
            return abort(403)

    @app.route("/api/website/add", methods=["GET"])
    def api_add_website():
        token = request.args.get("token")
        url = request.args.get("url")

        name = oddb.db.check_api_token(token)
        if name:

            website_id = oddb.db.insert_website(Website(url, str(request.remote_addr + "_" +
                                                                 request.headers.get("X-Forwarded-For", "")),
                                                        "API_CLIENT_" + name))
            oddb.logger.info("API add website '" + url + "' by " + name + "(" + str(website_id) + ")")
            return str(website_id)
        else:
            return abort(403)

    @app.route("/api/task/force_enqueue", methods=["POST"])
    def api_task_enqueue():
        try:
            token = request.json["token"]
        except KeyError:
            return abort(400)

        name = oddb.db.check_api_token(token)

        if name:

            task = Task(
                request.json["website_id"],
                request.json["url"],
                request.json["priority"],
                request.json["callback_type"],
                json.dumps(request.json["callback_args"])
            )

            oddb.logger.info("API force enqueue by " + name + "\n(" + str(task.to_json()) + ")")

            oddb.taskManager.queue_task(task)
            return ""
        else:
            return abort(403)

    @app.route("/api/task/try_enqueue", methods=["POST"])
    def api_task_try_enqueue():
        token = request.form.get("token")
        name = oddb.db.check_api_token(token)

        if name:

            url = request.form.get("url")
            message, result = oddb.try_enqueue(url)

            oddb.logger.info("API try enqueue '" + url + "' by " + name + " (" + message + ")")

            return json.dumps({
                "message": message,
                "result": result
            })
        else:
            return abort(403)

    @app.route("/api/website/random")
    def api_random_website():
        token = request.json["token"]
        name = oddb.db.check_api_token(token)

        if name:
            oddb.logger.info("API get random website by " + name)
            return str(oddb.db.get_random_website_id())
        else:
            return abort(403)

    @app.route("/api/search", methods=["POST"])
    def api_search():
        token = request.json["token"]
        name = oddb.db.check_api_token(token)

        if name:

            try:
                hits = oddb.searchEngine.search(
                    request.json["query"],
                    request.json["page"], request.json["per_page"],
                    request.json["sort_order"],
                    request.json["extensions"],
                    request.json["size_min"], request.json["size_max"],
                    request.json["match_all"],
                    request.json["fields"],
                    request.json["date_min"], request.json["date_max"]
                )

                hits = oddb.db.join_website_on_search_result(hits)
                oddb.logger.info("API search '" + request.json["query"] + "' by " + name)
                return json.dumps(hits)

            except InvalidQueryException as e:
                oddb.logger.info("API search failed: " + str(e))
                return str(e)
        else:
            return abort(403)

    @app.route("/cap", methods=["GET"])
    def cap():
        word = captcha.make_captcha()
        session["cap"] = word

        return send_file(captcha.get_path(word), cache_timeout=0)
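For illustration, a crawler client might exercise these endpoints roughly like this. This is a sketch, assuming the `requests` library, a server on localhost, that the serialized task exposes a `website_id` key, and a token generated in the admin dashboard:

import json

import requests

API = "http://localhost:12345"
TOKEN = "..."  # API token from /dashboard (placeholder)

# Fetch a queued task (the server creates one if the queue is empty)
r = requests.post(API + "/api/task/get", data={"token": TOKEN, "accept": "ftp"})
task = r.json()

# Upload the crawl result file list in one or more chunks
with open("file_list.json", "rb") as f:
    requests.post(API + "/api/task/upload",
                  data={"token": TOKEN, "website_id": task["website_id"]},
                  files={"file_list": f})

# Mark the task complete; keys match the TaskResult constructor above
result = {"status_code": 200, "file_count": 1234,
          "start_time": 0, "end_time": 10, "website_id": task["website_id"]}
requests.post(API + "/api/task/complete",
              data={"token": TOKEN, "result": json.dumps(result)})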
app.py
@@ -1,824 +1,16 @@
-from flask import Flask, render_template, redirect, request, flash, abort, Response, session
-from multiprocessing import Pool
-import json
-from urllib.parse import urlparse
-import logging
-import os
-import time
-import datetime
-from database import Database, Website
-from flask_recaptcha import ReCaptcha
-import od_util
-import config
-from flask_caching import Cache
-from tasks import TaskManager, Task, TaskResult
-from search.search import ElasticSearchEngine, InvalidQueryException
-from callbacks import PostCrawlCallbackFactory
-from threading import Lock
-
-uploadLock = Lock()
+from flask import Flask
+
+import api
+import config
+import views
+import template_filters
 
 app = Flask(__name__)
 app.secret_key = config.FLASK_SECRET
+template_filters.setup_template_filters(app)
 
-# Disable flask logging
-flaskLogger = logging.getLogger('werkzeug')
-flaskLogger.setLevel(logging.ERROR)
+views.setup_views(app)
+api.setup_api(app)
-
-logger = logging.getLogger("default")
-
-
-if config.CAPTCHA_SUBMIT or config.CAPTCHA_LOGIN:
-    recaptcha = ReCaptcha(app=app,
-                          site_key=config.CAPTCHA_SITE_KEY,
-                          secret_key=config.CAPTCHA_SECRET_KEY)
-else:
-    recaptcha = None
-if config.CAPTCHA_SEARCH:
-    recaptcha_search = ReCaptcha(app=app,
-                                 site_key=config.CAPTCHA_S_SITE_KEY,
-                                 secret_key=config.CAPTCHA_S_SECRET_KEY)
-else:
-    recaptcha_search = None
-
-db = Database("db.sqlite3")
-cache = Cache(app, config={'CACHE_TYPE': 'simple'})
-app.jinja_env.globals.update(truncate_path=od_util.truncate_path)
-app.jinja_env.globals.update(get_color=od_util.get_color)
-app.jinja_env.globals.update(get_mime=od_util.get_category)
-
-taskManager = TaskManager()
-searchEngine = ElasticSearchEngine("od-database")
-searchEngine.start_stats_scheduler()
-
-
-@app.template_filter("date_format")
-def date_format(value, format='%Y-%m-%d'):
-    return time.strftime(format, time.gmtime(value))
-
-
-@app.template_filter("datetime_format")
-def datetime_format(value, format='%Y-%m-%d %H:%M:%S'):
-    return time.strftime(format, time.gmtime(value))
-
-
-@app.template_filter("duration_format")
-def duration_format(value):
-    delay = datetime.timedelta(seconds=value)
-    if (delay.days > 0):
-        out = str(delay).replace(" days, ", ":")
-    else:
-        out = str(delay)
-    out_ar = out.split(':')
-    out_ar = ["%02d" % (int(float(x))) for x in out_ar]
-    out = ":".join(out_ar)
-    return out
-
-
-@app.template_filter("from_timestamp")
-def from_timestamp(value):
-    return datetime.datetime.fromtimestamp(value)
-
-
-@app.route("/dl")
-@cache.cached(120)
-def downloads():
-    # Get content of downloads directory
-    dl_dir = "static/downloads/"
-    dir_content = os.listdir(dl_dir)
-
-    # Make paths relative to working directory
-    # Only allow csv files
-    files = [
-        (name, os.path.join(dl_dir, name))
-        for name in dir_content
-        if name.find(".csv") != -1
-    ]
-
-    # Stat files
-    # Remove any dirs placed accidentally
-    files = [
-        (f, full, os.stat(full))
-        for f, full in files
-        if os.path.isfile(full)
-    ]
-
-    if len(files) == 0:
-        logger.warning("No export file to display in /dl")
-
-    return render_template("downloads.html", export_file_stats=files)
-
-
-@app.route("/stats")
-@cache.cached(120)
-def stats_page():
-    crawl_server_stats = db.get_stats_by_crawler()
-    return render_template("stats.html", crawl_server_stats=crawl_server_stats)
-
-
-@app.route("/stats/json_chart")
-@cache.cached(240)
-def stats_json():
-    stats = searchEngine.get_global_stats()
-    if stats:
-        db.join_website_on_stats(stats)
-        return Response(json.dumps(stats), mimetype="application/json")
-    return abort(500)
-
-
-@app.route("/website/<int:website_id>/")
-def website_info(website_id):
-    website = db.get_website_by_id(website_id)
-
-    if website:
-        return render_template("website.html", website=website)
-    else:
-        abort(404)
-
-
-@app.route("/website/<int:website_id>/json_chart")
-@cache.memoize(60)
-def website_json_chart(website_id):
-    website = db.get_website_by_id(website_id)
-
-    if website:
-        stats = searchEngine.get_stats(website_id)
-        stats["base_url"] = website.url
-        stats["report_time"] = website.last_modified
-        return Response(json.dumps(stats), mimetype="application/json")
-    else:
-        abort(404)
-
-
-@app.route("/website/<int:website_id>/links")
-def website_links(website_id):
-    website = db.get_website_by_id(website_id)
-
-    if website:
-        links = searchEngine.get_link_list(website_id, website.url)
-        return Response("\n".join(links), mimetype="text/plain")
-    else:
-        abort(404)
-
-
-@app.route("/website/")
-def websites():
-    page = int(request.args.get("p")) if "p" in request.args else 0
-    url = request.args.get("url") if "url" in request.args else ""
-    if url:
-        parsed_url = urlparse(url)
-        if parsed_url.scheme:
-            search_term = (parsed_url.scheme + "://" + parsed_url.netloc)
-        else:
-            flash("Sorry, I was not able to parse this url format. "
-                  "Make sure you include the appropriate scheme (http/https/ftp)", "warning")
-            search_term = ""
-    else:
-        search_term = url
-
-    return render_template("websites.html",
-                           websites=db.get_websites(50, page, search_term),
-                           p=page, url=search_term, per_page=50)
-
-
-@app.route("/website/random")
-def random_website():
-    return redirect("/website/" + str(db.get_random_website_id()))
-
-
-## TODO: move to DB
-def get_empty_websites():
-    current_tasks = taskManager.get_queued_tasks()
-
-    queued_websites = [task.website_id for task in current_tasks]
-    all_websites = db.get_all_websites()
-    non_queued_websites = list(set(all_websites).difference(queued_websites))
-
-    return searchEngine.are_empty(non_queued_websites)
-
-
-@app.route("/website/delete_empty")
-def admin_delete_empty_website():
-    """Delete websites with no associated files that are not queued"""
-
-    if "username" in session:
-
-        empty_websites = get_empty_websites()
-
-        for website in empty_websites:
-            # db.delete_website(website)
-            pass
-
-        flash("Deleted: " + repr(list(empty_websites)), "success")
-        return redirect("/dashboard")
-
-    else:
-        abort(403)
-
-
-@app.route("/website/<int:website_id>/clear")
-def admin_clear_website(website_id):
-    if "username" in session:
-
-        searchEngine.delete_docs(website_id)
-        flash("Cleared all documents associated with this website", "success")
-        return redirect("/website/" + str(website_id))
-    else:
-        abort(403)
-
-
-@app.route("/website/<int:website_id>/delete")
-def admin_delete_website(website_id):
-    if "username" in session:
-
-        searchEngine.delete_docs(website_id)
-        db.delete_website(website_id)
-        flash("Deleted website " + str(website_id), "success")
-        return redirect("/website/")
-
-    else:
-        abort(403)
-
-
-@app.route("/website/<int:website_id>/rescan")
-def admin_rescan_website(website_id):
-    if "username" in session:
-
-        website = db.get_website_by_id(website_id)
-
-        if website:
-            priority = request.args.get("priority") if "priority" in request.args else 1
-            task = Task(website_id, website.url, priority)
-            taskManager.queue_task(task)
-
-            flash("Enqueued rescan task", "success")
-        else:
-            flash("Website does not exist", "danger")
-        return redirect("/website/" + str(website_id))
-
-    else:
-        abort(403)
-
-
-@app.route("/search")
-def search():
-    q = request.args.get("q") if "q" in request.args else ""
-    sort_order = request.args.get("sort_order") if "sort_order" in request.args else "score"
-
-    page = request.args.get("p") if "p" in request.args else "0"
-    page = int(page) if page.isdigit() else 0
-
-    per_page = request.args.get("per_page") if "per_page" in request.args else "50"
-    per_page = int(per_page) if per_page.isdigit() else "50"
-    per_page = per_page if per_page in config.RESULTS_PER_PAGE else 50
-
-    extensions = request.args.get("ext") if "ext" in request.args else None
-    extensions = [ext.strip().strip(".").lower() for ext in extensions.split(",")] if extensions else []
-
-    size_min = request.args.get("size_min") if "size_min" in request.args else "size_min"
-    size_min = int(size_min) if size_min.isdigit() else 0
-    size_max = request.args.get("size_max") if "size_max" in request.args else "size_max"
-    size_max = int(size_max) if size_max.isdigit() else 0
-
-    date_min = request.args.get("date_min") if "date_min" in request.args else "date_min"
-    date_min = int(date_min) if date_min.isdigit() else 0
-    date_max = request.args.get("date_max") if "date_max" in request.args else "date_max"
-    date_max = int(date_max) if date_max.isdigit() else 0
-
-    match_all = "all" in request.args
-
-    field_name = "field_name" in request.args
-    field_trigram = "field_trigram" in request.args
-    field_path = "field_path" in request.args
-
-    if not field_name and not field_trigram and not field_path:
-        # If no fields are selected, search in all
-        field_name = field_path = field_trigram = True
-
-    fields = []
-    if field_path:
-        fields.append("path")
-    if field_name:
-        fields.append("name^5")
-    if field_trigram:
-        fields.append("name.nGram^2")
-
-    if len(q) >= 3:
-
-        blocked = False
-        hits = None
-        response = request.args.get("g-recaptcha-response", "")
-        if not config.CAPTCHA_SEARCH or recaptcha_search.verify(response):
-
-            try:
-                hits = searchEngine.search(q, page, per_page, sort_order,
-                                           extensions, size_min, size_max, match_all, fields, date_min, date_max)
-                hits = db.join_website_on_search_result(hits)
-            except InvalidQueryException as e:
-                flash("<strong>Invalid query:</strong> " + str(e), "warning")
-                blocked = True
-            except:
-                flash("Query failed, this could mean that the search server is overloaded or is not reachable. "
-                      "Please try again later", "danger")
-
-            results = hits["hits"]["total"] if hits else -1
-            took = hits["took"] if hits else -1
-            forwarded_for = request.headers["X-Forwarded-For"] if "X-Forwarded-For" in request.headers else None
-
-            logger.info("SEARCH '{}' [res={}, t={}, p={}x{}, ext={}] by {}{}"
-                        .format(q, results, took, page, per_page, str(extensions),
-                                request.remote_addr, "_" + forwarded_for if forwarded_for else ""))
-
-            db.log_search(request.remote_addr, forwarded_for, q, extensions, page, blocked, results, took)
-            if blocked:
-                return redirect("/search")
-        else:
-            flash("<strong>Error:</strong> Invalid captcha please try again", "danger")
-
-    else:
-        hits = None
-
-    return render_template("search.html",
-                           results=hits,
-                           q=q,
-                           p=page, per_page=per_page,
-                           sort_order=sort_order,
-                           results_set=config.RESULTS_PER_PAGE,
-                           extensions=",".join(extensions),
-                           size_min=size_min, size_max=size_max,
-                           match_all=match_all,
-                           field_trigram=field_trigram, field_path=field_path, field_name=field_name,
-                           date_min=date_min, date_max=date_max,
-                           show_captcha=config.CAPTCHA_SEARCH, recaptcha=recaptcha_search)
-
-
-@app.route("/contribute")
-@cache.cached(600)
-def contribute():
-    return render_template("contribute.html")
-
-
-@app.route("/")
-@cache.cached(240)
-def home():
-    try:
-        stats = searchEngine.get_global_stats()
-        stats["website_count"] = len(db.get_all_websites())
-    except:
-        stats = {}
-    return render_template("home.html", stats=stats,
-                           show_captcha=config.CAPTCHA_SEARCH, recaptcha=recaptcha_search)
-
-
-@app.route("/submit")
-def submit():
-    queued_websites = taskManager.get_queued_tasks()[:30]
-    return render_template("submit.html", queue=queued_websites, recaptcha=recaptcha,
-                           show_captcha=config.CAPTCHA_SUBMIT)
-
-
-def try_enqueue(url):
-    url = os.path.join(url, "")
-    url = od_util.get_top_directory(url)
-
-    if not od_util.is_valid_url(url):
-        return "<strong>Error:</strong> Invalid url. Make sure to include the appropriate scheme.", "warning"
-
-    website = db.get_website_by_url(url)
-    if website:
-        return "Website already exists", "danger"
-
-    website = db.website_exists(url)
-    if website:
-        return "A parent directory of this url has already been posted", "danger"
-
-    if db.is_blacklisted(url):
-        return "<strong>Error:</strong> " \
-               "Sorry, this website has been blacklisted. If you think " \
-               "this is an error, please <a href='/contribute'>contact me</a>.", "danger"
-
-    if not od_util.is_od(url):
-        return "<strong>Error:</strong>" \
-               "The anti-spam algorithm determined that the submitted url is not " \
-               "an open directory or the server is not responding. If you think " \
-               "this is an error, please <a href='/contribute'>contact me</a>.", "danger"
-
-    website_id = db.insert_website(Website(url, str(request.remote_addr + "_" +
-                                                    request.headers.get("X-Forwarded-For", "")),
-                                           request.user_agent))
-
-    task = Task(website_id, url, priority=1)
-    taskManager.queue_task(task)
-
-    return "The website has been added to the queue", "success"
-
-
-@app.route("/enqueue", methods=["POST"])
-def enqueue():
-    if not config.CAPTCHA_SUBMIT or recaptcha.verify():
-
-        url = os.path.join(request.form.get("url"), "")
-        message, msg_type = try_enqueue(url)
-        flash(message, msg_type)
-
-        return redirect("/submit")
-
-    else:
-        flash("<strong>Error:</strong> Invalid captcha please try again", "danger")
-        return redirect("/submit")
-
-
-def check_url(url):
-    url = os.path.join(url, "")
-    try_enqueue(url)
-    return None
-
-
-@app.route("/enqueue_bulk", methods=["POST"])
-def enqueue_bulk():
-    if not config.CAPTCHA_SUBMIT or recaptcha.verify():
-
-        urls = request.form.get("urls")
-        if urls:
-            urls = urls.split()
-
-            if 0 < len(urls) <= 1000:  # TODO: Load from config & adjust placeholder/messages?
-
-                pool = Pool(processes=6)
-                pool.map(func=check_url, iterable=urls)
-                pool.close()
-
-                flash("Submitted websites to the queue", "success")
-
-                return redirect("/submit")
-
-            else:
-                flash("Too few or too many urls, please submit 1-10 urls", "danger")
-                return redirect("/submit")
-        else:
-            flash("Too few or too many urls, please submit 1-10 urls", "danger")
-            return redirect("/submit")
-    else:
-        flash("<strong>Error:</strong> Invalid captcha please try again", "danger")
-        return redirect("/submit")
-
-
-@app.route("/admin")
-def admin_login_form():
-    if "username" in session:
-        return redirect("/dashboard")
-    return render_template("admin.html", recaptcha=recaptcha, show_captcha=config.CAPTCHA_LOGIN)
-
-
-@app.route("/login", methods=["POST"])
-def admin_login():
-    if not config.CAPTCHA_LOGIN or recaptcha.verify():
-
-        username = request.form.get("username")
-        password = request.form.get("password")
-
-        if db.check_login(username, password):
-            session["username"] = username
-            flash("Logged in", "success")
-            return redirect("/dashboard")
-
-        flash("Invalid username/password combo", "danger")
-        return redirect("/admin")
-
-    else:
-        flash("Invalid captcha", "danger")
-        return redirect("/admin")
-
-
-@app.route("/logout")
-def admin_logout():
-    session.clear()
-    flash("Logged out", "info")
-    return redirect("/")
-
-
-@app.route("/dashboard")
-def admin_dashboard():
-    if "username" in session:
-
-        tokens = db.get_tokens()
-        blacklist = db.get_blacklist()
-
-        return render_template("dashboard.html", api_tokens=tokens, blacklist=blacklist)
-    else:
-        return abort(403)
-
-
-@app.route("/blacklist/add", methods=["POST"])
-def admin_blacklist_add():
-    if "username" in session:
-
-        url = request.form.get("url")
-        db.add_blacklist_website(url)
-        flash("Added item to blacklist", "success")
-        return redirect("/dashboard")
-
-    else:
-        return abort(403)
-
-
-@app.route("/blacklist/<int:blacklist_id>/delete")
-def admin_blacklist_remove(blacklist_id):
-    if "username" in session:
-        db.remove_blacklist_website(blacklist_id)
-        flash("Removed blacklist item", "success")
-        return redirect("/dashboard")
-
-
-@app.route("/generate_token", methods=["POST"])
-def admin_generate_token():
-    if "username" in session:
-
-        description = request.form.get("description")
-
-        db.generate_api_token(description)
-        flash("Generated API token", "success")
-
-        return redirect("/dashboard")
-    else:
-        return abort(403)
-
-
-@app.route("/del_token", methods=["POST"])
-def admin_del_token():
-    if "username" in session:
-
-        token = request.form.get("token")
-
-        db.delete_token(token)
-        flash("Deleted API token", "success")
-        return redirect("/dashboard")
-    else:
-        return abort(403)
-
-
-# TODO: pages scrolling
-@app.route("/logs", methods=["GET"])
-def admin_crawl_logs():
-    if "username" in session:
-
-        results = db.get_crawl_logs()
-
-        return render_template("crawl_logs.html", logs=results)
-    else:
-        return abort(403)
-
-
-@app.route("/api/task/get", methods=["POST"])
-def api_get_task():
-    token = request.form.get("token")
-    name = db.check_api_token(token)
-    accept_ftp = request.form.get("accept") == "ftp" if "accept" in request.form else False
-
-    if name:
-        task = db.pop_task(name, accept_ftp)
-        logger.debug("API get task from " + name)
-
-        if task:
-            logger.info("Assigning task " + str(task.to_json()) + " to " + name)
-        else:
-            logger.info("No queued tasks, creating a new one")
-
-            try:
-                website_id = db.get_oldest_website_id()
-                website = db.get_website_by_id(website_id)
-                task = Task(website_id, website.url)
-                db.put_task(task)
-
-                task = db.pop_task(name, accept_ftp)
-            except:
-                logger.error("Couldn't create new task")
-                abort(404)
-
-        return Response(str(task), mimetype="application/json")
-    else:
-        return abort(403)
-
-
-@app.route("/api/task/cancel", methods=["POST"])
-def api_cancel_task():
-    token = request.form.get("token")
-    name = db.check_api_token(token)
-
-    if name:
-        website_id = request.form.get("website_id") if "website_id" in request.form else None
-        if website_id:
-            logger.debug("API task cancel for " + str(website_id) + " by " + name)
-            db.delete_task(website_id)
-            return Response("cancelled task")
-        else:
-            abort(400)
-
-    else:
-        abort(403)
-
-
-@app.route("/api/task/complete", methods=["POST"])
-def api_complete_task():
-    token = request.form.get("token")
-    name = db.check_api_token(token)
-
-    if name:
-        tr = json.loads(request.form.get("result"))
-        logger.debug("Task result: " + str(tr))
-        task_result = TaskResult(tr["status_code"], tr["file_count"], tr["start_time"], tr["end_time"],
-                                 tr["website_id"])
-
-        logger.info("Task for " + str(task_result.website_id) + " completed by " + name)
-        task = db.complete_task(task_result.website_id, name)
-
-        if task:
-
-            filename = "./tmp/" + str(task_result.website_id) + ".json"
-            if not os.path.exists(filename):
-                filename = None
-            taskManager.complete_task(filename, task, task_result, name)
-
-            if filename and os.path.exists(filename):
-                os.remove(filename)
-
-            # Handle task callback
-            callback = PostCrawlCallbackFactory.get_callback(task)
-            if callback:
-                callback.run(task_result, search)
-
-            return "Successfully logged task result and indexed files"
-
-        else:
-            logger.error("ERROR: " + name + " indicated that task for " + str(task_result.website_id) +
-                         " was completed but there is no such task in the database.")
-            return "No such task"
-    return abort(403)
-
-
-@app.route("/api/task/upload", methods=["POST"])
-def api_upload():
-    token = request.form.get("token")
-    name = db.check_api_token(token)
-
-    if name:
-        website_id = request.form.get("website_id")
-        logger.debug("Result part upload for '" + str(website_id) + "' by " + name)
-
-        if "file_list" in request.files:
-            file = request.files['file_list']
-
-            filename = "./tmp/" + str(website_id) + ".json"
-
-            # Read the file into memory cuz if the request fails
-            # no file is corrupted.
-            buf = file.stream.read()
-
-            # Write to file (create if not exists) when
-            # everything read successfully.
-            with uploadLock:
-                with open(filename, "a+b") as f:
-                    f.write(buf)
-
-            logger.debug("Written chunk to file")
-        return "ok"
-    else:
-        return abort(403)
-
-
-@app.route("/api/website/by_url", methods=["GET"])
-def api_website_by_url():
-    token = request.args.get("token")
-    name = db.check_api_token(token)
-
-    if name:
-        url = request.args.get("url")
-        website = db.get_website_by_url(url)
-        logger.info("API get website by url '" + url + "' by " + name)
-        if website:
-            return str(website.id)
-        return abort(404)
-    else:
-        return abort(403)
-
-
-@app.route("/api/website/blacklisted", methods=["GET"])
-def api_website_is_blacklisted():
-    token = request.args.get("token")
-    url = request.args.get("url")
-    name = db.check_api_token(token)
-
-    if name:
-        logger.info("API get website is blacklisted '" + url + "' by " + name)
-        return str(db.is_blacklisted(url))
-    else:
-        return abort(403)
-
-
-@app.route("/api/website/add", methods=["GET"])
-def api_add_website():
-    token = request.args.get("token")
-    url = request.args.get("url")
-
-    name = db.check_api_token(token)
-    if name:
-
-        website_id = db.insert_website(Website(url, str(request.remote_addr + "_" +
-                                                        request.headers.get("X-Forwarded-For", "")),
-                                               "API_CLIENT_" + name))
-        logger.info("API add website '" + url + "' by " + name + "(" + str(website_id) + ")")
-        return str(website_id)
-    else:
-        return abort(403)
-
-
-@app.route("/api/task/force_enqueue", methods=["POST"])
-def api_task_enqueue():
-    try:
-        token = request.json["token"]
-    except KeyError:
-        return abort(400)
-
-    name = db.check_api_token(token)
-
-    if name:
-
-        task = Task(
-            request.json["website_id"],
-            request.json["url"],
-            request.json["priority"],
-            request.json["callback_type"],
-            json.dumps(request.json["callback_args"])
-        )
-
-        logger.info("API force enqueue by " + name + "\n(" + str(task.to_json()) + ")")
-
-        taskManager.queue_task(task)
-        return ""
-    else:
-        return abort(403)
-
-
-@app.route("/api/task/try_enqueue", methods=["POST"])
-def api_task_try_enqueue():
-    token = request.form.get("token")
-    name = db.check_api_token(token)
-
-    if name:
-
-        url = request.form.get("url")
-        message, result = try_enqueue(url)
-
-        logger.info("API try enqueue '" + url + "' by " + name + " (" + message + ")")
-
-        return json.dumps({
-            "message": message,
-            "result": result
-        })
-    else:
-        return abort(403)
-
-
-@app.route("/api/website/random")
-def api_random_website():
-    token = request.json["token"]
-    name = db.check_api_token(token)
-
-    if name:
-        logger.info("API get random website by " + name)
-        return str(db.get_random_website_id())
-    else:
-        return abort(403)
-
-
-@app.route("/api/search", methods=["POST"])
-def api_search():
-    token = request.json["token"]
-    name = db.check_api_token(token)
-
-    if name:
-
-        try:
-            hits = searchEngine.search(
-                request.json["query"],
-                request.json["page"], request.json["per_page"],
-                request.json["sort_order"],
-                request.json["extensions"],
-                request.json["size_min"], request.json["size_max"],
-                request.json["match_all"],
-                request.json["fields"],
-                request.json["date_min"], request.json["date_max"]
-            )
-
-            hits = db.join_website_on_search_result(hits)
-            logger.info("API search '" + request.json["query"] + "' by " + name)
-            return json.dumps(hits)
-
-        except InvalidQueryException as e:
-            logger.info("API search failed: " + str(e))
-            return str(e)
-    else:
-        return abort(403)
-
 
 if __name__ == '__main__':
     app.run("0.0.0.0", port=12345, threaded=True)
captcha.py (new file, 92 lines)
@@ -0,0 +1,92 @@
import os
import random

import numpy
import pylab
from PIL import Image, ImageDraw, ImageFont
import mpl_toolkits.mplot3d.axes3d as axes3d
import io
from wand.image import Image as WImage
from flask import request, session

import config
from common import logger

SIZE = (60, 20)
with open("words.txt") as f:
    WORDS = f.read().splitlines(keepends=False)


def get_code():

    if "cap_remaining" in session and session["cap_remaining"] > 0:
        return """
        <span class='text-muted' style='margin: 10px'>You will not be asked to complete a captcha for the next {} pages</span>
        """.format(session["cap_remaining"])

    return """
    <div class='form-group' style='text-align: center'>
        <img src='./cap' alt='cap' class='img-fluid' style='margin: 10px;'>
        <input class="form-control" name="cap" id="cap" placeholder="Verification captcha">
    </div>
    """


def get_path(word):
    return "captchas/{}.png".format(word)


def verify():
    if "cap_remaining" in session and session["cap_remaining"] > 0:
        session["cap_remaining"] -= 1
        return True

    attempt = request.form.get("cap") if "cap" in request.form else (
        request.args.get("cap") if "cap" in request.args else ""
    )

    if "cap" in session and session["cap"] == attempt:
        session["cap_remaining"] = config.CAPTCHA_EVERY
        return True
    return False


def make_captcha():
    word = random.choice(WORDS)
    path = get_path(word)

    logger.info("generating CAPTCHA: " + word)

    if os.path.exists(path):
        os.remove(path)

    image = Image.new('L', SIZE, 255)
    image_draw = ImageDraw.Draw(image)
    font = ImageFont.truetype("static/Hack-Regular.ttf", 12)

    image_draw.text((5, 3), word, font=font)

    x, y = numpy.meshgrid(range(SIZE[0]), range(SIZE[1]))
    z = 1 - numpy.asarray(image) / 255

    fig = pylab.figure()
    ax = axes3d.Axes3D(fig)
    ax.plot_wireframe(x, -y, z, rstride=1, cstride=1)
    ax.set_zlim((0, 20))
    ax.set_axis_off()
    pylab.close(fig)

    buf = io.BytesIO()
    fig.savefig(buf, dpi=150)
    buf.seek(0)
    image.close()

    with WImage(blob=buf.read()) as img:
        img.trim()
        img.save(filename=path)

    return word


if __name__ == "__main__":
    make_captcha()
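A view that wants to gate an action on this captcha would call verify() and re-render on failure. A sketch, assuming a Flask `app` in scope (the route and template names here are made up):

from flask import render_template

import captcha

@app.route("/guarded", methods=["POST"])
def guarded():
    if not captcha.verify():
        # Wrong or missing answer: show the form (with the <img src='./cap'> widget) again
        return render_template("guarded.html", show_captcha=True)
    # Correct answer: the session now skips the captcha for the next CAPTCHA_EVERY pages
    return "ok"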
common.py (new file, 33 lines)
@@ -0,0 +1,33 @@
from logging import FileHandler, StreamHandler

import sys

from database import Database
from search.search import ElasticSearchEngine
from tasks import TaskManager
import logging
from flask import session, abort

# Disable flask logging
flaskLogger = logging.getLogger('werkzeug')
flaskLogger.setLevel(logging.ERROR)

logger = logging.getLogger("default")
logger.setLevel(logging.DEBUG)

formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
file_handler = FileHandler("oddb.log")
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logger.addHandler(StreamHandler(sys.stdout))

taskManager = TaskManager()
searchEngine = ElasticSearchEngine("od-database")
searchEngine.start_stats_scheduler()
db = Database("db.sqlite3")


def require_role(role: str):

    if db.get_user_role(session.get("username", None)) != role:
        abort(403)
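A protected view can then guard itself with a single call. A sketch (the route is hypothetical; note that require_role() aborts the request with 403 rather than returning a value):

from flask import render_template

import common as oddb

@app.route("/dashboard")
def admin_dashboard():
    oddb.require_role("admin")  # aborts with 403 unless the session user has the 'admin' role
    return render_template("dashboard.html")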
database.py
@@ -8,7 +8,6 @@ import uuid
 import tasks
 
 
-
 class BlacklistedWebsite:
     def __init__(self, blacklist_id, url):
         self.id = blacklist_id
@@ -182,6 +181,18 @@ class Database:
             return bcrypt.checkpw(password.encode(), db_user[0])
         return False
 
+    def get_user_role(self, username: str):
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+
+            cursor.execute("SELECT role FROM Admin WHERE username=?", (username, ))
+
+            db_user = cursor.fetchone()
+
+            if db_user:
+                return db_user[0]
+            return False
+
     def generate_login(self, username, password) -> None:
 
         with sqlite3.connect(self.db_path) as conn:
@@ -189,7 +200,7 @@ class Database:
 
             hashed_pw = bcrypt.hashpw(password.encode(), bcrypt.gensalt(12))
 
-            cursor.execute("INSERT INTO Admin (username, password) VALUES (?,?)", (username, hashed_pw))
+            cursor.execute("INSERT INTO Admin (username, password, role) VALUES (?,?, 'admin')", (username, hashed_pw))
             conn.commit()
 
     def check_api_token(self, token) -> str:
@@ -11,7 +11,8 @@ CREATE TABLE Website (
 
 CREATE TABLE Admin (
     username TEXT PRIMARY KEY NOT NULL,
-    password TEXT
+    password TEXT,
+    role TEXT
 );
 
 CREATE TABLE BlacklistedWebsite (
@@ -3,7 +3,6 @@ flask_testing
 requests
 bs4
 validators
-flask_recaptcha
 Flask-Caching
 praw
 humanfriendly
@@ -19,3 +18,6 @@ pyOpenSSL
 pybloom-live
 pycurl
 lxml
+pillow
+Wand
+numpy
restore.py (new file, 25 lines)
@@ -0,0 +1,25 @@
from search.search import ElasticSearchEngine
import ujson

es = ElasticSearchEngine("od-database")
es.reset()

with open("dump.json", "r") as f:

    buffer = list()
    index_every = 10000

    for line in f:
        try:
            doc = ujson.loads(line)["_source"]
            buffer.append(doc)

            if len(buffer) >= index_every:
                es._index(buffer)
                buffer.clear()

        except Exception as e:
            print("ERROR: " + str(e))

    es._index(buffer)
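restore.py expects dump.json to hold one JSON object per line, each carrying the original document under "_source" (which matches the shape of an Elasticsearch scroll/dump hit; that provenance is an assumption). A line might look like this, with field names taken from the mapping later in this diff:

{"_index": "od-database", "_type": "file", "_id": "...", "_source": {"website_id": 123, "path": "files/iso", "name": "debian-9.4.0-amd64-netinst.iso", "size": 305135616, "mtime": 1524000000, "ext": "iso"}}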
search/search.py
@@ -75,12 +75,13 @@ class ElasticSearchEngine(SearchEngine):
         self.es.indices.create(index=self.index_name)
         self.es.indices.close(index=self.index_name)
 
-        # File names and paths
+        # Index settings
         self.es.indices.put_settings(body={
             "analysis": {
                 "tokenizer": {
                     "my_nGram_tokenizer": {
-                        "type": "nGram", "min_gram": 3, "max_gram": 3}
+                        "type": "nGram", "min_gram": 3, "max_gram": 3
+                    }
                 }
             }}, index=self.index_name)
         self.es.indices.put_settings(body={
@@ -93,15 +94,15 @@ class ElasticSearchEngine(SearchEngine):
             }
         }}, index=self.index_name)
 
-        # Mappings
-        self.es.indices.put_mapping(body={"properties": {
+        self.es.indices.put_mapping(body={
+            "properties": {
             "path": {"analyzer": "standard", "type": "text"},
             "name": {"analyzer": "standard", "type": "text",
                      "fields": {"nGram": {"type": "text", "analyzer": "my_nGram"}}},
             "mtime": {"type": "date", "format": "epoch_second"},
             "size": {"type": "long"},
             "website_id": {"type": "integer"},
-            "ext": {"type": "keyword"}
+            "ext": {"type": "keyword"},
         }}, doc_type="file", index=self.index_name)
 
         self.es.indices.open(index=self.index_name)
@@ -120,9 +121,9 @@ class ElasticSearchEngine(SearchEngine):
 
         to_delete = helpers.scan(query={
             "query": {
-                "term": {"website_id": website_id}
+                "match_all": {}
             }
-        }, scroll="1m", client=self.es, index=self.index_name, request_timeout=120)
+        }, scroll="1m", client=self.es, index=self.index_name, request_timeout=120, routing=website_id)
 
         buf = []
         counter = 0
@@ -130,12 +131,12 @@ class ElasticSearchEngine(SearchEngine):
                 buf.append(doc)
                 counter += 1
 
-                if counter >= 400:
-                    self._delete(buf)
+                if counter >= 10000:
+                    self._delete(buf, website_id)
                     buf.clear()
                     counter = 0
             if counter > 0:
-                self._delete(buf)
+                self._delete(buf, website_id)
                 break
 
         except Exception as e:
@@ -144,9 +145,10 @@ class ElasticSearchEngine(SearchEngine):
 
         logger.debug("Done deleting for " + str(website_id))
 
-    def _delete(self, docs):
+    def _delete(self, docs, website_id):
         bulk_string = self.create_bulk_delete_string(docs)
-        result = self.es.bulk(body=bulk_string, index=self.index_name, doc_type="file", request_timeout=30)
+        result = self.es.bulk(body=bulk_string, index=self.index_name, doc_type="file", request_timeout=30,
+                              routing=website_id)
 
         if result["errors"]:
             logger.error("Error in ES bulk delete: \n" + result["errors"])
@@ -154,7 +156,7 @@ class ElasticSearchEngine(SearchEngine):
 
     def import_json(self, in_lines, website_id: int):
 
-        import_every = 400
+        import_every = 10000
         cooldown_time = 0
 
         docs = []
@@ -183,7 +185,8 @@ class ElasticSearchEngine(SearchEngine):
             try:
                 logger.debug("Indexing " + str(len(docs)) + " docs")
                 bulk_string = ElasticSearchEngine.create_bulk_index_string(docs)
-                self.es.bulk(body=bulk_string, index=self.index_name, doc_type="file", request_timeout=30)
+                self.es.bulk(body=bulk_string, index=self.index_name, doc_type="file", request_timeout=30,
+                             routing=docs[0]["website_id"])
                 break
             except Exception as e:
                 logger.error("Error in _index: " + str(e) + ", retrying")
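The recurring routing=website_id parameter in these hunks is Elasticsearch custom routing: all documents for one website are written to, and read from, a single shard, so per-website deletes, stats and link listings hit one shard instead of fanning out across the index. The "ES settings" part of the commit message appears to refer to this. A minimal sketch with the elasticsearch-py client (index name and document are illustrative; the key point is that index and search must use the same routing value):

from elasticsearch import Elasticsearch

es = Elasticsearch()

doc = {"website_id": 123, "name": "readme.txt", "path": "", "size": 1024,
       "mtime": 1524000000, "ext": "txt"}

# Pin the document to the shard chosen by routing "123"
es.index(index="od-database", doc_type="file", body=doc, routing="123")

# A match_all query plus routing touches only that shard, which is why the
# "term": {"website_id": ...} filters above could be replaced with match_all
es.search(index="od-database",
          body={"query": {"match_all": {}}},
          routing="123")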
@@ -293,7 +296,7 @@ class ElasticSearchEngine(SearchEngine):
                 }
             },
             "size": 0
-        }, index=self.index_name, request_timeout=30)
+        }, index=self.index_name, request_timeout=30, routing=website_id)
 
         stats = dict()
         stats["total_size"] = result["aggregations"]["total_size"]["value"]
@@ -311,11 +314,10 @@ class ElasticSearchEngine(SearchEngine):
                 "includes": ["path", "name", "ext"]
             },
             "query": {
-                "term": {
-                    "website_id": website_id}
+                "match_all": {}
             }
         },
-            index=self.index_name, request_timeout=20)
+            index=self.index_name, request_timeout=20, routing=website_id)
         for hit in hits:
             src = hit["_source"]
             yield base_url + src["path"] + ("/" if src["path"] != "" else "") + src["name"] + \
@@ -431,7 +433,7 @@ class ElasticSearchEngine(SearchEngine):
             "websites": {
                 "terms": {
                     "field": "website_id",
-                    "size": 400  # TODO: Figure out what size is appropriate
+                    "size": 600  # TODO: Figure out what size is appropriate
                 },
                 "aggs": {
                     "size": {
@@ -451,7 +453,8 @@ class ElasticSearchEngine(SearchEngine):
         stats["es_index_size"] = es_stats["indices"][self.index_name]["total"]["store"]["size_in_bytes"]
         stats["es_search_count"] = es_stats["indices"][self.index_name]["total"]["search"]["query_total"]
         stats["es_search_time"] = es_stats["indices"][self.index_name]["total"]["search"]["query_time_in_millis"]
-        stats["es_search_time_avg"] = stats["es_search_time"] / (stats["es_search_count"] if stats["es_search_count"] != 0 else 1)
+        stats["es_search_time_avg"] = stats["es_search_time"] / (
+            stats["es_search_count"] if stats["es_search_count"] != 0 else 1)
 
         stats["total_count"] = total_stats["hits"]["total"]
         stats["total_size"] = total_stats["aggregations"]["file_stats"]["sum"]
@@ -479,34 +482,5 @@ class ElasticSearchEngine(SearchEngine):
             }
         }, scroll="1m", client=self.es, index=self.index_name, request_timeout=60)
 
-    def are_empty(self, websites):
-        result = self.es.search(body={
-            "query": {
-                "bool": {
-                    "filter": {
-                        "terms": {
-                            "website_id": websites
-                        },
-                    }
-                }
-            },
-            "aggs": {
-                "websites": {
-                    "terms": {
-                        "field": "website_id",
-                        "size": 100000,
-                        "min_doc_count": 1
-                    }
-                }
-            },
-            "size": 0
-        }, index=self.index_name, request_timeout=30)
-
-        non_empty_websites = [bucket["key"] for bucket in result["aggregations"]["websites"]["buckets"]]
-
-        for website in websites:
-            if website not in non_empty_websites:
-                yield website
-
     def refresh(self):
         self.es.indices.refresh(self.index_name)
BIN
static/Hack-Regular.ttf
Normal file
BIN
static/Hack-Regular.ttf
Normal file
Binary file not shown.
34  template_filters.py  Normal file
@@ -0,0 +1,34 @@
import datetime
import time

import od_util


def setup_template_filters(app):

    app.jinja_env.globals.update(truncate_path=od_util.truncate_path)
    app.jinja_env.globals.update(get_color=od_util.get_color)
    app.jinja_env.globals.update(get_mime=od_util.get_category)

    @app.template_filter("date_format")
    def date_format(value, format='%Y-%m-%d'):
        return time.strftime(format, time.gmtime(value))

    @app.template_filter("datetime_format")
    def datetime_format(value, format='%Y-%m-%d %H:%M:%S'):
        return time.strftime(format, time.gmtime(value))

    @app.template_filter("duration_format")
    def duration_format(value):
        delay = datetime.timedelta(seconds=value)
        if delay.days > 0:
            out = str(delay).replace(" days, ", ":")
        else:
            out = str(delay)
        out_ar = out.split(':')
        out_ar = ["%02d" % (int(float(x))) for x in out_ar]
        out = ":".join(out_ar)
        return out

    @app.template_filter("from_timestamp")
    def from_timestamp(value):
        return datetime.datetime.fromtimestamp(value)
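The filters registered above can be exercised without any template file; a small sketch, assuming the Flask app is constructed as usual (the printed values are worked examples):

from flask import Flask
from template_filters import setup_template_filters

app = Flask(__name__)
setup_template_filters(app)

# duration_format(3661) pads each field: '1:01:01' -> '01:01:01'
out = app.jinja_env.from_string(
    "{{ 1530000000 | datetime_format }} / {{ 3661 | duration_format }}"
).render()
print(out)  # -> 2018-06-26 08:00:00 / 01:01:01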
@@ -16,7 +16,7 @@
     </div>

     {% if show_captcha %}
-        {{ recaptcha.get_code()|safe }}
+        {{ captcha.get_code()|safe }}
     {% endif %}

     <input type="submit" value="Login">
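With recaptcha removed, the templates now inject whatever markup the new internal captcha module returns. That module is not part of this diff; a hypothetical sketch of the contract the templates and views appear to rely on (names and behaviour assumed, not confirmed by the commit):

from flask import request, session

def get_code():
    # Hypothetical: must return trusted markup, since templates insert it with |safe.
    return '<input type="text" name="captcha" placeholder="Captcha answer">'

def verify():
    # Hypothetical: compare the submitted answer with the challenge stored in the session.
    expected = session.get("captcha_answer")
    return expected is not None and request.form.get("captcha") == expected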
@@ -30,20 +30,12 @@
         <input class="form-control" name="q" id="q" placeholder="Query">
     </div>
     <div class="col-md-1">
-        {% if show_captcha %}
-            <script>function f(token) {
-                document.getElementById("sfrm").submit();
-            }</script>
-            <script src="https://www.google.com/recaptcha/api.js" async defer></script>
-            <button class="g-recaptcha btn btn-primary btn-shadow"
-                    data-sitekey="6LcCXWkUAAAAAJo2NR9_m09Obn5YmDrI97sGrr2f" data-callback="f">Search
-            </button>
-        {% else %}
         <input class="btn btn-primary btn-shadow" type="submit" value="Search">
+    </div>
+</div>
+{% if show_captcha %}
+    {{ captcha.get_code()|safe }}
     {% endif %}
-    </div>
-</div>

 </form>
 </div>
 </div>
@@ -110,21 +110,13 @@
             {# Search button #}
             <div class="form-group col-md-7">

-                {% if show_captcha %}
-                    <script>function f(token) {
-                        document.getElementById("sfrm").submit();
-                    }</script>
-                    <script src="https://www.google.com/recaptcha/api.js" async defer></script>
-                    <button id="s" class="g-recaptcha btn btn-primary btn-shadow"
-                            data-sitekey="6LcCXWkUAAAAAJo2NR9_m09Obn5YmDrI97sGrr2f" data-callback="f"
-                            style="float: right" onclick="document.getElementById('page').value = 0">Search
-                    </button>
-                {% else %}
                 <input id="s" class="btn btn-primary btn-shadow" type="submit" value="Search"
                        style="float: right">
+            </div>
+        </div>
+        {% if show_captcha %}
+            {{ captcha.get_code()|safe }}
             {% endif %}
-            </div>
-        </div>

         </form>
     </div>
@@ -280,20 +272,12 @@
         //Next button
         function nextPage() {
             document.getElementById("page").value = parseInt(document.getElementById("page").value) + 1;
-            {% if show_captcha %}
-            grecaptcha.execute();
-            {% else %}
-            document.getElementById("sfrm").submit()
-            {% endif %}
+            document.getElementById("sfrm").submit();
         }

         function prevPage() {
             document.getElementById("page").value = parseInt(document.getElementById("page").value) - 1;
-            {% if show_captcha %}
-            grecaptcha.execute();
-            {% else %}
-            document.getElementById("sfrm").submit()
-            {% endif %}
+            document.getElementById("sfrm").submit();
         }

     </script>
@@ -8,10 +8,12 @@
         <div class="card-header">
             <ul class="nav nav-tabs card-header-tabs" id="myTab" role="tablist">
                 <li class="nav-item">
-                    <a class="nav-link active" id="normal-tab" data-toggle="tab" href="#normal" role="tab" aria-controls="normal" aria-selected="true">Single website</a>
+                    <a class="nav-link active" id="normal-tab" data-toggle="tab" href="#normal" role="tab"
+                       aria-controls="normal" aria-selected="true">Single website</a>
                 </li>
                 <li class="nav-item">
-                    <a class="nav-link" id="bulk-tab" data-toggle="tab" href="#bulk" role="tab" aria-controls="bulk" aria-selected="false">Bulk</a>
+                    <a class="nav-link" id="bulk-tab" data-toggle="tab" href="#bulk" role="tab" aria-controls="bulk"
+                       aria-selected="false">Bulk</a>
                 </li>
             </ul>
         </div>
@@ -24,16 +26,12 @@
                     <div class="form-group">
                         <input class="form-control" name="url" id="url" placeholder="URL">
                     </div>
-                    <div class="row">
                     {% if show_captcha %}
-                        <div class="col">
+                        <div class="form-row">
                             {{ captcha.get_code()|safe }}
                         </div>
                     {% endif %}
-                        <div class="col">
                     <input class="btn btn-primary" type="submit" value="Submit" title="Submit open directory">
-                        </div>
-                    </div>
                 </form>

             </div>
@@ -41,18 +39,17 @@
             {# Bulk #}
             <form action="/enqueue_bulk" method="POST">
                 <div class="form-group">
-                    <textarea class="form-control" name="urls" id="urls" rows="10" placeholder="One URL per line, max. 10"></textarea>
+                    <textarea class="form-control" name="urls" id="urls" rows="10"
+                              placeholder="One URL per line, max. 10"></textarea>
                 </div>
-                <div class="row">
                 {% if show_captcha %}
-                    <div class="col">
+                    <div class="form-row">
                         {{ captcha.get_code()|safe }}
                     </div>
                 {% endif %}
-                    <div class="col">
-                        <input class="btn btn-primary" type="submit" value="Submit" title="Submit open directories">
-                    </div>
-                </div>
+                <input class="btn btn-primary" type="submit" value="Submit"
+                       title="Submit open directories">
             </form>
         </div>
     </div>

422  views.py  Normal file
@@ -0,0 +1,422 @@
import json
import os
from multiprocessing.pool import Pool
from urllib.parse import urlparse

from flask import render_template, redirect, request, flash, abort, Response, session
from flask_caching import Cache

import config
import od_util
from common import db, taskManager, searchEngine, logger, require_role
from database import Task, Website
from search.search import InvalidQueryException
import captcha


def setup_views(app):
    cache = Cache(app, config={'CACHE_TYPE': 'simple'})

    @app.route("/dl")
    @cache.cached(120)
    def downloads():
        # Get content of downloads directory
        dl_dir = "static/downloads/"
        dir_content = os.listdir(dl_dir)

        # Make paths relative to working directory
        # Only allow csv files
        files = [
            (name, os.path.join(dl_dir, name))
            for name in dir_content
            if name.find(".csv") != -1
        ]

        # Stat files
        # Remove any dirs placed accidentally
        files = [
            (f, full, os.stat(full))
            for f, full in files
            if os.path.isfile(full)
        ]

        if len(files) == 0:
            logger.warning("No export file to display in /dl")

        return render_template("downloads.html", export_file_stats=files)

    @app.route("/stats")
    @cache.cached(120)
    def stats_page():
        crawl_server_stats = db.get_stats_by_crawler()
        return render_template("stats.html", crawl_server_stats=crawl_server_stats)

    @app.route("/stats/json_chart")
    @cache.cached(240)
    def stats_json():
        stats = searchEngine.get_global_stats()
        if stats:
            db.join_website_on_stats(stats)
            return Response(json.dumps(stats), mimetype="application/json")
        return abort(500)

    @app.route("/website/<int:website_id>/")
    def website_info(website_id):
        website = db.get_website_by_id(website_id)

        if website:
            return render_template("website.html", website=website)
        else:
            abort(404)

    @app.route("/website/<int:website_id>/json_chart")
    @cache.memoize(60)
    def website_json_chart(website_id):
        website = db.get_website_by_id(website_id)

        if website:
            stats = searchEngine.get_stats(website_id)
            stats["base_url"] = website.url
            stats["report_time"] = website.last_modified
            return Response(json.dumps(stats), mimetype="application/json")
        else:
            abort(404)

    @app.route("/website/<int:website_id>/links")
    def website_links(website_id):
        website = db.get_website_by_id(website_id)

        if website:
            links = searchEngine.get_link_list(website_id, website.url)
            return Response("\n".join(links), mimetype="text/plain")
        else:
            abort(404)

    @app.route("/website/")
    def websites():
        page = int(request.args.get("p")) if "p" in request.args else 0
        url = request.args.get("url") if "url" in request.args else ""
        if url:
            parsed_url = urlparse(url)
            if parsed_url.scheme:
                search_term = (parsed_url.scheme + "://" + parsed_url.netloc)
            else:
                flash("Sorry, I was not able to parse this url format. "
                      "Make sure you include the appropriate scheme (http/https/ftp)", "warning")
                search_term = ""
        else:
            search_term = url

        return render_template("websites.html",
                               websites=db.get_websites(50, page, search_term),
                               p=page, url=search_term, per_page=50)

    @app.route("/website/random")
    def random_website():
        return redirect("/website/" + str(db.get_random_website_id()))

    @app.route("/website/<int:website_id>/clear")
    def admin_clear_website(website_id):
        require_role("admin")

        searchEngine.delete_docs(website_id)
        flash("Cleared all documents associated with this website", "success")
        return redirect("/website/" + str(website_id))

    @app.route("/website/<int:website_id>/delete")
    def admin_delete_website(website_id):
        require_role("admin")

        searchEngine.delete_docs(website_id)
        db.delete_website(website_id)
        flash("Deleted website " + str(website_id), "success")
        return redirect("/website/")

    @app.route("/website/<int:website_id>/rescan")
    def admin_rescan_website(website_id):
        require_role("admin")
        website = db.get_website_by_id(website_id)

        if website:
            priority = request.args.get("priority") if "priority" in request.args else 1
            task = Task(website_id, website.url, priority)
            taskManager.queue_task(task)

            flash("Enqueued rescan task", "success")
        else:
            flash("Website does not exist", "danger")
        return redirect("/website/" + str(website_id))

    @app.route("/search")
    def search():
        q = request.args.get("q") if "q" in request.args else ""
        sort_order = request.args.get("sort_order") if "sort_order" in request.args else "score"

        page = request.args.get("p") if "p" in request.args else "0"
        page = int(page) if page.isdigit() else 0

        per_page = request.args.get("per_page") if "per_page" in request.args else "50"
        per_page = int(per_page) if per_page.isdigit() else "50"
        per_page = per_page if per_page in config.RESULTS_PER_PAGE else 50

        extensions = request.args.get("ext") if "ext" in request.args else None
        extensions = [ext.strip().strip(".").lower() for ext in extensions.split(",")] if extensions else []

        size_min = request.args.get("size_min") if "size_min" in request.args else "size_min"
        size_min = int(size_min) if size_min.isdigit() else 0
        size_max = request.args.get("size_max") if "size_max" in request.args else "size_max"
        size_max = int(size_max) if size_max.isdigit() else 0

        date_min = request.args.get("date_min") if "date_min" in request.args else "date_min"
        date_min = int(date_min) if date_min.isdigit() else 0
        date_max = request.args.get("date_max") if "date_max" in request.args else "date_max"
        date_max = int(date_max) if date_max.isdigit() else 0

        match_all = "all" in request.args

        field_name = "field_name" in request.args
        field_trigram = "field_trigram" in request.args
        field_path = "field_path" in request.args

        if not field_name and not field_trigram and not field_path:
            # If no fields are selected, search in all
            field_name = field_path = field_trigram = True

        fields = []
        if field_path:
            fields.append("path")
        if field_name:
            fields.append("name^5")
        if field_trigram:
            fields.append("name.nGram^2")

        if len(q) >= 3:

            blocked = False
            hits = None
            if not config.CAPTCHA_SEARCH or captcha.verify():

                try:
                    hits = searchEngine.search(q, page, per_page, sort_order,
                                               extensions, size_min, size_max, match_all, fields, date_min, date_max)
                    hits = db.join_website_on_search_result(hits)
                except InvalidQueryException as e:
                    flash("<strong>Invalid query:</strong> " + str(e), "warning")
                    blocked = True
                except:
                    flash("Query failed, this could mean that the search server is overloaded or is not reachable. "
                          "Please try again later", "danger")

                results = hits["hits"]["total"] if hits else -1
                took = hits["took"] if hits else -1
                forwarded_for = request.headers["X-Forwarded-For"] if "X-Forwarded-For" in request.headers else None

                logger.info("SEARCH '{}' [res={}, t={}, p={}x{}, ext={}] by {}{}"
                            .format(q, results, took, page, per_page, str(extensions),
                                    request.remote_addr, "_" + forwarded_for if forwarded_for else ""))

                db.log_search(request.remote_addr, forwarded_for, q, extensions, page, blocked, results, took)
                if blocked:
                    return redirect("/search")
            else:
                flash("<strong>Error:</strong> Invalid captcha please try again", "danger")

        else:
            hits = None

        return render_template("search.html",
                               results=hits,
                               q=q,
                               p=page, per_page=per_page,
                               sort_order=sort_order,
                               results_set=config.RESULTS_PER_PAGE,
                               extensions=",".join(extensions),
                               size_min=size_min, size_max=size_max,
                               match_all=match_all,
                               field_trigram=field_trigram, field_path=field_path, field_name=field_name,
                               date_min=date_min, date_max=date_max,
                               show_captcha=config.CAPTCHA_SEARCH, captcha=captcha)
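A note on the /search view above: each numeric parameter is sanitized with the same `int(x) if x.isdigit() else default` dance, and the `per_page` fallback even leaves the string "50" behind, which only works because the following membership check against config.RESULTS_PER_PAGE catches it and falls back to 50. A hypothetical helper, not in the commit, that expresses the same intent once:

def int_arg(args, name, default=0):
    # args is request.args; missing and non-numeric values fall back to default.
    value = args.get(name, "")
    return int(value) if value.isdigit() else default

# e.g. size_min = int_arg(request.args, "size_min")
#      per_page = int_arg(request.args, "per_page", 50)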
    @app.route("/contribute")
    @cache.cached(600)
    def contribute():
        return render_template("contribute.html")

    @app.route("/")
    def home():
        try:
            stats = searchEngine.get_global_stats()
            stats["website_count"] = len(db.get_all_websites())
        except:
            stats = {}
        return render_template("home.html", stats=stats,
                               show_captcha=config.CAPTCHA_SEARCH, captcha=captcha)

    @app.route("/submit")
    def submit():
        queued_websites = taskManager.get_queued_tasks()[:30]
        return render_template("submit.html", queue=queued_websites, captcha=captcha,
                               show_captcha=config.CAPTCHA_SUBMIT)

    def try_enqueue(url):
        url = os.path.join(url, "")
        url = od_util.get_top_directory(url)

        if not od_util.is_valid_url(url):
            return "<strong>Error:</strong> Invalid url. Make sure to include the appropriate scheme.", "warning"

        website = db.get_website_by_url(url)
        if website:
            return "Website already exists", "danger"

        website = db.website_exists(url)
        if website:
            return "A parent directory of this url has already been posted", "danger"

        if db.is_blacklisted(url):
            return "<strong>Error:</strong> " \
                   "Sorry, this website has been blacklisted. If you think " \
                   "this is an error, please <a href='/contribute'>contact me</a>.", "danger"

        if not od_util.is_od(url):
            return "<strong>Error:</strong>" \
                   "The anti-spam algorithm determined that the submitted url is not " \
                   "an open directory or the server is not responding. If you think " \
                   "this is an error, please <a href='/contribute'>contact me</a>.", "danger"

        website_id = db.insert_website(Website(url, str(request.remote_addr + "_" +
                                                        request.headers.get("X-Forwarded-For", "")),
                                               request.user_agent))

        task = Task(website_id, url, priority=1)
        taskManager.queue_task(task)

        return "The website has been added to the queue", "success"
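try_enqueue normalizes submissions with `os.path.join(url, "")`, which appends the platform's path separator only when the string does not already end with one. A quick demonstration, assuming a POSIX host where that separator is "/":

import os

# A missing trailing slash is added; an existing one is left alone.
assert os.path.join("http://example.com/files", "") == "http://example.com/files/"
assert os.path.join("http://example.com/files/", "") == "http://example.com/files/"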
    @app.route("/enqueue", methods=["POST"])
    def enqueue():
        if not config.CAPTCHA_SUBMIT or captcha.verify():

            url = os.path.join(request.form.get("url"), "")
            message, msg_type = try_enqueue(url)
            flash(message, msg_type)

            return redirect("/submit")

        else:
            flash("<strong>Error:</strong> Invalid captcha please try again", "danger")
            return redirect("/submit")

    def check_url(url):
        url = os.path.join(url, "")
        try_enqueue(url)
        return None

    @app.route("/enqueue_bulk", methods=["POST"])
    def enqueue_bulk():
        if not config.CAPTCHA_SUBMIT or captcha.verify():

            urls = request.form.get("urls")
            if urls:
                urls = urls.split()

                if 0 < len(urls) <= 1000:  # TODO: Load from config & adjust placeholder/messages?

                    pool = Pool(processes=6)
                    pool.map(func=check_url, iterable=urls)
                    pool.close()

                    flash("Submitted websites to the queue", "success")

                    return redirect("/submit")

                else:
                    flash("Too few or too many urls, please submit 1-10 urls", "danger")
                    return redirect("/submit")
            else:
                flash("Too few or too many urls, please submit 1-10 urls", "danger")
                return redirect("/submit")
        else:
            flash("<strong>Error:</strong> Invalid captcha please try again", "danger")
            return redirect("/submit")
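Two things stand out in /enqueue_bulk. First, the server-side cap is 1000 URLs while the textarea placeholder and the flash messages still say 10; the inline TODO acknowledges the mismatch. Second, check_url runs in Pool worker processes but ultimately reads the context-local flask.request (via try_enqueue), which is not generally visible inside forked workers. A sketch of a safer shape that passes the request-derived values in explicitly; the signatures are hypothetical, not the commit's code:

from multiprocessing.pool import ThreadPool

def check_urls(urls, submitter, user_agent, workers=6):
    # check_url would take (url, submitter, user_agent) instead of
    # touching flask.request itself inside the worker.
    pool = ThreadPool(processes=workers)
    try:
        pool.starmap(check_url, ((url, submitter, user_agent) for url in urls))
    finally:
        pool.close()
        pool.join()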
    @app.route("/admin")
    def admin_login_form():
        if "username" in session:
            return redirect("/dashboard")
        return render_template("admin.html", captcha=captcha, show_captcha=config.CAPTCHA_LOGIN)

    @app.route("/login", methods=["POST"])
    def admin_login():
        if not config.CAPTCHA_LOGIN or captcha.verify():

            username = request.form.get("username")
            password = request.form.get("password")

            if db.check_login(username, password):
                session["username"] = username
                flash("Logged in", "success")
                return redirect("/dashboard")

            flash("Invalid username/password combo", "danger")
            return redirect("/admin")

        else:
            flash("Invalid captcha", "danger")
            return redirect("/admin")

    @app.route("/logout")
    def admin_logout():
        session.clear()
        flash("Logged out", "info")
        return redirect("/")

    @app.route("/dashboard")
    def admin_dashboard():
        require_role("admin")
        tokens = db.get_tokens()
        blacklist = db.get_blacklist()

        return render_template("dashboard.html", api_tokens=tokens, blacklist=blacklist)

    @app.route("/blacklist/add", methods=["POST"])
    def admin_blacklist_add():
        require_role("admin")
        url = request.form.get("url")
        db.add_blacklist_website(url)
        flash("Added item to blacklist", "success")
        return redirect("/dashboard")

    @app.route("/blacklist/<int:blacklist_id>/delete")
    def admin_blacklist_remove(blacklist_id):
        require_role("admin")
        db.remove_blacklist_website(blacklist_id)
        flash("Removed blacklist item", "success")
        return redirect("/dashboard")

    @app.route("/generate_token", methods=["POST"])
    def admin_generate_token():
        require_role("admin")
        description = request.form.get("description")

        db.generate_api_token(description)
        flash("Generated API token", "success")

        return redirect("/dashboard")

    @app.route("/del_token", methods=["POST"])
    def admin_del_token():
        require_role("admin")
        token = request.form.get("token")

        db.delete_token(token)
        flash("Deleted API token", "success")
        return redirect("/dashboard")

    # TODO: pages scrolling
    @app.route("/logs", methods=["GET"])
    def admin_crawl_logs():
        require_role("admin")
        results = db.get_crawl_logs()

        return render_template("crawl_logs.html", logs=results)