mirror of
https://github.com/simon987/od-database.git
synced 2025-04-16 08:56:44 +00:00
Searches are now saved to database
This commit is contained in:
parent
14037c5f21
commit
5383ad6aea
85
app.py
85
app.py
@ -39,7 +39,6 @@ def datetime_format(value, format='%Y-%m-%d %H:%M:%S'):
|
|||||||
|
|
||||||
@app.route("/dl")
|
@app.route("/dl")
|
||||||
def downloads():
|
def downloads():
|
||||||
|
|
||||||
try:
|
try:
|
||||||
export_file_stats = os.stat("static/out.csv.xz")
|
export_file_stats = os.stat("static/out.csv.xz")
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
@ -64,7 +63,6 @@ def stats_json():
|
|||||||
|
|
||||||
@app.route("/get_export")
|
@app.route("/get_export")
|
||||||
def get_export():
|
def get_export():
|
||||||
|
|
||||||
if os.path.exists("static/out.csv.xz"):
|
if os.path.exists("static/out.csv.xz"):
|
||||||
return send_from_directory("static", "out.csv.xz", as_attachment=True, mimetype="application/x-xz")
|
return send_from_directory("static", "out.csv.xz", as_attachment=True, mimetype="application/x-xz")
|
||||||
return abort(404)
|
return abort(404)
|
||||||
@ -72,7 +70,6 @@ def get_export():
|
|||||||
|
|
||||||
@app.route("/website/<int:website_id>/")
|
@app.route("/website/<int:website_id>/")
|
||||||
def website_info(website_id):
|
def website_info(website_id):
|
||||||
|
|
||||||
website = db.get_website_by_id(website_id)
|
website = db.get_website_by_id(website_id)
|
||||||
|
|
||||||
if website:
|
if website:
|
||||||
@ -84,7 +81,6 @@ def website_info(website_id):
|
|||||||
@app.route("/website/<int:website_id>/json_chart")
|
@app.route("/website/<int:website_id>/json_chart")
|
||||||
@cache.memoize(30)
|
@cache.memoize(30)
|
||||||
def website_json_chart(website_id):
|
def website_json_chart(website_id):
|
||||||
|
|
||||||
website = db.get_website_by_id(website_id)
|
website = db.get_website_by_id(website_id)
|
||||||
|
|
||||||
if website:
|
if website:
|
||||||
@ -98,7 +94,6 @@ def website_json_chart(website_id):
|
|||||||
|
|
||||||
@app.route("/website/<int:website_id>/links")
|
@app.route("/website/<int:website_id>/links")
|
||||||
def website_links(website_id):
|
def website_links(website_id):
|
||||||
|
|
||||||
website = db.get_website_by_id(website_id)
|
website = db.get_website_by_id(website_id)
|
||||||
|
|
||||||
if website:
|
if website:
|
||||||
@ -116,7 +111,6 @@ def websites():
|
|||||||
|
|
||||||
@app.route("/website/redispatch_queued")
|
@app.route("/website/redispatch_queued")
|
||||||
def admin_redispatch_queued():
|
def admin_redispatch_queued():
|
||||||
|
|
||||||
if "username" in session:
|
if "username" in session:
|
||||||
|
|
||||||
count = taskDispatcher.redispatch_queued()
|
count = taskDispatcher.redispatch_queued()
|
||||||
@ -128,7 +122,6 @@ def admin_redispatch_queued():
|
|||||||
|
|
||||||
|
|
||||||
def get_empty_websites():
|
def get_empty_websites():
|
||||||
|
|
||||||
current_tasks = itertools.chain(taskDispatcher.get_queued_tasks(), taskDispatcher.get_current_tasks())
|
current_tasks = itertools.chain(taskDispatcher.get_queued_tasks(), taskDispatcher.get_current_tasks())
|
||||||
|
|
||||||
queued_websites = [task.website_id for task in current_tasks]
|
queued_websites = [task.website_id for task in current_tasks]
|
||||||
@ -147,7 +140,7 @@ def admin_delete_empty_website():
|
|||||||
empty_websites = get_empty_websites()
|
empty_websites = get_empty_websites()
|
||||||
|
|
||||||
for website in empty_websites:
|
for website in empty_websites:
|
||||||
#db.delete_website(website)
|
# db.delete_website(website)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
flash("Deleted: " + repr(list(empty_websites)), "success")
|
flash("Deleted: " + repr(list(empty_websites)), "success")
|
||||||
@ -174,7 +167,6 @@ def admin_queue_empty_websites():
|
|||||||
|
|
||||||
@app.route("/website/<int:website_id>/clear")
|
@app.route("/website/<int:website_id>/clear")
|
||||||
def admin_clear_website(website_id):
|
def admin_clear_website(website_id):
|
||||||
|
|
||||||
if "username" in session:
|
if "username" in session:
|
||||||
|
|
||||||
searchEngine.delete_docs(website_id)
|
searchEngine.delete_docs(website_id)
|
||||||
@ -186,7 +178,6 @@ def admin_clear_website(website_id):
|
|||||||
|
|
||||||
@app.route("/website/<int:website_id>/delete")
|
@app.route("/website/<int:website_id>/delete")
|
||||||
def admin_delete_website(website_id):
|
def admin_delete_website(website_id):
|
||||||
|
|
||||||
if "username" in session:
|
if "username" in session:
|
||||||
|
|
||||||
searchEngine.delete_docs(website_id)
|
searchEngine.delete_docs(website_id)
|
||||||
@ -200,7 +191,6 @@ def admin_delete_website(website_id):
|
|||||||
|
|
||||||
@app.route("/website/<int:website_id>/rescan")
|
@app.route("/website/<int:website_id>/rescan")
|
||||||
def admin_rescan_website(website_id):
|
def admin_rescan_website(website_id):
|
||||||
|
|
||||||
if "username" in session:
|
if "username" in session:
|
||||||
|
|
||||||
website = db.get_website_by_id(website_id)
|
website = db.get_website_by_id(website_id)
|
||||||
@ -221,7 +211,6 @@ def admin_rescan_website(website_id):
|
|||||||
|
|
||||||
@app.route("/search")
|
@app.route("/search")
|
||||||
def search():
|
def search():
|
||||||
|
|
||||||
q = request.args.get("q") if "q" in request.args else ""
|
q = request.args.get("q") if "q" in request.args else ""
|
||||||
sort_order = request.args.get("sort_order") if "sort_order" in request.args else "score"
|
sort_order = request.args.get("sort_order") if "sort_order" in request.args else "score"
|
||||||
|
|
||||||
@ -274,6 +263,10 @@ def search():
|
|||||||
else:
|
else:
|
||||||
hits = None
|
hits = None
|
||||||
|
|
||||||
|
db.log_search(request.remote_addr,
|
||||||
|
request.headers["X-Forwarded-For"] if "X-Forwarded-For" in request.headers else None,
|
||||||
|
q, extensions, page)
|
||||||
|
|
||||||
return render_template("search.html",
|
return render_template("search.html",
|
||||||
results=hits,
|
results=hits,
|
||||||
q=q,
|
q=q,
|
||||||
@ -294,7 +287,6 @@ def contribute():
|
|||||||
|
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
def home():
|
def home():
|
||||||
|
|
||||||
stats = searchEngine.get_global_stats()
|
stats = searchEngine.get_global_stats()
|
||||||
stats["website_count"] = len(db.get_all_websites())
|
stats["website_count"] = len(db.get_all_websites())
|
||||||
current_websites = ", ".join(task.url for task in taskDispatcher.get_current_tasks())
|
current_websites = ", ".join(task.url for task in taskDispatcher.get_current_tasks())
|
||||||
@ -308,7 +300,6 @@ def submit():
|
|||||||
|
|
||||||
|
|
||||||
def try_enqueue(url):
|
def try_enqueue(url):
|
||||||
|
|
||||||
url = os.path.join(url, "")
|
url = os.path.join(url, "")
|
||||||
website = db.get_website_by_url(url)
|
website = db.get_website_by_url(url)
|
||||||
|
|
||||||
@ -323,16 +314,15 @@ def try_enqueue(url):
|
|||||||
return "<strong>Error:</strong> Invalid url. Make sure to include the appropriate scheme.", "danger"
|
return "<strong>Error:</strong> Invalid url. Make sure to include the appropriate scheme.", "danger"
|
||||||
|
|
||||||
if db.is_blacklisted(url):
|
if db.is_blacklisted(url):
|
||||||
|
|
||||||
return "<strong>Error:</strong> " \
|
return "<strong>Error:</strong> " \
|
||||||
"Sorry, this website has been blacklisted. If you think " \
|
"Sorry, this website has been blacklisted. If you think " \
|
||||||
"this is an error, please <a href='/contribute'>contact me</a>.", "danger"
|
"this is an error, please <a href='/contribute'>contact me</a>.", "danger"
|
||||||
|
|
||||||
if not od_util.is_od(url):
|
if not od_util.is_od(url):
|
||||||
return "<strong>Error:</strong>" \
|
return "<strong>Error:</strong>" \
|
||||||
"The anti-spam algorithm determined that the submitted url is not " \
|
"The anti-spam algorithm determined that the submitted url is not " \
|
||||||
"an open directory or the server is not responding. If you think " \
|
"an open directory or the server is not responding. If you think " \
|
||||||
"this is an error, please <a href='/contribute'>contact me</a>.", "danger"
|
"this is an error, please <a href='/contribute'>contact me</a>.", "danger"
|
||||||
|
|
||||||
web_id = db.insert_website(Website(url, str(request.remote_addr), str(request.user_agent)))
|
web_id = db.insert_website(Website(url, str(request.remote_addr), str(request.user_agent)))
|
||||||
|
|
||||||
@ -346,42 +336,45 @@ def try_enqueue(url):
|
|||||||
def enqueue():
|
def enqueue():
|
||||||
# if recaptcha.verify():
|
# if recaptcha.verify():
|
||||||
|
|
||||||
url = os.path.join(request.form.get("url"), "")
|
url = os.path.join(request.form.get("url"), "")
|
||||||
message, msg_type = try_enqueue(url)
|
message, msg_type = try_enqueue(url)
|
||||||
flash(message, msg_type)
|
flash(message, msg_type)
|
||||||
|
|
||||||
return redirect("/submit")
|
return redirect("/submit")
|
||||||
# else:
|
|
||||||
# flash("<strong>Error:</strong> Invalid captcha please try again", "danger")
|
|
||||||
# return redirect("/submit")
|
# else:
|
||||||
|
# flash("<strong>Error:</strong> Invalid captcha please try again", "danger")
|
||||||
|
# return redirect("/submit")
|
||||||
|
|
||||||
|
|
||||||
@app.route("/enqueue_bulk", methods=["POST"])
|
@app.route("/enqueue_bulk", methods=["POST"])
|
||||||
def enqueue_bulk():
|
def enqueue_bulk():
|
||||||
# if recaptcha.verify():
|
# if recaptcha.verify():
|
||||||
|
|
||||||
urls = request.form.get("urls")
|
urls = request.form.get("urls")
|
||||||
if urls:
|
if urls:
|
||||||
urls = urls.split()
|
urls = urls.split()
|
||||||
|
|
||||||
if 0 < len(urls) <= 1000000000000:
|
if 0 < len(urls) <= 1000000000000:
|
||||||
|
|
||||||
for url in urls:
|
for url in urls:
|
||||||
url = os.path.join(url, "")
|
url = os.path.join(url, "")
|
||||||
message, msg_type = try_enqueue(url)
|
message, msg_type = try_enqueue(url)
|
||||||
message += ' <span class="badge badge-' + msg_type + '">' + url + '</span>'
|
message += ' <span class="badge badge-' + msg_type + '">' + url + '</span>'
|
||||||
flash(message, msg_type)
|
flash(message, msg_type)
|
||||||
return redirect("/submit")
|
return redirect("/submit")
|
||||||
|
|
||||||
else:
|
|
||||||
flash("Too few or too many urls, please submit 1-10 urls", "danger")
|
|
||||||
return redirect("/submit")
|
|
||||||
else:
|
else:
|
||||||
return abort(500)
|
flash("Too few or too many urls, please submit 1-10 urls", "danger")
|
||||||
|
return redirect("/submit")
|
||||||
|
else:
|
||||||
|
return abort(500)
|
||||||
|
|
||||||
# else:
|
|
||||||
# flash("<strong>Error:</strong> Invalid captcha please try again", "danger")
|
# else:
|
||||||
# return redirect("/submit")
|
# flash("<strong>Error:</strong> Invalid captcha please try again", "danger")
|
||||||
|
# return redirect("/submit")
|
||||||
|
|
||||||
|
|
||||||
@app.route("/admin")
|
@app.route("/admin")
|
||||||
@ -393,7 +386,6 @@ def admin_login_form():
|
|||||||
|
|
||||||
@app.route("/login", methods=["POST"])
|
@app.route("/login", methods=["POST"])
|
||||||
def admin_login():
|
def admin_login():
|
||||||
|
|
||||||
if recaptcha.verify():
|
if recaptcha.verify():
|
||||||
|
|
||||||
username = request.form.get("username")
|
username = request.form.get("username")
|
||||||
@ -448,7 +440,6 @@ def admin_blacklist_add():
|
|||||||
@app.route("/blacklist/<int:blacklist_id>/delete")
|
@app.route("/blacklist/<int:blacklist_id>/delete")
|
||||||
def admin_blacklist_remove(blacklist_id):
|
def admin_blacklist_remove(blacklist_id):
|
||||||
if "username" in session:
|
if "username" in session:
|
||||||
|
|
||||||
db.remove_blacklist_website(blacklist_id)
|
db.remove_blacklist_website(blacklist_id)
|
||||||
flash("Removed blacklist item", "success")
|
flash("Removed blacklist item", "success")
|
||||||
return redirect("/dashboard")
|
return redirect("/dashboard")
|
||||||
@ -525,11 +516,9 @@ def admin_delete_crawl_server(server_id):
|
|||||||
|
|
||||||
@app.route("/crawl_server/<int:server_id>/update", methods=["POST"])
|
@app.route("/crawl_server/<int:server_id>/update", methods=["POST"])
|
||||||
def admin_update_crawl_server(server_id):
|
def admin_update_crawl_server(server_id):
|
||||||
|
|
||||||
crawl_servers = db.get_crawl_servers()
|
crawl_servers = db.get_crawl_servers()
|
||||||
for server in crawl_servers:
|
for server in crawl_servers:
|
||||||
if server.id == server_id:
|
if server.id == server_id:
|
||||||
|
|
||||||
new_slots = request.form.get("slots") if "slots" in request.form else server.slots
|
new_slots = request.form.get("slots") if "slots" in request.form else server.slots
|
||||||
new_name = request.form.get("name") if "name" in request.form else server.name
|
new_name = request.form.get("name") if "name" in request.form else server.name
|
||||||
new_url = request.form.get("url") if "url" in request.form else server.url
|
new_url = request.form.get("url") if "url" in request.form else server.url
|
||||||
|
@ -355,6 +355,15 @@ class Database:
|
|||||||
|
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
|
def log_search(self, remote_addr, forwarded_for, q, exts, page):
|
||||||
|
|
||||||
|
with sqlite3.connect(self.db_path) as conn:
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
cursor.execute("INSERT INTO SearchLogEntry (remote_addr, forwarded_for, query, extensions, page) VALUES "
|
||||||
|
"(?,?,?,?,?)", (remote_addr, forwarded_for, q, ",".join(exts), page))
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -44,3 +44,14 @@ CREATE TABLE TaskResult (
|
|||||||
|
|
||||||
FOREIGN KEY (server) REFERENCES CrawlServer(id)
|
FOREIGN KEY (server) REFERENCES CrawlServer(id)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE SearchLogEntry (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
search_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
remote_addr TEXT,
|
||||||
|
forwarded_for TEXT,
|
||||||
|
query TEXT,
|
||||||
|
extensions TEXT,
|
||||||
|
page INT
|
||||||
|
);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user