Crawl server now holds at most max_workers + 1 tasks in the pool, to minimize waiting time and to avoid losing too many tasks in case of a crash/restart

This commit is contained in:
Simon
2018-06-12 22:28:36 -04:00
parent 24ef493245
commit 2fe81e4b06
5 changed files with 73 additions and 39 deletions

9
app.py
View File

@@ -134,6 +134,7 @@ def contribute():
def home():
    # Landing page view: shows global crawl statistics and the URLs currently
    # being processed by the task dispatcher.
    stats = {}  # NOTE(review): dead assignment — immediately overwritten on the next line
    stats = searchEngine.get_global_stats()
    # Comma-separated list of URLs for the tasks the dispatcher is currently working on.
    current_websites = ", ".join(task.url for task in taskDispatcher.get_current_tasks())
    return render_template("home.html", stats=stats, current_websites=current_websites)
@@ -195,7 +196,7 @@ def enqueue():
@app.route("/enqueue_bulk", methods=["POST"])
def enqueue_bulk():
if recaptcha.verify():
# if recaptcha.verify():
urls = request.form.get("urls")
if urls:
@@ -216,9 +217,9 @@ def enqueue_bulk():
else:
return abort(500)
else:
flash("<strong>Error:</strong> Invalid captcha please try again", "danger")
return redirect("/submit")
# else:
# flash("<strong>Error:</strong> Invalid captcha please try again", "danger")
# return redirect("/submit")
@app.route("/admin")