Added redispatch button and fixed typo in load balancing code

This commit is contained in:
Simon 2018-06-24 10:07:46 -04:00
parent 1ac510ff53
commit a6d753c6ee
6 changed files with 66 additions and 4 deletions

16
app.py
View File

@ -113,6 +113,19 @@ def websites():
return render_template("websites.html", websites=db.get_websites(100, page))
@app.route("/website/redispatch_queued")
def admin_redispatch_queued():
    """Re-dispatch every queued task, then send the admin back to the dashboard.

    Only available when a ``username`` is present in the session; any other
    visitor gets a 404.
    """
    if "username" not in session:
        # Flask's abort() raises, so nothing below runs for anonymous visitors.
        abort(404)

    redispatched = taskDispatcher.redispatch_queued()
    message = "Re-dispatched " + str(redispatched) + " tasks"
    flash(message, "success")
    return redirect("/dashboard")
@app.route("/website/delete_empty")
def admin_delete_empty_website():
"""Delete websites with no associated files that are not queued"""
@ -127,7 +140,8 @@ def admin_delete_empty_website():
empty_websites = searchEngine.are_empty(non_queued_websites)
for website in empty_websites:
db.delete_website(website)
#db.delete_website(website)
pass
flash("Deleted: " + repr(list(empty_websites)), "success")
return redirect("/dashboard")

View File

@ -83,6 +83,16 @@ class TaskManagerDatabase:
else:
return None
def pop_all_tasks(self):
    """Return every task reported by ``get_tasks()`` and empty the Queue table.

    Returns:
        Whatever ``self.get_tasks()`` returned before the table was cleared.
    """
    # NOTE(review): the read and the DELETE are two separate steps, so a row
    # inserted by another writer in between would be deleted without being
    # returned - confirm whether concurrent writers are possible here.
    tasks = self.get_tasks()

    conn = sqlite3.connect(self.db_path)
    try:
        # "with conn" only commits (or rolls back on error); it does not close
        # the connection, hence the explicit close() below.
        with conn:
            conn.execute("DELETE FROM Queue")
    finally:
        conn.close()

    return tasks
def put_task(self, task: Task):
with sqlite3.connect(self.db_path) as conn:

View File

@ -90,6 +90,14 @@ def get_task_logs():
return Response(json_str, mimetype="application/json")
@app.route("/task/pop_all")
@auth.login_required
def pop_queued_tasks():
    """Pop all tasks from the task manager and return them as a JSON array."""
    popped = tm.pop_tasks()
    serialized = [entry.to_json() for entry in popped]
    body = json.dumps(serialized)
    return Response(body, mimetype="application/json")
@app.route("/stats/")
@auth.login_required
def get_stats():

View File

@ -27,6 +27,9 @@ class TaskManager:
def get_tasks(self):
    """Return the tasks reported by the backing database's ``get_tasks()``."""
    queued = self.db.get_tasks()
    return queued
def pop_tasks(self):
    """Return the result of the backing database's ``pop_all_tasks()``."""
    popped = self.db.pop_all_tasks()
    return popped
def get_current_tasks(self):
    """Return the ``current_tasks`` attribute held by this manager."""
    running = self.current_tasks
    return running

29
task.py
View File

@ -143,6 +143,21 @@ class CrawlServer:
except ConnectionError:
return {}
def pop_queued_tasks(self):
    """Pop all queued tasks from this crawl server and return them as Task objects.

    Sends a GET request to ``<self.url>/task/pop_all`` and builds one ``Task``
    per entry of the JSON array in the response body.

    Returns:
        A list of ``Task`` objects; an empty list when the server answers with
        a non-200 status or when ``ConnectionError`` is raised.
    """
    try:
        r = requests.get(self.url + "/task/pop_all", headers=self._generate_headers(), verify=False)

        if r.status_code != 200:
            print("Problem while popping tasks for '" + self.name + "': " + str(r.status_code))
            print(r.text)
            # An error body is not a task list: stop here instead of falling
            # through and trying to parse it as one.
            return []

        return [
            Task(t["website_id"], t["url"], t["priority"], t["callback_type"], t["callback_args"])
            for t in json.loads(r.text)
        ]
    # NOTE(review): this is the builtin ConnectionError unless the module
    # imports the requests exception of the same name - confirm.
    except ConnectionError:
        return []
class TaskDispatcher:
@ -176,7 +191,7 @@ class TaskDispatcher:
def _get_available_crawl_server(self) -> CrawlServer:
queued_tasks_by_server = self._get_current_tasks_by_server()
queued_tasks_by_server = self._get_queued_tasks_by_server()
server_with_most_free_slots = None
most_free_slots = -10000
@ -253,3 +268,15 @@ class TaskDispatcher:
stats[server.name] = server_stats
return stats
def redispatch_queued(self) -> int:
    """Pop the queued tasks of every crawl server and dispatch them again.

    Returns:
        The number of tasks that were popped and re-dispatched.
    """
    # Drain every server before dispatching anything. If popping and
    # dispatching were interleaved, a task handed to a server that has not
    # been drained yet would be popped and dispatched a second time, which
    # would also inflate the returned count.
    # NOTE(review): assumes dispatch_task() may queue the task on any of
    # these servers - confirm against dispatch_task.
    pending = [
        task
        for server in self.db.get_crawl_servers()
        for task in server.pop_queued_tasks()
    ]

    for task in pending:
        self.dispatch_task(task)

    return len(pending)

View File

@ -119,8 +119,8 @@
<hr>
<h3>Misc actions</h3>
<a class="btn btn-danger" href="/website/delete_empty">Delete websites with no associated files that are
not queued</a>
<a class="btn btn-danger" href="/website/delete_empty">Delete websites with no associated files that are not queued</a>
<a class="btn btn-danger" href="/website/redispatch_queued">Re-dispatch queued tasks</a>
<hr>
<a class="btn btn-info" href="/logout">Logout</a>