mirror of
https://github.com/simon987/od-database.git
synced 2025-04-20 10:56:47 +00:00
Added redispatch button and fixed typo in load balancing code
This commit is contained in:
parent
1ac510ff53
commit
a6d753c6ee
16
app.py
16
app.py
@ -113,6 +113,19 @@ def websites():
|
|||||||
return render_template("websites.html", websites=db.get_websites(100, page))
|
return render_template("websites.html", websites=db.get_websites(100, page))
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/website/redispatch_queued")
|
||||||
|
def admin_redispatch_queued():
|
||||||
|
|
||||||
|
if "username" in session:
|
||||||
|
|
||||||
|
count = taskDispatcher.redispatch_queued()
|
||||||
|
flash("Re-dispatched " + str(count) + " tasks", "success")
|
||||||
|
return redirect("/dashboard")
|
||||||
|
|
||||||
|
else:
|
||||||
|
abort(404)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/website/delete_empty")
|
@app.route("/website/delete_empty")
|
||||||
def admin_delete_empty_website():
|
def admin_delete_empty_website():
|
||||||
"""Delete websites with no associated files that are not queued"""
|
"""Delete websites with no associated files that are not queued"""
|
||||||
@ -127,7 +140,8 @@ def admin_delete_empty_website():
|
|||||||
empty_websites = searchEngine.are_empty(non_queued_websites)
|
empty_websites = searchEngine.are_empty(non_queued_websites)
|
||||||
|
|
||||||
for website in empty_websites:
|
for website in empty_websites:
|
||||||
db.delete_website(website)
|
#db.delete_website(website)
|
||||||
|
pass
|
||||||
|
|
||||||
flash("Deleted: " + repr(list(empty_websites)), "success")
|
flash("Deleted: " + repr(list(empty_websites)), "success")
|
||||||
return redirect("/dashboard")
|
return redirect("/dashboard")
|
||||||
|
@ -83,6 +83,16 @@ class TaskManagerDatabase:
|
|||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def pop_all_tasks(self):
|
||||||
|
|
||||||
|
tasks = self.get_tasks()
|
||||||
|
|
||||||
|
with sqlite3.connect(self.db_path) as conn:
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
cursor.execute("DELETE FROM Queue")
|
||||||
|
return tasks
|
||||||
|
|
||||||
def put_task(self, task: Task):
|
def put_task(self, task: Task):
|
||||||
|
|
||||||
with sqlite3.connect(self.db_path) as conn:
|
with sqlite3.connect(self.db_path) as conn:
|
||||||
|
@ -90,6 +90,14 @@ def get_task_logs():
|
|||||||
return Response(json_str, mimetype="application/json")
|
return Response(json_str, mimetype="application/json")
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/task/pop_all")
|
||||||
|
@auth.login_required
|
||||||
|
def pop_queued_tasks():
|
||||||
|
|
||||||
|
json_str = json.dumps([task.to_json() for task in tm.pop_tasks()])
|
||||||
|
return Response(json_str, mimetype="application/json")
|
||||||
|
|
||||||
|
|
||||||
@app.route("/stats/")
|
@app.route("/stats/")
|
||||||
@auth.login_required
|
@auth.login_required
|
||||||
def get_stats():
|
def get_stats():
|
||||||
|
@ -27,6 +27,9 @@ class TaskManager:
|
|||||||
def get_tasks(self):
|
def get_tasks(self):
|
||||||
return self.db.get_tasks()
|
return self.db.get_tasks()
|
||||||
|
|
||||||
|
def pop_tasks(self):
|
||||||
|
return self.db.pop_all_tasks()
|
||||||
|
|
||||||
def get_current_tasks(self):
|
def get_current_tasks(self):
|
||||||
return self.current_tasks
|
return self.current_tasks
|
||||||
|
|
||||||
|
29
task.py
29
task.py
@ -143,6 +143,21 @@ class CrawlServer:
|
|||||||
except ConnectionError:
|
except ConnectionError:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
def pop_queued_tasks(self):
|
||||||
|
try:
|
||||||
|
r = requests.get(self.url + "/task/pop_all", headers=self._generate_headers(), verify=False)
|
||||||
|
|
||||||
|
if r.status_code != 200:
|
||||||
|
print("Problem while popping tasks for '" + self.name + "': " + str(r.status_code))
|
||||||
|
print(r.text)
|
||||||
|
|
||||||
|
return [
|
||||||
|
Task(t["website_id"], t["url"], t["priority"], t["callback_type"], t["callback_args"])
|
||||||
|
for t in json.loads(r.text)
|
||||||
|
]
|
||||||
|
except ConnectionError:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
class TaskDispatcher:
|
class TaskDispatcher:
|
||||||
|
|
||||||
@ -176,7 +191,7 @@ class TaskDispatcher:
|
|||||||
|
|
||||||
def _get_available_crawl_server(self) -> CrawlServer:
|
def _get_available_crawl_server(self) -> CrawlServer:
|
||||||
|
|
||||||
queued_tasks_by_server = self._get_current_tasks_by_server()
|
queued_tasks_by_server = self._get_queued_tasks_by_server()
|
||||||
server_with_most_free_slots = None
|
server_with_most_free_slots = None
|
||||||
most_free_slots = -10000
|
most_free_slots = -10000
|
||||||
|
|
||||||
@ -253,3 +268,15 @@ class TaskDispatcher:
|
|||||||
stats[server.name] = server_stats
|
stats[server.name] = server_stats
|
||||||
|
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
|
def redispatch_queued(self) -> int:
|
||||||
|
|
||||||
|
counter = 0
|
||||||
|
for server in self.db.get_crawl_servers():
|
||||||
|
for task in server.pop_queued_tasks():
|
||||||
|
self.dispatch_task(task)
|
||||||
|
counter += 1
|
||||||
|
|
||||||
|
return counter
|
||||||
|
|
||||||
|
|
||||||
|
@ -119,8 +119,8 @@
|
|||||||
<hr>
|
<hr>
|
||||||
<h3>Misc actions</h3>
|
<h3>Misc actions</h3>
|
||||||
|
|
||||||
<a class="btn btn-danger" href="/website/delete_empty">Delete websites with no associated files that are
|
<a class="btn btn-danger" href="/website/delete_empty">Delete websites with no associated files that are not queued</a>
|
||||||
not queued</a>
|
<a class="btn btn-danger" href="/website/redispatch_queued">Re-dispatch queued tasks</a>
|
||||||
|
|
||||||
<hr>
|
<hr>
|
||||||
<a class="btn btn-info" href="/logout">Logout</a>
|
<a class="btn btn-info" href="/logout">Logout</a>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user