mirror of
https://github.com/simon987/od-database.git
synced 2025-04-16 08:56:44 +00:00
125 lines
4.0 KiB
Python
125 lines
4.0 KiB
Python
import os
|
|
import json
|
|
import sqlite3
|
|
|
|
|
|
class TaskResult:
|
|
|
|
def __init__(self, status_code=None, file_count=0, start_time=0, end_time=0, website_id=0):
|
|
self.status_code = status_code
|
|
self.file_count = file_count
|
|
self.start_time = start_time
|
|
self.end_time = end_time
|
|
self.website_id = website_id
|
|
|
|
def to_json(self):
|
|
return {
|
|
"status_code": self.status_code,
|
|
"file_count": self.file_count,
|
|
"start_time": self.start_time,
|
|
"end_time": self.end_time,
|
|
"website_id": self.website_id
|
|
}
|
|
|
|
|
|
class Task:
|
|
|
|
def __init__(self, website_id: int, url: str, priority: int = 1,
|
|
callback_type: str = None, callback_args: str = None):
|
|
self.website_id = website_id
|
|
self.url = url
|
|
self.priority = priority
|
|
self.callback_type = callback_type
|
|
self.callback_args = json.loads(callback_args) if callback_args else {}
|
|
|
|
def to_json(self):
|
|
return {
|
|
"website_id": self.website_id,
|
|
"url": self.url,
|
|
"priority": self.priority,
|
|
"callback_type": self.callback_type,
|
|
"callback_args": json.dumps(self.callback_args)
|
|
}
|
|
|
|
def __repr__(self):
|
|
return json.dumps(self.to_json())
|
|
|
|
|
|
class TaskManagerDatabase:
|
|
|
|
def __init__(self, db_path):
|
|
self.db_path = db_path
|
|
|
|
if not os.path.exists(db_path):
|
|
self.init_database()
|
|
|
|
def init_database(self):
|
|
|
|
with open("task_db_init.sql", "r") as f:
|
|
init_script = f.read()
|
|
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
conn.executescript(init_script)
|
|
conn.commit()
|
|
|
|
def pop_task(self):
|
|
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute("SELECT id, website_id, url, priority, callback_type, callback_args"
|
|
" FROM Queue ORDER BY priority DESC, Queue.id ASC LIMIT 1")
|
|
task = cursor.fetchone()
|
|
|
|
if task:
|
|
cursor.execute("DELETE FROM Queue WHERE id=?", (task[0],))
|
|
conn.commit()
|
|
return Task(task[1], task[2], task[3], task[4], task[5])
|
|
else:
|
|
return None
|
|
|
|
def put_task(self, task: Task):
|
|
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute("INSERT INTO Queue (website_id, url, priority, callback_type, callback_args) "
|
|
"VALUES (?,?,?,?,?)",
|
|
(task.website_id, task.url, task.priority,
|
|
task.callback_type, json.dumps(task.callback_args)))
|
|
conn.commit()
|
|
|
|
def get_tasks(self):
|
|
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute("SELECT website_id, url, priority, callback_type, callback_args FROM Queue")
|
|
tasks = cursor.fetchall()
|
|
|
|
return [Task(t[0], t[1], t[2], t[3], t[4]) for t in tasks]
|
|
|
|
def log_result(self, result: TaskResult):
|
|
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute("INSERT INTO TaskResult (website_id, status_code, file_count, start_time, end_time) "
|
|
"VALUES (?,?,?,?,?)", (result.website_id, result.status_code, result.file_count,
|
|
result.start_time, result.end_time))
|
|
conn.commit()
|
|
|
|
def get_non_indexed_results(self):
|
|
"""Get a list of new TaskResults since the last call of this method"""
|
|
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute("SELECT status_code, file_count, start_time, end_time, website_id"
|
|
" FROM TaskResult WHERE indexed_time != NULL")
|
|
db_result = cursor.fetchall()
|
|
|
|
cursor.execute("UPDATE TaskResult SET indexed_time = CURRENT_TIMESTAMP")
|
|
|
|
return [TaskResult(r[0], r[1], r[2], r[3], r[4]) for r in db_result]
|