mirror of
https://github.com/simon987/sist2.git
synced 2025-04-04 07:52:59 +00:00
408 lines
12 KiB
Python
408 lines
12 KiB
Python
import json
|
|
import logging
|
|
import os.path
|
|
import shlex
|
|
import signal
|
|
import uuid
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
from io import TextIOWrapper
|
|
from logging import FileHandler
|
|
from subprocess import Popen
|
|
import subprocess
|
|
from threading import Lock, Thread
|
|
from time import sleep
|
|
from typing import List
|
|
from uuid import uuid4, UUID
|
|
|
|
from hexlib.db import PersistentState
|
|
from pydantic import BaseModel
|
|
|
|
from config import logger, LOG_FOLDER, DATA_FOLDER
|
|
from notifications import Notifications
|
|
from sist2 import ScanOptions, IndexOptions, Sist2
|
|
from state import RUNNING_FRONTENDS, get_log_files_to_remove, delete_log_file
|
|
from web import Sist2Frontend
|
|
from script import UserScript
|
|
|
|
|
|
class JobStatus(Enum):
|
|
CREATED = "created"
|
|
STARTED = "started"
|
|
INDEXED = "indexed"
|
|
FAILED = "failed"
|
|
|
|
|
|
class Sist2Job(BaseModel):
|
|
name: str
|
|
scan_options: ScanOptions
|
|
index_options: IndexOptions
|
|
|
|
user_scripts: List[str] = []
|
|
|
|
cron_expression: str
|
|
schedule_enabled: bool = False
|
|
|
|
keep_last_n_logs: int = -1
|
|
|
|
previous_index: str = None
|
|
index_path: str = None
|
|
previous_index_path: str = None
|
|
last_index_date: datetime = None
|
|
status: JobStatus = JobStatus("created")
|
|
last_modified: datetime
|
|
etag: str = None
|
|
do_full_scan: bool = False
|
|
|
|
def __init__(self, **kwargs):
|
|
super().__init__(**kwargs)
|
|
|
|
@staticmethod
|
|
def create_default(name: str):
|
|
return Sist2Job(
|
|
name=name,
|
|
scan_options=ScanOptions(path="/"),
|
|
index_options=IndexOptions(),
|
|
last_modified=datetime.utcnow(),
|
|
cron_expression="0 0 * * *"
|
|
)
|
|
|
|
|
|
class Sist2TaskProgress:
|
|
|
|
def __init__(self, done: int = 0, count: int = 0, index_size: int = 0, tn_size: int = 0, waiting: bool = False):
|
|
self.done = done
|
|
self.count = count
|
|
self.index_size = index_size
|
|
self.store_size = tn_size
|
|
self.waiting = waiting
|
|
|
|
def percent(self):
|
|
return (self.done / self.count) if self.count else 0
|
|
|
|
|
|
class Sist2Task:
|
|
|
|
def __init__(self, job: Sist2Job, display_name: str, depends_on: uuid.UUID = None):
|
|
self.job = job
|
|
self.display_name = display_name
|
|
|
|
self.progress = Sist2TaskProgress()
|
|
self.id = uuid4()
|
|
self.pid = None
|
|
self.started = None
|
|
self.ended = None
|
|
self.depends_on = depends_on
|
|
|
|
self._logger = logging.Logger(name=f"{self.id}")
|
|
self._logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"sist2-{self.id}.log")))
|
|
|
|
def json(self):
|
|
return {
|
|
"id": self.id,
|
|
"job": self.job,
|
|
"display_name": self.display_name,
|
|
"progress": self.progress,
|
|
"started": self.started,
|
|
"ended": self.ended,
|
|
"depends_on": self.depends_on,
|
|
}
|
|
|
|
def log_callback(self, log_json):
|
|
|
|
if "progress" in log_json:
|
|
self.progress = Sist2TaskProgress(**log_json["progress"])
|
|
elif self._logger:
|
|
self._logger.info(json.dumps(log_json))
|
|
|
|
def run(self, sist2: Sist2, db: PersistentState):
|
|
self.started = datetime.utcnow()
|
|
|
|
logger.info(f"Started task {self.display_name}")
|
|
|
|
|
|
class Sist2ScanTask(Sist2Task):
|
|
|
|
def run(self, sist2: Sist2, db: PersistentState):
|
|
super().run(sist2, db)
|
|
|
|
self.job.scan_options.name = self.job.name
|
|
|
|
if self.job.index_path is not None and not self.job.do_full_scan:
|
|
self.job.scan_options.output = self.job.index_path
|
|
else:
|
|
self.job.scan_options.output = None
|
|
|
|
def set_pid(pid):
|
|
self.pid = pid
|
|
|
|
return_code = sist2.scan(self.job.scan_options, logs_cb=self.log_callback, set_pid_cb=set_pid)
|
|
self.ended = datetime.utcnow()
|
|
|
|
is_ok = return_code in (0, 1)
|
|
|
|
if not is_ok:
|
|
self._logger.error(json.dumps({"sist2-admin": f"Process returned non-zero exit code ({return_code})"}))
|
|
logger.info(f"Task {self.display_name} failed ({return_code})")
|
|
else:
|
|
self.job.index_path = self.job.scan_options.output
|
|
self.job.last_index_date = datetime.utcnow()
|
|
self.job.do_full_scan = False
|
|
db["jobs"][self.job.name] = self.job
|
|
self._logger.info(json.dumps({"sist2-admin": f"Save last_index_date={self.job.last_index_date}"}))
|
|
|
|
logger.info(f"Completed {self.display_name} ({return_code=})")
|
|
|
|
# Remove old index
|
|
if is_ok:
|
|
if self.job.previous_index_path is not None and self.job.previous_index_path != self.job.index_path:
|
|
self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index_path=}"}))
|
|
try:
|
|
os.remove(self.job.previous_index_path)
|
|
except FileNotFoundError:
|
|
pass
|
|
|
|
self.job.previous_index_path = self.job.index_path
|
|
db["jobs"][self.job.name] = self.job
|
|
|
|
return return_code
|
|
|
|
|
|
class Sist2IndexTask(Sist2Task):
|
|
|
|
def __init__(self, job: Sist2Job, display_name: str, depends_on: Sist2Task):
|
|
super().__init__(job, display_name, depends_on=depends_on.id)
|
|
|
|
def run(self, sist2: Sist2, db: PersistentState):
|
|
super().run(sist2, db)
|
|
|
|
self.job.index_options.path = self.job.scan_options.output
|
|
|
|
search_backend = db["search_backends"][self.job.index_options.search_backend]
|
|
if search_backend is None:
|
|
logger.error(f"Error while running task: search backend not found: {self.job.index_options.search_backend}")
|
|
return -1
|
|
|
|
logger.debug(f"Fetched search backend options for {self.job.index_options.search_backend}")
|
|
|
|
return_code = sist2.index(self.job.index_options, search_backend, logs_cb=self.log_callback)
|
|
self.ended = datetime.utcnow()
|
|
|
|
duration = self.ended - self.started
|
|
|
|
ok = return_code in (0, 1)
|
|
|
|
if ok:
|
|
self.restart_running_frontends(db, sist2)
|
|
|
|
# Update status
|
|
self.job.status = JobStatus("indexed") if ok else JobStatus("failed")
|
|
self.job.previous_index_path = self.job.index_path
|
|
db["jobs"][self.job.name] = self.job
|
|
|
|
self._logger.info(json.dumps({"sist2-admin": f"Sist2Scan task finished {return_code=}, {duration=}"}))
|
|
|
|
logger.info(f"Completed {self.display_name} ({return_code=})")
|
|
|
|
return return_code
|
|
|
|
def restart_running_frontends(self, db: PersistentState, sist2: Sist2):
|
|
for frontend_name, pid in RUNNING_FRONTENDS.items():
|
|
frontend = db["frontends"][frontend_name]
|
|
frontend: Sist2Frontend
|
|
|
|
try:
|
|
os.kill(pid, signal.SIGTERM)
|
|
except ProcessLookupError:
|
|
pass
|
|
try:
|
|
os.wait()
|
|
except ChildProcessError:
|
|
pass
|
|
|
|
backend_name = frontend.web_options.search_backend
|
|
search_backend = db["search_backends"][backend_name]
|
|
if search_backend is None:
|
|
logger.error(f"Error while running task: search backend not found: {backend_name}")
|
|
return -1
|
|
|
|
logger.debug(f"Fetched search backend options for {backend_name}")
|
|
|
|
frontend.web_options.indices = [
|
|
os.path.join(DATA_FOLDER, db["jobs"][j].index_path)
|
|
for j in frontend.jobs
|
|
]
|
|
|
|
pid = sist2.web(frontend.web_options, search_backend, frontend.name)
|
|
RUNNING_FRONTENDS[frontend_name] = pid
|
|
|
|
self._logger.info(json.dumps({"sist2-admin": f"Restart frontend {pid=} {frontend_name=}"}))
|
|
|
|
|
|
class Sist2UserScriptTask(Sist2Task):
|
|
|
|
def __init__(self, user_script: UserScript, job: Sist2Job, display_name: str, depends_on: Sist2Task = None):
|
|
super().__init__(job, display_name, depends_on=depends_on.id if depends_on else None)
|
|
self.user_script = user_script
|
|
|
|
def run(self, sist2: Sist2, db: PersistentState):
|
|
super().run(sist2, db)
|
|
|
|
try:
|
|
self.user_script.setup(self.log_callback)
|
|
except Exception as e:
|
|
logger.error(f"Setup for {self.user_script.name} failed: ")
|
|
logger.exception(e)
|
|
self.log_callback({"sist2-admin": f"Setup for {self.user_script.name} failed: {e}"})
|
|
return -1
|
|
|
|
executable = self.user_script.get_executable()
|
|
index_path = os.path.join(DATA_FOLDER, self.job.index_path)
|
|
extra_args = self.user_script.extra_args
|
|
|
|
args = [
|
|
executable,
|
|
index_path,
|
|
*shlex.split(extra_args)
|
|
]
|
|
|
|
self.log_callback({"sist2-admin": f"Starting user script with {executable=}, {index_path=}, {extra_args=}"})
|
|
|
|
proc = Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=self.user_script.script_dir())
|
|
self.pid = proc.pid
|
|
|
|
t_stderr = Thread(target=self._consume_logs, args=(self.log_callback, proc, "stderr", False))
|
|
t_stderr.start()
|
|
|
|
self._consume_logs(self.log_callback, proc, "stdout", True)
|
|
|
|
self.ended = datetime.utcnow()
|
|
|
|
return 0
|
|
|
|
@staticmethod
|
|
def _consume_logs(logs_cb, proc, stream, wait):
|
|
pipe_wrapper = TextIOWrapper(getattr(proc, stream), encoding="utf8", errors="ignore")
|
|
try:
|
|
for line in pipe_wrapper:
|
|
if line.strip() == "":
|
|
continue
|
|
if line.startswith("$PROGRESS"):
|
|
progress = json.loads(line[len("$PROGRESS "):])
|
|
logs_cb({"progress": progress})
|
|
continue
|
|
logs_cb({stream: line})
|
|
finally:
|
|
if wait:
|
|
proc.wait()
|
|
pipe_wrapper.close()
|
|
|
|
|
|
class TaskQueue:
|
|
def __init__(self, sist2: Sist2, db: PersistentState, notifications: Notifications):
|
|
self._lock = Lock()
|
|
|
|
self._sist2 = sist2
|
|
self._db = db
|
|
self._notifications = notifications
|
|
|
|
self._tasks = {}
|
|
self._queue = []
|
|
self._sem = 0
|
|
|
|
self._thread = Thread(target=self._check_new_task, daemon=True)
|
|
self._thread.start()
|
|
|
|
def _tasks_failed(self):
|
|
done = set()
|
|
|
|
for row in self._db["task_done"].sql("WHERE return_code NOT IN (0,1)"):
|
|
done.add(uuid.UUID(row["id"]))
|
|
|
|
return done
|
|
|
|
def _tasks_done(self):
|
|
|
|
done = set()
|
|
|
|
for row in self._db["task_done"]:
|
|
done.add(uuid.UUID(row["id"]))
|
|
|
|
return done
|
|
|
|
def _check_new_task(self):
|
|
while True:
|
|
with self._lock:
|
|
for task in list(self._queue):
|
|
task: Sist2Task
|
|
|
|
if self._sem >= 1:
|
|
break
|
|
|
|
if not task.depends_on or task.depends_on in self._tasks_done():
|
|
self._queue.remove(task)
|
|
|
|
if task.depends_on in self._tasks_failed():
|
|
# The task which we depend on failed, continue
|
|
continue
|
|
|
|
self._sem += 1
|
|
|
|
t = Thread(target=self._run_task, args=(task,))
|
|
|
|
self._tasks[task.id] = {
|
|
"task": task,
|
|
"thread": t,
|
|
}
|
|
|
|
t.start()
|
|
break
|
|
sleep(1)
|
|
|
|
def tasks(self):
|
|
return list(map(lambda t: t["task"], self._tasks.values()))
|
|
|
|
def kill_task(self, task_id):
|
|
|
|
task = self._tasks.get(UUID(task_id))
|
|
|
|
if task:
|
|
pid = task["task"].pid
|
|
logger.info(f"Killing task {task_id} (pid={pid})")
|
|
os.kill(pid, signal.SIGTERM)
|
|
return True
|
|
|
|
return False
|
|
|
|
def _run_task(self, task: Sist2Task):
|
|
task_result = task.run(self._sist2, self._db)
|
|
|
|
with self._lock:
|
|
del self._tasks[task.id]
|
|
self._sem -= 1
|
|
|
|
self._db["task_done"][task.id] = {
|
|
"ended": task.ended,
|
|
"started": task.started,
|
|
"name": task.display_name,
|
|
"return_code": task_result,
|
|
"has_logs": 1
|
|
}
|
|
|
|
logs_to_delete = get_log_files_to_remove(self._db, task.job.name, task.job.keep_last_n_logs)
|
|
for row in logs_to_delete:
|
|
delete_log_file(self._db, row["id"])
|
|
|
|
if isinstance(task, Sist2IndexTask):
|
|
self._notifications.notify({
|
|
"message": "notifications.indexCompleted",
|
|
"job": task.job.name
|
|
})
|
|
|
|
def submit(self, task: Sist2Task):
|
|
|
|
logger.info(f"Submitted task to queue {task.display_name}")
|
|
|
|
with self._lock:
|
|
self._queue.append(task)
|