mirror of
https://github.com/simon987/sist2.git
synced 2025-12-10 22:18:54 +00:00
Add sist2-admin, update Dockerfile & docker-compose
This commit is contained in:
392
sist2-admin/sist2_admin/app.py
Normal file
392
sist2-admin/sist2_admin/app.py
Normal file
@@ -0,0 +1,392 @@
|
||||
import asyncio
|
||||
import os
|
||||
import signal
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
import uvicorn
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from hexlib.db import PersistentState
|
||||
from requests import ConnectionError
|
||||
from requests.exceptions import SSLError
|
||||
from starlette.middleware.cors import CORSMiddleware
|
||||
from starlette.responses import RedirectResponse
|
||||
from starlette.staticfiles import StaticFiles
|
||||
from starlette.websockets import WebSocket
|
||||
from websockets.exceptions import ConnectionClosed
|
||||
|
||||
import cron
|
||||
from config import LOG_FOLDER, logger, WEBSERVER_PORT, DATA_FOLDER, SIST2_BINARY
|
||||
from jobs import Sist2Job, Sist2ScanTask, TaskQueue, Sist2IndexTask, JobStatus
|
||||
from notifications import Subscribe, Notifications
|
||||
from sist2 import Sist2
|
||||
from state import PickleTable, RUNNING_FRONTENDS, TESSERACT_LANGS, DB_SCHEMA_VERSION
|
||||
from web import Sist2Frontend
|
||||
|
||||
VERSION = "1.0"

# Global singletons shared by every request handler
sist2 = Sist2(SIST2_BINARY, DATA_FOLDER)
db = PersistentState(table_factory=PickleTable, dbfile=os.path.join(DATA_FOLDER, "state.db"))
notifications = Notifications()
task_queue = TaskQueue(sist2, db, notifications)

app = FastAPI()

# Allow the UI to call the API from any origin
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Serve the pre-built static frontend under /ui/
app.mount("/ui/", StaticFiles(directory="./frontend/dist", html=True), name="static")
|
||||
|
||||
|
||||
@app.get("/")
async def home():
    """Redirect the bare root URL to the web UI."""
    return RedirectResponse("ui")
|
||||
|
||||
|
||||
@app.get("/api")
async def api():
    """Static server information consumed by the UI on load."""
    return {
        "version": VERSION,
        "tesseract_langs": TESSERACT_LANGS,
        "logs_folder": LOG_FOLDER
    }
|
||||
|
||||
|
||||
@app.get("/api/job/{name:str}")
async def get_job(name: str):
    """Fetch a single job by name; 404 when unknown."""
    row = db["jobs"][name]
    if not row:
        raise HTTPException(status_code=404)
    return row["job"]
|
||||
|
||||
|
||||
@app.get("/api/frontend/{name:str}")
async def get_frontend(name: str):
    """Fetch a frontend by name, with its live running state filled in; 404 when unknown."""
    row = db["frontends"][name]
    if not row:
        raise HTTPException(status_code=404)

    frontend: Sist2Frontend = row["frontend"]
    # `running` is not persisted; derive it from the live process table
    frontend.running = frontend.name in RUNNING_FRONTENDS
    return frontend
|
||||
|
||||
|
||||
@app.get("/api/job/")
async def get_jobs():
    """List all configured jobs."""
    jobs_table = db["jobs"]
    return [entry["job"] for entry in jobs_table]
|
||||
|
||||
|
||||
@app.put("/api/job/{name:str}")
async def update_job(name: str, job: Sist2Job):
    """Overwrite the stored job configuration; 404 when the job does not exist.

    If any scan option that invalidates the previous index changed, the job is
    flagged for a full (non-incremental) scan on its next run.
    """
    # TODO: Check etag

    job.last_modified = datetime.now()
    existing = db["jobs"][name]
    if not existing:
        raise HTTPException(status_code=404)

    # Changing any of these options makes the previous index unusable for an
    # incremental scan.
    full_scan_triggers = (
        "path",
        "thumbnail_count",
        "thumbnail_quality",
        "thumbnail_size",
        "content_size",
        "depth",
        "archive",
        "archive_passphrase",
        "ocr_lang",
        "ocr_images",
        "ocr_ebooks",
        "fast",
        "checksums",
        "read_subtitles",
    )
    old_options = existing["job"].scan_options
    if any(getattr(old_options, arg) != getattr(job.scan_options, arg)
           for arg in full_scan_triggers):
        job.do_full_scan = True

    db["jobs"][name] = {"job": job}
|
||||
|
||||
|
||||
@app.put("/api/frontend/{name:str}")
async def update_frontend(name: str, frontend: Sist2Frontend):
    """Overwrite the stored configuration for frontend *name*."""
    db["frontends"][name] = {"frontend": frontend}

    # TODO: Check etag

    return "ok"
|
||||
|
||||
|
||||
@app.get("/api/task/")
async def get_tasks():
    """JSON snapshots of the tasks currently known to the queue."""
    return [task.json() for task in task_queue.tasks()]
|
||||
|
||||
|
||||
@app.get("/api/task/history")
async def task_history():
    """Completed tasks, most recently started first."""
    return list(db["task_done"].sql("ORDER BY started DESC"))
|
||||
|
||||
|
||||
@app.post("/api/task/{task_id:str}/kill")
async def kill_job(task_id: str):
    """Ask the queue to SIGTERM the process behind *task_id*; True when a signal was sent."""
    return task_queue.kill_task(task_id)
|
||||
|
||||
|
||||
def _run_job(job: Sist2Job):
    """Persist the job as started and queue its scan + index tasks.

    The index task depends on the scan task, so the queue will only run it
    after the scan completes successfully.
    """
    job.last_modified = datetime.now()
    if job.status == JobStatus("created"):
        job.status = JobStatus("started")
    db["jobs"][job.name] = {"job": job}

    scan_task = Sist2ScanTask(job, f"Scan [{job.name}]")
    index_task = Sist2IndexTask(job, f"Index [{job.name}]", depends_on=scan_task)

    task_queue.submit(scan_task)
    task_queue.submit(index_task)
|
||||
|
||||
|
||||
@app.get("/api/job/{name:str}/run")
async def run_job(name: str):
    """Queue a scan + index run for the job; 404 when unknown."""
    row = db["jobs"][name]
    if not row:
        raise HTTPException(status_code=404)

    _run_job(row["job"])
    return "ok"
|
||||
|
||||
|
||||
@app.delete("/api/job/{name:str}")
async def delete_job(name: str):
    """Delete a job by name; 404 when unknown."""
    if not db["jobs"][name]:
        raise HTTPException(status_code=404)
    del db["jobs"][name]
|
||||
|
||||
|
||||
@app.delete("/api/frontend/{name:str}")
async def delete_frontend(name: str):
    """Stop the frontend's web process if it is running, then delete it; 404 when unknown."""
    if name in RUNNING_FRONTENDS:
        os.kill(RUNNING_FRONTENDS[name], signal.SIGTERM)
        del RUNNING_FRONTENDS[name]

    if not db["frontends"][name]:
        raise HTTPException(status_code=404)
    del db["frontends"][name]
|
||||
|
||||
|
||||
@app.post("/api/job/{name:str}")
async def create_job(name: str):
    """Create a new job with default options; 409 when the name is taken.

    Previously raised a bare ValueError, which FastAPI surfaces as an opaque
    500 — a conflict should be reported to the client as 409.
    """
    if db["jobs"][name]:
        raise HTTPException(status_code=409, detail="Job with the same name already exists")

    job = Sist2Job.create_default(name)
    db["jobs"][name] = {"job": job}

    return job
|
||||
|
||||
|
||||
@app.post("/api/frontend/{name:str}")
async def create_frontend(name: str):
    """Create a new frontend with default options; 409 when the name is taken.

    BUG FIX: the duplicate check read db["frontend"] (singular) while every
    other access in this file uses the "frontends" table, so the check could
    never find an existing entry. Also raise HTTPException instead of a bare
    ValueError (which becomes an opaque 500).
    """
    if db["frontends"][name]:
        raise HTTPException(status_code=409, detail="Frontend with the same name already exists")

    frontend = Sist2Frontend.create_default(name)
    db["frontends"][name] = {"frontend": frontend}

    return frontend
|
||||
|
||||
|
||||
@app.get("/api/ping_es")
async def ping_es(url: str, insecure: bool):
    """Connectivity/credential check for an Elasticsearch URL, used by the config UI."""
    return check_es_version(url, insecure)
|
||||
|
||||
|
||||
def check_es_version(es_url: str, insecure: bool):
    """Probe an Elasticsearch server and report its version.

    :param es_url: Full ES URL; may embed basic-auth credentials (user:pass@host).
    :param insecure: When True, skip TLS certificate verification.
    :return: dict with an "ok" flag and a human-readable "message".
    """
    try:
        url = urlparse(es_url)
        if url.username:
            # Strip the credentials out of the URL and pass them as basic auth
            auth = (url.username, url.password)
            es_url = f"{url.scheme}://{url.hostname}:{url.port}"
        else:
            auth = None

        # BUG FIX: `verify` must be the negation of `insecure` — the old code
        # passed verify=insecure, which verified certificates exactly when the
        # user asked it not to (and vice versa).
        r = requests.get(es_url, verify=not insecure, auth=auth)
    except SSLError:
        return {
            "ok": False,
            "message": "Invalid SSL certificate"
        }
    except ConnectionError:
        return {
            "ok": False,
            "message": "Connection refused"
        }
    except ValueError as e:
        return {
            "ok": False,
            "message": str(e)
        }

    if r.status_code == 401:
        return {
            "ok": False,
            "message": "Authentication failure"
        }

    try:
        return {
            "ok": True,
            "message": "Elasticsearch version " + r.json()["version"]["number"]
        }
    except Exception:
        # Unexpected/garbled body; was a bare `except:`, narrowed so that
        # KeyboardInterrupt/SystemExit are no longer swallowed
        return {
            "ok": False,
            "message": "Could not read version"
        }
|
||||
|
||||
|
||||
def start_frontend_(frontend: Sist2Frontend):
    """Spawn a sist2 web process for *frontend* and record its pid."""
    # Point the frontend at the most recent index of every job it serves
    frontend.web_options.indices = [db["jobs"][job_name]["job"].last_index
                                    for job_name in frontend.jobs]

    pid = sist2.web(frontend.web_options, frontend.name)
    RUNNING_FRONTENDS[frontend.name] = pid
|
||||
|
||||
|
||||
@app.post("/api/frontend/{name:str}/start")
async def start_frontend(name: str):
    """Start the frontend's web process; 404 when unknown."""
    row = db["frontends"][name]
    if not row:
        raise HTTPException(status_code=404)

    start_frontend_(row["frontend"])
|
||||
|
||||
|
||||
@app.post("/api/frontend/{name:str}/stop")
async def stop_frontend(name: str):
    """SIGTERM the frontend's web process if it is running (no-op otherwise)."""
    if name in RUNNING_FRONTENDS:
        os.kill(RUNNING_FRONTENDS[name], signal.SIGTERM)
        del RUNNING_FRONTENDS[name]
|
||||
|
||||
|
||||
@app.get("/api/frontend/")
async def get_frontends():
    """All frontends with their live running state filled in."""
    frontends = []
    for row in db["frontends"]:
        frontend: Sist2Frontend = row["frontend"]
        # `running` is not persisted; derive it from the live process table
        frontend.running = frontend.name in RUNNING_FRONTENDS
        frontends.append(frontend)
    return frontends
|
||||
|
||||
|
||||
def tail(filepath: str, n: int):
    """Follow a log file, `tail -f` style.

    First yields the last *n* complete lines of the file, then yields each new
    complete line as it is appended. Yields None whenever no new data is
    available so the caller can throttle (e.g. sleep) between polls. Partial
    lines are buffered until their terminating newline arrives.

    :param filepath: path of the file to follow.
    :param n: number of trailing lines to emit initially.
    """
    with open(filepath) as file:

        reached_eof = False
        buffer = []

        line = ""
        while True:
            tmp = file.readline()
            if tmp:
                line += tmp

                if line.endswith("\n"):

                    if reached_eof:
                        yield line
                    else:
                        buffer.append(line)
                        # BUG FIX: keep at most n lines. The old code popped
                        # *before* appending with a `>` test, so the buffer
                        # could hold n+1 lines and tail(f, n) emitted one
                        # line too many.
                        if len(buffer) > n:
                            buffer.pop(0)
                    line = ""
            else:
                if not reached_eof:
                    reached_eof = True
                    yield from buffer
                yield None
|
||||
|
||||
|
||||
@app.websocket("/notifications")
async def ws_notifications(websocket: WebSocket):
    """Stream admin notifications to the client over a websocket.

    Renamed from ws_tail_log: this module defined two functions with that name,
    so the second definition shadowed this one at module level (the route still
    worked only because the decorator registers the handler before the name is
    clobbered). Also removed a stray debug print of every notification.
    """
    await websocket.accept()

    try:
        # Wait for the client's initial message before streaming
        await websocket.receive_text()

        async with Subscribe(notifications) as ob:
            async for notification in ob.notifications():
                await websocket.send_json(notification)

    except ConnectionClosed:
        return
|
||||
|
||||
|
||||
@app.websocket("/log/{task_id}")
async def ws_tail_log(websocket: WebSocket, task_id: str, n: int):
    """Stream a task's log file to the client over a websocket, tail -f style.

    :param task_id: task UUID; the log lives at LOG_FOLDER/sist2-<task_id>.log
    :param n: number of trailing lines to send when the client connects
    """
    log_file = os.path.join(LOG_FOLDER, f"sist2-{task_id}.log")

    await websocket.accept()

    try:
        # Wait for the client's initial message before streaming
        await websocket.receive_text()
    except ConnectionClosed:
        return

    while True:
        for line in tail(log_file, n):

            try:
                if line:
                    await websocket.send_text(line)
                else:
                    # tail() yields None when idle: send a ping so broken
                    # connections are detected, and throttle the polling loop
                    await websocket.send_json({"ping": ""})
                    await asyncio.sleep(0.1)
            except ConnectionClosed:
                return
|
||||
|
||||
|
||||
def main():
    """Run the admin API with uvicorn on all interfaces."""
    uvicorn.run(app, port=WEBSERVER_PORT, host="0.0.0.0")
|
||||
|
||||
|
||||
def initialize_db():
    """Seed a fresh state.db with the schema version and a default frontend."""
    db["sist2_admin"]["info"] = {"version": DB_SCHEMA_VERSION}

    default_frontend = Sist2Frontend.create_default("default")
    db["frontends"]["default"] = {"frontend": default_frontend}

    logger.info("Initialized database.")
|
||||
|
||||
|
||||
def start_frontends():
    """Start every auto-start frontend that serves at least one job."""
    for row in db["frontends"]:
        frontend: Sist2Frontend = row["frontend"]
        if frontend.auto_start and len(frontend.jobs) > 0:
            start_frontend_(frontend)
|
||||
|
||||
|
||||
if __name__ == '__main__':

    # First run: seed the database. Otherwise refuse to start when the on-disk
    # schema does not match what this build expects.
    if not db["sist2_admin"]["info"]:
        initialize_db()
    elif db["sist2_admin"]["info"]["version"] != DB_SCHEMA_VERSION:
        print("Database has incompatible schema version! Delete state.db to continue.")
        exit(-1)

    start_frontends()
    cron.initialize(db, _run_job)

    logger.info("Started sist2-admin. Hello!")

    main()
|
||||
30
sist2-admin/sist2_admin/config.py
Normal file
30
sist2-admin/sist2_admin/config.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import os
import logging
import sys
from logging import StreamHandler
from logging.handlers import RotatingFileHandler

# Rotate the admin log once it grows past 1 MiB
MAX_LOG_SIZE = 1 * 1024 * 1024

SIST2_BINARY = os.environ.get("SIST2_BINARY", "/root/sist2")
DATA_FOLDER = os.environ.get("DATA_FOLDER", "/sist2-admin/")
LOG_FOLDER = os.path.join(DATA_FOLDER, "logs")
WEBSERVER_PORT = 8080

# makedirs creates intermediate directories, so DATA_FOLDER exists after the
# first call; both calls are kept for clarity and are idempotent
os.makedirs(LOG_FOLDER, exist_ok=True)
os.makedirs(DATA_FOLDER, exist_ok=True)

# FIX: use getLogger() instead of instantiating logging.Logger directly — the
# logging documentation warns that Logger must never be instantiated directly,
# since such a logger is not registered with the manager and cannot be
# retrieved by name from other modules.
logger = logging.getLogger("sist2-admin")
logger.setLevel(logging.INFO)

_log_file = os.path.join(LOG_FOLDER, "sist2-admin.log")
_log_fmt = "%(asctime)s [%(levelname)s] %(message)s"
_log_formatter = logging.Formatter(_log_fmt, datefmt='%Y-%m-%d %H:%M:%S')

console_handler = StreamHandler(sys.stdout)
console_handler.setFormatter(_log_formatter)

file_handler = RotatingFileHandler(_log_file, mode="a", maxBytes=MAX_LOG_SIZE, backupCount=1)
file_handler.setFormatter(_log_formatter)

logger.addHandler(console_handler)
logger.addHandler(file_handler)
|
||||
33
sist2-admin/sist2_admin/cron.py
Normal file
33
sist2-admin/sist2_admin/cron.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from threading import Thread
|
||||
|
||||
import pycron
|
||||
import time
|
||||
|
||||
from hexlib.db import PersistentState
|
||||
|
||||
from config import logger
|
||||
from jobs import Sist2Job
|
||||
|
||||
|
||||
def _check_schedule(db: PersistentState, run_job):
    """Run every enabled job whose cron expression matches the current minute."""
    for row in db["jobs"]:
        job: Sist2Job = row["job"]

        if job.schedule_enabled and pycron.is_now(job.cron_expression):
            logger.info(f"Submit scan task to queue for [{job.name}]")
            run_job(job)
|
||||
|
||||
|
||||
def _cron_thread(db, run_job):
    """Background loop: check job schedules once per minute.

    Runs forever; intended to be started as a daemon thread by initialize().
    """
    # Sleep until the next wall-clock minute boundary so checks align to :00
    time.sleep(60 - (time.time() % 60))
    start = time.time()

    while True:
        _check_schedule(db, run_job)
        # Re-align relative to `start` so the check duration does not drift
        # the schedule
        time.sleep(60 - ((time.time() - start) % 60))
|
||||
|
||||
|
||||
def initialize(db, run_job):
    """Start the background scheduler thread (daemon, named "timer")."""
    timer_thread = Thread(target=_cron_thread, args=(db, run_job), daemon=True, name="timer")
    timer_thread.start()
|
||||
315
sist2-admin/sist2_admin/jobs.py
Normal file
315
sist2-admin/sist2_admin/jobs.py
Normal file
@@ -0,0 +1,315 @@
|
||||
import json
|
||||
import logging
|
||||
import os.path
|
||||
import shutil
|
||||
import signal
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from hashlib import md5
|
||||
from logging import FileHandler
|
||||
from threading import Lock, Thread
|
||||
from time import sleep
|
||||
from uuid import uuid4, UUID
|
||||
|
||||
from hexlib.db import PersistentState
|
||||
from pydantic import BaseModel, validator
|
||||
|
||||
from config import logger, LOG_FOLDER
|
||||
from notifications import Notifications
|
||||
from sist2 import ScanOptions, IndexOptions, Sist2, Sist2Index
|
||||
from state import RUNNING_FRONTENDS
|
||||
from web import Sist2Frontend
|
||||
|
||||
|
||||
class JobStatus(Enum):
    # Job lifecycle: created -> started -> indexed | failed
    CREATED = "created"
    STARTED = "started"
    INDEXED = "indexed"
    FAILED = "failed"
|
||||
|
||||
|
||||
class Sist2Job(BaseModel):
    """Persistent description of a scan + index job."""
    name: str
    scan_options: ScanOptions
    index_options: IndexOptions

    cron_expression: str
    schedule_enabled: bool = False

    previous_index: str = None      # index path before the most recent scan
    last_index: str = None          # path of the most recent index
    last_index_date: datetime = None
    status: JobStatus = JobStatus("created")
    last_modified: datetime
    etag: str = None                # derived checksum, see validate_etag()
    do_full_scan: bool = False      # set when options changed incompatibly

    # Removed a no-op __init__ override that only called super().__init__ —
    # pydantic's own constructor is used unchanged.

    @staticmethod
    def create_default(name: str):
        """Create a job named *name* with default scan/index options (daily at midnight)."""
        return Sist2Job(
            name=name,
            scan_options=ScanOptions(path="/"),
            index_options=IndexOptions(),
            last_modified=datetime.now(),
            cron_expression="0 0 * * *"
        )

    @validator("etag", always=True)
    def validate_etag(cls, value, values):
        # The etag is recomputed from the user-editable fields on every
        # validation, so it changes whenever the configuration changes.
        s = values["name"] + values["scan_options"].json() + values["index_options"].json() + values["cron_expression"]
        return md5(s.encode()).hexdigest()
|
||||
|
||||
|
||||
class Sist2TaskProgress:
    """Progress snapshot of a running sist2 task, parsed from its log output."""

    def __init__(self, done: int = 0, count: int = 0, index_size: int = 0, tn_size: int = 0, waiting: bool = False):
        self.done = done
        self.count = count
        self.index_size = index_size
        self.store_size = tn_size
        self.waiting = waiting

    def percent(self):
        """Completion ratio in [0, 1]; 0 while the total count is unknown."""
        if not self.count:
            return 0
        return self.done / self.count
|
||||
|
||||
|
||||
class Sist2Task:
    """Base class for queued sist2 work items (scan, index).

    Owns progress tracking, timing, and a per-task log file that the
    /log/{task_id} websocket endpoint streams to the UI.
    """

    def __init__(self, job: Sist2Job, display_name: str, depends_on: uuid.UUID = None):
        self.job = job
        self.display_name = display_name

        self.progress = Sist2TaskProgress()
        self.id = uuid4()
        self.pid = None       # pid of the spawned sist2 process, set via callback
        self.started = None   # set when run() begins
        self.ended = None     # set by subclasses when the subprocess exits
        self.depends_on = depends_on  # id of a task that must complete first, if any

        # Dedicated logger writing to LOG_FOLDER/sist2-<id>.log
        self._logger = logging.Logger(name=f"{self.id}")
        self._logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"sist2-{self.id}.log")))

    def json(self):
        """Serializable snapshot for the tasks API."""
        return {
            "id": self.id,
            "job": self.job,
            "display_name": self.display_name,
            "progress": self.progress,
            "started": self.started,
            "ended": self.ended,
            "depends_on": self.depends_on,
        }

    def log_callback(self, log_json):
        # sist2 emits JSON log lines; "progress" lines update the in-memory
        # progress, everything else goes to the per-task log file
        if "progress" in log_json:
            self.progress = Sist2TaskProgress(**log_json["progress"])
        elif self._logger:
            self._logger.info(json.dumps(log_json))

    def run(self, sist2: Sist2, db: PersistentState):
        # Subclasses extend this; the base records the start time only
        self.started = datetime.now()

        logger.info(f"Started task {self.display_name}")
|
||||
|
||||
|
||||
class Sist2ScanTask(Sist2Task):
    """Task wrapping `sist2 scan` for a job."""

    def run(self, sist2: Sist2, db: PersistentState):
        """Run the scan; on success, record the new index on the job. Returns the exit code."""
        super().run(sist2, db)

        self.job.scan_options.name = self.job.name

        # Scan incrementally against the previous index, unless a full scan was
        # requested or the previous index no longer exists on disk
        if self.job.last_index and os.path.exists(self.job.last_index) and not self.job.do_full_scan:
            self.job.scan_options.incremental = self.job.last_index
        else:
            self.job.scan_options.incremental = None

        def set_pid(pid):
            # Invoked by Sist2.scan() as soon as the subprocess is spawned,
            # so kill_task() can signal it
            self.pid = pid

        return_code = sist2.scan(self.job.scan_options, logs_cb=self.log_callback, set_pid_cb=set_pid)
        self.ended = datetime.now()

        if return_code != 0:
            self._logger.error(json.dumps({"sist2-admin": f"Process returned non-zero exit code ({return_code})"}))
            logger.info(f"Task {self.display_name} failed ({return_code})")
        else:
            index = Sist2Index(self.job.scan_options.output)

            # Save latest index
            self.job.previous_index = self.job.last_index

            self.job.last_index = index.path
            self.job.last_index_date = datetime.now()
            self.job.do_full_scan = False
            db["jobs"][self.job.name] = {"job": self.job}
            self._logger.info(json.dumps({"sist2-admin": f"Save last_index={self.job.last_index}"}))

        logger.info(f"Completed {self.display_name} ({return_code=})")

        return return_code
|
||||
|
||||
|
||||
class Sist2IndexTask(Sist2Task):
    """Task wrapping `sist2 index`: pushes a completed scan into Elasticsearch."""

    def __init__(self, job: Sist2Job, display_name: str, depends_on: Sist2Task):
        super().__init__(job, display_name, depends_on=depends_on.id)

    def run(self, sist2: Sist2, db: PersistentState):
        """Run the index step; on success, prune the superseded index and
        restart running frontends. Returns the exit code."""
        super().run(sist2, db)

        # Index the output of the scan task we depend on
        self.job.index_options.path = self.job.scan_options.output

        return_code = sist2.index(self.job.index_options, logs_cb=self.log_callback)
        self.ended = datetime.now()

        duration = self.ended - self.started

        ok = return_code == 0

        if ok:
            # Remove the superseded on-disk index now that the new one is indexed
            if self.job.previous_index is not None:
                self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index=}"}))
                try:
                    shutil.rmtree(self.job.previous_index)
                except FileNotFoundError:
                    pass

            self.restart_running_frontends(db, sist2)

        # Update status
        self.job.status = JobStatus("indexed") if ok else JobStatus("failed")
        db["jobs"][self.job.name] = {"job": self.job}

        # (was mislabelled "Sist2Scan" in the index task's log line)
        self._logger.info(json.dumps({"sist2-admin": f"Sist2Index task finished {return_code=}, {duration=}"}))

        logger.info(f"Completed {self.display_name} ({return_code=})")

        return return_code

    def restart_running_frontends(self, db: PersistentState, sist2: Sist2):
        """Restart every running frontend so it serves the new index paths."""
        for frontend_name, pid in RUNNING_FRONTENDS.items():
            frontend: Sist2Frontend = db["frontends"][frontend_name]["frontend"]

            os.kill(pid, signal.SIGTERM)
            try:
                os.wait()
            except ChildProcessError:
                pass

            # BUG FIX: materialize into a list — the old code assigned the bare
            # map object, a one-shot iterator, to web_options.indices which is
            # declared List[str] (start_frontend_() in app.py already builds a
            # list here).
            frontend.web_options.indices = list(map(lambda j: db["jobs"][j]["job"].last_index, frontend.jobs))

            pid = sist2.web(frontend.web_options, frontend.name)
            RUNNING_FRONTENDS[frontend_name] = pid

            self._logger.info(json.dumps({"sist2-admin": f"Restart frontend {pid=} {frontend_name=}"}))
|
||||
|
||||
|
||||
class TaskQueue:
    """Single-worker task queue: runs one Sist2Task at a time, honoring
    inter-task dependencies, and records results in the task_done table."""

    def __init__(self, sist2: Sist2, db: PersistentState, notifications: Notifications):
        self._lock = Lock()

        self._sist2 = sist2
        self._db = db
        self._notifications = notifications

        self._tasks = {}   # task id -> {"task": Sist2Task, "thread": Thread} (running)
        self._queue = []   # tasks waiting to be started
        self._sem = 0      # number of currently running tasks (capped at 1)

        self._thread = Thread(target=self._check_new_task, daemon=True)
        self._thread.start()

    def _tasks_failed(self):
        """Ids of completed tasks that exited with a non-zero return code."""
        done = set()

        for row in self._db["task_done"].sql("WHERE return_code != 0"):
            done.add(uuid.UUID(row["id"]))

        return done

    def _tasks_done(self):
        """Ids of all completed tasks."""
        done = set()

        for row in self._db["task_done"]:
            done.add(uuid.UUID(row["id"]))

        return done

    def _check_new_task(self):
        # Scheduler loop: once per second, start the first runnable queued task
        while True:
            with self._lock:
                for task in list(self._queue):
                    task: Sist2Task

                    if self._sem >= 1:
                        # A task is already running; try again on the next tick
                        break

                    if not task.depends_on or task.depends_on in self._tasks_done():
                        self._queue.remove(task)

                        if task.depends_on in self._tasks_failed():
                            # The task we depend on failed: drop this one and
                            # consider the next queued task
                            continue

                        self._sem += 1

                        t = Thread(target=self._run_task, args=(task,))

                        self._tasks[task.id] = {
                            "task": task,
                            "thread": t,
                        }

                        t.start()
                        break
            sleep(1)

    def tasks(self):
        """Currently running tasks."""
        return [entry["task"] for entry in self._tasks.values()]

    def kill_task(self, task_id):
        """SIGTERM the process behind *task_id*; True when a signal was sent."""
        task = self._tasks.get(UUID(task_id))

        # Guard against a task whose subprocess has not spawned yet: pid stays
        # None until the set_pid callback fires, and os.kill(None, ...) raises
        # TypeError.
        if task and task["task"].pid is not None:
            pid = task["task"].pid
            logger.info(f"Killing task {task_id} (pid={pid})")
            os.kill(pid, signal.SIGTERM)
            return True

        return False

    def _run_task(self, task: Sist2Task):
        task_result = task.run(self._sist2, self._db)

        with self._lock:
            del self._tasks[task.id]
            self._sem -= 1

            self._db["task_done"][task.id] = {
                "ended": task.ended,
                "started": task.started,
                "name": task.display_name,
                "return_code": task_result
            }
        if isinstance(task, Sist2IndexTask):
            self._notifications.notify({
                "message": "notifications.indexCompleted",
                "job": task.job.name
            })

    def submit(self, task: Sist2Task):
        """Add *task* to the queue; the scheduler thread starts it when possible."""
        logger.info(f"Submitted task to queue {task.display_name}")

        with self._lock:
            self._queue.append(task)
|
||||
40
sist2-admin/sist2_admin/notifications.py
Normal file
40
sist2-admin/sist2_admin/notifications.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import asyncio
|
||||
from typing import List
|
||||
|
||||
|
||||
class Notifications:
    """Fan-out hub: broadcasts notification dicts to all subscribed observers."""

    def __init__(self):
        # Observers; each must expose a notify(dict) method
        self._subscribers = []

    def subscribe(self, ob):
        """Register an observer."""
        self._subscribers.append(ob)

    def unsubscribe(self, ob):
        """Remove a previously registered observer."""
        self._subscribers.remove(ob)

    def notify(self, notification: dict):
        """Deliver *notification* to every current subscriber."""
        for subscriber in self._subscribers:
            subscriber.notify(notification)
|
||||
|
||||
|
||||
class Subscribe:
    """Async context manager that subscribes to a Notifications hub for its
    lifetime and exposes received notifications as an async generator."""

    def __init__(self, notifications: Notifications):
        self._queue = []                    # notifications not yet consumed
        self._notifications = notifications

    async def __aenter__(self):
        self._notifications.subscribe(self)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        self._notifications.unsubscribe(self)

    def notify(self, notification: dict):
        """Called by the hub; buffers the notification for the consumer."""
        self._queue.append(notification)

    async def notifications(self):
        """Yield buffered notifications forever, polling every 100 ms when idle."""
        while True:
            if self._queue:
                yield self._queue.pop(0)
            else:
                await asyncio.sleep(0.1)
|
||||
307
sist2-admin/sist2_admin/sist2.py
Normal file
307
sist2-admin/sist2_admin/sist2.py
Normal file
@@ -0,0 +1,307 @@
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import os.path
|
||||
import traceback
|
||||
from datetime import datetime
|
||||
from io import TextIOWrapper
|
||||
from logging import FileHandler
|
||||
from subprocess import Popen, PIPE
|
||||
from tempfile import NamedTemporaryFile
|
||||
from threading import Thread
|
||||
from typing import List
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from config import logger, LOG_FOLDER
|
||||
|
||||
|
||||
class Sist2Version:
    """Semantic version parsed from a "major.minor.patch" string."""

    def __init__(self, version: str):
        self._version = version

        major, minor, patch = version.split(".")
        self.major = int(major)
        self.minor = int(minor)
        self.patch = int(patch)

    def __str__(self):
        return ".".join(str(part) for part in (self.major, self.minor, self.patch))
|
||||
|
||||
|
||||
class WebOptions(BaseModel):
    """Options for the `sist2 web` subcommand (one set per frontend)."""
    indices: List[str] = []     # index paths to serve
    es_url: str = "http://elasticsearch:9200"
    es_insecure_ssl: bool = False
    # NOTE(review): es_index is declared but never emitted by args() — confirm
    # whether `sist2 web` needs an --es-index flag here.
    es_index: str = "sist2"
    bind: str = "0.0.0.0:4090"
    auth: str = None            # basic-auth credentials, if set
    tag_auth: str = None
    tagline: str = "Lightning-fast file system indexer and search tool"
    dev: bool = False
    lang: str = "en"

    # Removed a no-op __init__ override that only called super().__init__.

    def args(self):
        """Build the sist2 CLI argument list for these options."""
        args = ["web", f"--es-url={self.es_url}", f"--bind={self.bind}",
                f"--tagline={self.tagline}", f"--lang={self.lang}"]

        if self.es_insecure_ssl:
            args.append("--es-insecure-ssl")
        if self.auth:
            args.append(f"--auth={self.auth}")
        if self.tag_auth:
            args.append(f"--tag_auth={self.tag_auth}")
        if self.dev:
            args.append("--dev")

        args.extend(self.indices)

        return args
|
||||
|
||||
|
||||
class IndexOptions(BaseModel):
    """Options for the `sist2 index` subcommand."""
    path: str = None            # path of the .sist2 index produced by scan
    threads: int = 1
    es_url: str = "http://elasticsearch:9200"
    es_insecure_ssl: bool = False
    es_index: str = "sist2"
    incremental_index: bool = False
    # Inline script; Sist2.index() writes it to a temp file and sets script_file
    script: str = ""
    script_file: str = None
    batch_size: int = 100

    # Removed a no-op __init__ override that only called super().__init__.

    def args(self):
        """Build the sist2 CLI argument list for these options."""
        args = ["index", self.path, f"--threads={self.threads}", f"--es-url={self.es_url}",
                f"--es-index={self.es_index}", f"--batch-size={self.batch_size}"]

        if self.script_file:
            args.append(f"--script-file={self.script_file}")
        if self.es_insecure_ssl:
            args.append("--es-insecure-ssl")
        if self.incremental_index:
            args.append("--incremental-index")

        return args
|
||||
|
||||
|
||||
# Valid values for ScanOptions.archive (how the scanner treats archive files)
ARCHIVE_SKIP = "skip"
ARCHIVE_LIST = "list"
ARCHIVE_SHALLOW = "shallow"
ARCHIVE_RECURSE = "recurse"
|
||||
|
||||
|
||||
class ScanOptions(BaseModel):
    """Options for the `sist2 scan` subcommand."""
    path: str
    threads: int = 1
    mem_throttle: int = 0
    thumbnail_quality: float = 1.0
    thumbnail_size: int = 500
    thumbnail_count: int = 1
    content_size: int = 32768
    depth: int = -1
    archive: str = ARCHIVE_RECURSE
    archive_passphrase: str = None
    # FIX: was annotated `bool` but holds an OCR language string — it is
    # interpolated into `--ocr-lang={...}` below.
    ocr_lang: str = None
    ocr_images: bool = False
    ocr_ebooks: bool = False
    exclude: str = None
    fast: bool = False
    treemap_threshold: float = 0.0005
    mem_buffer: int = 2000
    read_subtitles: bool = False
    fast_epub: bool = False
    checksums: bool = False
    incremental: str = None   # path of a previous index to scan against
    output: str = None        # set by Sist2.scan() before spawning
    name: str = None
    rewrite_url: str = None
    list_file: str = None

    # Removed a no-op __init__ override that only called super().__init__.

    def args(self):
        """Build the sist2 CLI argument list for these options."""
        args = ["scan", self.path, f"--threads={self.threads}", f"--mem-throttle={self.mem_throttle}",
                f"--thumbnail-quality={self.thumbnail_quality}", f"--thumbnail-count={self.thumbnail_count}",
                f"--content-size={self.content_size}", f"--output={self.output}", f"--depth={self.depth}",
                f"--archive={self.archive}", f"--mem-buffer={self.mem_buffer}"]

        if self.incremental:
            args.append(f"--incremental={self.incremental}")
        if self.rewrite_url:
            args.append(f"--rewrite-url={self.rewrite_url}")
        if self.name:
            args.append(f"--name={self.name}")
        if self.archive_passphrase:
            args.append(f"--archive-passphrase={self.archive_passphrase}")
        if self.ocr_lang:
            args.append(f"--ocr-lang={self.ocr_lang}")
        if self.ocr_ebooks:
            args.append("--ocr-ebooks")
        if self.ocr_images:
            args.append("--ocr-images")
        if self.exclude:
            args.append(f"--exclude={self.exclude}")
        if self.fast:
            args.append("--fast")
        if self.treemap_threshold:
            args.append(f"--treemap-threshold={self.treemap_threshold}")
        if self.read_subtitles:
            args.append("--read-subtitles")
        if self.fast_epub:
            args.append("--fast-epub")
        if self.checksums:
            args.append("--checksums")
        if self.list_file:
            # NOTE(review): underscore in "--list_file" is inconsistent with the
            # dashed style of every other flag — confirm against the sist2 CLI.
            args.append(f"--list_file={self.list_file}")

        return args
|
||||
|
||||
|
||||
class Sist2Index:
    """Read-only view over an on-disk sist2 index directory (reads descriptor.json)."""

    def __init__(self, path):
        self.path = path

        descriptor_file = os.path.join(path, "descriptor.json")
        with open(descriptor_file) as f:
            self._descriptor = json.load(f)

    def version(self) -> Sist2Version:
        return Sist2Version(self._descriptor["version"])

    def timestamp(self) -> datetime:
        return datetime.fromtimestamp(self._descriptor["timestamp"])

    def name(self) -> str:
        return self._descriptor["name"]

    def to_json(self):
        """Summary dict for the admin API."""
        return {
            "path": self.path,
            "version": self.version(),
            "timestamp": self.timestamp(),
            "name": self.name()
        }
|
||||
|
||||
|
||||
class Sist2:
    """Thin wrapper around the sist2 binary: builds CLI arguments, spawns the
    subprocess, and forwards its log output to a callback."""

    def __init__(self, bin_path: str, data_directory: str):
        self._bin_path = bin_path
        self._data_dir = data_directory  # where scan outputs (*.sist2) are created

    def index(self, options: IndexOptions, logs_cb):
        """Run `sist2 index`; blocks until completion and returns the exit code."""
        if options.script:
            # Inline script: persist it to a temp file so it can be passed via
            # --script-file (delete=False: the subprocess reads it later)
            with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".painless", delete=False) as f:
                f.write(options.script)
                options.script_file = f.name
        else:
            options.script_file = None

        args = [
            self._bin_path,
            *options.args(),
            "--json-logs",
            "--very-verbose"
        ]
        proc = Popen(args, stdout=PIPE, stderr=PIPE)

        # Drain stderr on a separate thread so neither pipe fills up and
        # deadlocks the child
        t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
        t_stderr.start()

        self._consume_logs_stdout(logs_cb, proc)

        t_stderr.join()

        return proc.returncode

    def scan(self, options: ScanOptions, logs_cb, set_pid_cb):
        """Run `sist2 scan`; reports the child pid via set_pid_cb and returns the exit code."""
        # Each run writes to a fresh timestamped output index
        output_dir = os.path.join(
            self._data_dir,
            f"scan-{datetime.now()}.sist2"
        )
        options.output = output_dir

        args = [
            self._bin_path,
            *options.args(),
            "--json-logs",
            "--very-verbose"
        ]

        logs_cb({"sist2-admin": f"Starting sist2 command with args {args}"})

        proc = Popen(args, stdout=PIPE, stderr=PIPE)

        # Report the pid immediately so the task can be killed while running
        set_pid_cb(proc.pid)

        t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
        t_stderr.start()

        self._consume_logs_stdout(logs_cb, proc)

        t_stderr.join()

        return proc.returncode

    @staticmethod
    def _consume_logs_stderr(logs_cb, proc):
        # stderr lines are forwarded verbatim (not parsed as JSON)
        pipe_wrapper = TextIOWrapper(proc.stderr, encoding="utf8")
        try:
            for line in pipe_wrapper:
                if line.strip() == "":
                    continue
                logs_cb({"stderr": line})
        finally:
            proc.wait()
            pipe_wrapper.close()

    @staticmethod
    def _consume_logs_stdout(logs_cb, proc):
        # stdout lines are expected to be JSON objects (--json-logs)
        pipe_wrapper = TextIOWrapper(proc.stdout, encoding="utf8")
        try:
            for line in pipe_wrapper:
                if line.strip() == "":
                    continue
                log_object = json.loads(line)
                logs_cb(log_object)
        except Exception as e:
            # Malformed output: kill the child and dump the offending line.
            # `line` is unbound if the very first read failed, hence the
            # NameError guard.
            proc.kill()
            try:
                print(line)
            except NameError:
                pass
            print(traceback.format_exc())
        finally:
            pass
            # proc.wait()
            # pipe_wrapper.close()

    def web(self, options: WebOptions, name: str):
        """Spawn `sist2 web` in the background; returns the child pid immediately."""
        args = [
            self._bin_path,
            *options.args()
        ]

        # Frontend log lines go to their own file, not the admin log
        web_logger = logging.Logger(name=f"sist2-frontend-{name}")
        web_logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"frontend-{name}.log")))

        def logs_cb(message):
            web_logger.info(json.dumps(message))

        logger.info(f"Starting frontend {' '.join(args)}")

        proc = Popen(args, stdout=PIPE, stderr=PIPE)

        t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
        t_stderr.start()

        t_stdout = Thread(target=self._consume_logs_stdout, args=(logs_cb, proc))
        t_stdout.start()

        return proc.pid
|
||||
50
sist2-admin/sist2_admin/state.py
Normal file
50
sist2-admin/sist2_admin/state.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from typing import Dict
|
||||
|
||||
from hexlib.db import Table
|
||||
import pickle
|
||||
|
||||
from tesseract import get_tesseract_langs
|
||||
|
||||
# PIDs of currently running sist2 web frontends, keyed by frontend name.
RUNNING_FRONTENDS: Dict[str, int] = {}

# Languages available to the local tesseract OCR install, probed once at import
# time (import fails if the tesseract binary is missing).
TESSERACT_LANGS = get_tesseract_langs()

# Version tag for the persisted sist2-admin state database layout.
DB_SCHEMA_VERSION = "1"
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
def _serialize(item):
    """Prepare a value for storage in a PickleTable.

    Pydantic models are pickled to bytes; raw bytes input is rejected;
    every other value passes through unchanged.
    """
    if isinstance(item, bytes):
        raise Exception("FIXME: bytes in PickleTable")
    return pickle.dumps(item) if isinstance(item, BaseModel) else item
|
||||
|
||||
|
||||
def _deserialize(item):
|
||||
if isinstance(item, bytes):
|
||||
return pickle.loads(item)
|
||||
return item
|
||||
|
||||
|
||||
class PickleTable(Table):
    """Table subclass that transparently (de)serializes row values.

    Values are run through _serialize on write and _deserialize on read, so
    pydantic models can be stored in the underlying table as pickled bytes.
    """

    def __getitem__(self, item):
        row = super().__getitem__(item)
        if not row:
            return row
        return {k: _deserialize(v) for k, v in row.items()}

    def __setitem__(self, key, value):
        serialized = {k: _serialize(v) for k, v in value.items()}
        super().__setitem__(key, serialized)

    def __iter__(self):
        for row in super().__iter__():
            yield {k: _deserialize(v) for k, v in row.items()}

    def sql(self, where_clause, *params):
        for row in super().sql(where_clause, *params):
            yield {k: _deserialize(v) for k, v in row.items()}
|
||||
|
||||
14
sist2-admin/sist2_admin/tesseract.py
Normal file
14
sist2-admin/sist2_admin/tesseract.py
Normal file
@@ -0,0 +1,14 @@
|
||||
import subprocess
|
||||
|
||||
|
||||
def get_tesseract_langs():
    """Return the list of languages supported by the local tesseract install.

    Runs `tesseract --list-langs` and parses its output: the first line is a
    header ("List of available languages ..."), the remaining lines are
    language codes. The special "osd" (orientation & script detection) entry
    is excluded.

    Raises:
        FileNotFoundError: if the tesseract binary is not on PATH.
        subprocess.CalledProcessError: if tesseract exits with an error.
    """

    res = subprocess.check_output([
        "tesseract",
        "--list-langs"
    ]).decode()

    # Skip the header line; splitlines() (instead of split("\n")) also copes
    # with \r\n line endings from a Windows tesseract build.
    languages = res.splitlines()[1:]

    return [lang for lang in languages if lang and lang != "osd"]
|
||||
|
||||
29
sist2-admin/sist2_admin/web.py
Normal file
29
sist2-admin/sist2_admin/web.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import os.path
|
||||
from typing import List
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from sist2 import WebOptions
|
||||
|
||||
|
||||
class Sist2Frontend(BaseModel):
    """Configuration for one sist2 web UI instance managed by sist2-admin."""

    name: str  # unique frontend name; also used to derive the log file name
    jobs: List[str]  # names of the jobs whose indices this frontend serves
    web_options: WebOptions  # arguments forwarded to `sist2 web`
    running: bool = False  # NOTE(review): not read in this class — presumably tracked by the admin app; confirm

    auto_start: bool = False  # start this frontend automatically when sist2-admin boots
    enable_monitoring: bool = True
    extra_query_args: str = ""  # extra query string appended to frontend URLs
    custom_url: str = None  # NOTE(review): annotation should arguably be Optional[str]; None is the "no override" default

    def get_log_path(self, log_folder: str):
        """Return the path of this frontend's log file inside *log_folder*."""
        return os.path.join(log_folder, f"frontend-{self.name}.log")

    @staticmethod
    def create_default(name: str):
        """Create a frontend named *name* with default web options and no attached jobs."""
        return Sist2Frontend(
            name=name,
            web_options=WebOptions(),
            jobs=[]
        )
|
||||
Reference in New Issue
Block a user