Add sist2-admin, update Dockerfile & docker-compose

This commit is contained in:
2022-11-26 21:22:49 -05:00
parent c0b8a9c467
commit cb4bd9f05a
50 changed files with 39558 additions and 16 deletions

View File

@@ -0,0 +1,392 @@
import asyncio
import os
import signal
from datetime import datetime
from urllib.parse import urlparse
import requests
import uvicorn
from fastapi import FastAPI, HTTPException
from hexlib.db import PersistentState
from requests import ConnectionError
from requests.exceptions import SSLError
from starlette.middleware.cors import CORSMiddleware
from starlette.responses import RedirectResponse
from starlette.staticfiles import StaticFiles
from starlette.websockets import WebSocket
from websockets.exceptions import ConnectionClosed
import cron
from config import LOG_FOLDER, logger, WEBSERVER_PORT, DATA_FOLDER, SIST2_BINARY
from jobs import Sist2Job, Sist2ScanTask, TaskQueue, Sist2IndexTask, JobStatus
from notifications import Subscribe, Notifications
from sist2 import Sist2
from state import PickleTable, RUNNING_FRONTENDS, TESSERACT_LANGS, DB_SCHEMA_VERSION
from web import Sist2Frontend
# Application version reported by the /api endpoint.
VERSION = "1.0"

# Wrapper around the sist2 binary used to launch scan/index/web subprocesses.
sist2 = Sist2(SIST2_BINARY, DATA_FOLDER)
# Persistent state (jobs, frontends, task history) backed by state.db.
db = PersistentState(table_factory=PickleTable, dbfile=os.path.join(DATA_FOLDER, "state.db"))
notifications = Notifications()
# Queue that serializes scan/index tasks and records results in db.
task_queue = TaskQueue(sist2, db, notifications)

app = FastAPI()

# NOTE(review): allow_origins=["*"] together with allow_credentials=True is a
# very permissive CORS setup — acceptable for a LAN admin tool, but confirm.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Serve the pre-built admin UI as static files under /ui/.
app.mount("/ui/", StaticFiles(directory="./frontend/dist", html=True), name="static")
@app.get("/")
async def home():
    """Redirect the bare root path to the bundled admin UI."""
    return RedirectResponse(url="ui")
@app.get("/api")
async def api():
    """Return general server information consumed by the UI."""
    info = {
        "version": VERSION,
        "tesseract_langs": TESSERACT_LANGS,
        "logs_folder": LOG_FOLDER,
    }
    return info
@app.get("/api/job/{name:str}")
async def get_job(name: str):
    """Fetch a single job by name; 404 when it does not exist."""
    row = db["jobs"][name]
    if not row:
        raise HTTPException(status_code=404)
    return row["job"]
@app.get("/api/frontend/{name:str}")
async def get_frontend(name: str):
    """Fetch a single frontend by name, annotated with its live running flag."""
    row = db["frontends"][name]
    if not row:
        raise HTTPException(status_code=404)
    frontend: Sist2Frontend = row["frontend"]
    frontend.running = frontend.name in RUNNING_FRONTENDS
    return frontend
@app.get("/api/job/")
async def get_jobs():
    """List every configured job."""
    jobs = []
    for row in db["jobs"]:
        jobs.append(row["job"])
    return jobs
@app.put("/api/job/{name:str}")
async def update_job(name: str, job: Sist2Job):
    """Replace the stored configuration of an existing job; 404 if unknown."""
    # TODO: Check etag
    job.last_modified = datetime.now()

    row = db["jobs"][name]
    if not row:
        raise HTTPException(status_code=404)

    # A change to any of these scan options invalidates the incremental scan
    # data, so the next run must be a full scan.
    full_scan_args = (
        "path", "thumbnail_count", "thumbnail_quality", "thumbnail_size",
        "content_size", "depth", "archive", "archive_passphrase", "ocr_lang",
        "ocr_images", "ocr_ebooks", "fast", "checksums", "read_subtitles",
    )
    old_options = row["job"].scan_options
    if any(getattr(old_options, arg) != getattr(job.scan_options, arg) for arg in full_scan_args):
        job.do_full_scan = True

    db["jobs"][name] = {"job": job}
@app.put("/api/frontend/{name:str}")
async def update_frontend(name: str, frontend: Sist2Frontend):
    """Store the supplied frontend configuration under its name."""
    # TODO: Check etag
    db["frontends"][name] = {"frontend": frontend}
    return "ok"
@app.get("/api/task/")
async def get_tasks():
    """List the currently queued/running tasks as JSON-friendly dicts."""
    return [task.json() for task in task_queue.tasks()]
@app.get("/api/task/history")
async def task_history():
    """Return completed tasks, most recently started first."""
    return [row for row in db["task_done"].sql("ORDER BY started DESC")]
@app.post("/api/task/{task_id:str}/kill")
async def kill_job(task_id: str):
    """Send SIGTERM to the task's subprocess; returns False when not found."""
    killed = task_queue.kill_task(task_id)
    return killed
def _run_job(job: Sist2Job):
    """Queue a scan task followed by a dependent index task for *job*."""
    job.last_modified = datetime.now()
    if job.status == JobStatus("created"):
        job.status = JobStatus("started")
    db["jobs"][job.name] = {"job": job}

    scan_task = Sist2ScanTask(job, f"Scan [{job.name}]")
    index_task = Sist2IndexTask(job, f"Index [{job.name}]", depends_on=scan_task)
    for task in (scan_task, index_task):
        task_queue.submit(task)
@app.get("/api/job/{name:str}/run")
async def run_job(name: str):
    """Trigger an immediate scan+index run for the named job; 404 if unknown."""
    row = db["jobs"][name]
    if row:
        _run_job(row["job"])
        return "ok"
    raise HTTPException(status_code=404)
@app.delete("/api/job/{name:str}")
async def delete_job(name: str):
    """Delete the named job; 404 when it does not exist."""
    if not db["jobs"][name]:
        raise HTTPException(status_code=404)
    del db["jobs"][name]
@app.delete("/api/frontend/{name:str}")
async def delete_frontend(name: str):
    """Stop the frontend's process if running, then delete its configuration."""
    if name in RUNNING_FRONTENDS:
        os.kill(RUNNING_FRONTENDS[name], signal.SIGTERM)
        RUNNING_FRONTENDS.pop(name)

    if not db["frontends"][name]:
        raise HTTPException(status_code=404)
    del db["frontends"][name]
@app.post("/api/job/{name:str}")
async def create_job(name: str):
    """Create a job with default options and return it.

    Raises a 409 Conflict when a job with the same name exists. (The original
    raised a bare ValueError, which FastAPI surfaces as an opaque 500.)
    """
    if db["jobs"][name]:
        raise HTTPException(status_code=409, detail="Job with the same name already exists")
    job = Sist2Job.create_default(name)
    db["jobs"][name] = {"job": job}
    return job
@app.post("/api/frontend/{name:str}")
async def create_frontend(name: str):
    """Create a frontend with default options and return it.

    Bug fix: the duplicate check read db["frontend"] (nonexistent table)
    instead of db["frontends"], so duplicates were never detected. Also raises
    a 409 instead of a bare ValueError (which became an opaque 500).
    """
    if db["frontends"][name]:
        raise HTTPException(status_code=409, detail="Frontend with the same name already exists")
    frontend = Sist2Frontend.create_default(name)
    db["frontends"][name] = {"frontend": frontend}
    return frontend
@app.get("/api/ping_es")
async def ping_es(url: str, insecure: bool):
    # Thin wrapper: lets the UI validate an Elasticsearch URL before saving it.
    return check_es_version(url, insecure)
def check_es_version(es_url: str, insecure: bool):
    """Probe an Elasticsearch server and report its version.

    Returns a dict with "ok" (bool) and a human-readable "message"; never
    raises for expected failure modes.

    :param es_url: Full URL, optionally with inline credentials (user:pass@host).
    :param insecure: When True, skip TLS certificate verification.
    """
    try:
        url = urlparse(es_url)
        if url.username:
            # Extract inline credentials and pass them as HTTP basic auth.
            auth = (url.username, url.password)
            es_url = f"{url.scheme}://{url.hostname}:{url.port}"
        else:
            auth = None

        # Bug fix: `verify=insecure` was inverted — insecure=True must
        # *disable* certificate verification (requests' verify=False).
        r = requests.get(es_url, verify=not insecure, auth=auth)
    except SSLError:
        return {
            "ok": False,
            "message": "Invalid SSL certificate"
        }
    except ConnectionError:
        return {
            "ok": False,
            "message": "Connection refused"
        }
    except ValueError as e:
        return {
            "ok": False,
            "message": str(e)
        }

    if r.status_code == 401:
        return {
            "ok": False,
            "message": "Authentication failure"
        }

    try:
        return {
            "ok": True,
            "message": "Elasticsearch version " + r.json()["version"]["number"]
        }
    except Exception:
        # Response body was not the expected ES banner JSON (narrowed from a
        # bare except, which also swallowed KeyboardInterrupt/SystemExit).
        return {
            "ok": False,
            "message": "Could not read version"
        }
def start_frontend_(frontend: Sist2Frontend):
    # Resolve each configured job name to its most recent index path, launch
    # the sist2 web subprocess, and remember its pid for later shutdown.
    frontend.web_options.indices = list(map(lambda j: db["jobs"][j]["job"].last_index, frontend.jobs))
    pid = sist2.web(frontend.web_options, frontend.name)
    RUNNING_FRONTENDS[frontend.name] = pid
@app.post("/api/frontend/{name:str}/start")
async def start_frontend(name: str):
    """Start the named frontend's web process; 404 when unknown."""
    row = db["frontends"][name]
    if row:
        start_frontend_(row["frontend"])
    else:
        raise HTTPException(status_code=404)
@app.post("/api/frontend/{name:str}/stop")
async def stop_frontend(name: str):
    """Terminate the frontend's web process if it is running (no-op otherwise)."""
    if name in RUNNING_FRONTENDS:
        os.kill(RUNNING_FRONTENDS[name], signal.SIGTERM)
        RUNNING_FRONTENDS.pop(name)
@app.get("/api/frontend/")
async def get_frontends():
    """List all frontends, annotating each with whether it is running."""
    frontends = []
    for row in db["frontends"]:
        fe: Sist2Frontend = row["frontend"]
        fe.running = fe.name in RUNNING_FRONTENDS
        frontends.append(fe)
    return frontends
def tail(filepath: str, n: int):
    """Follow *filepath* like ``tail -f``, starting with the last *n* lines.

    Until EOF is first reached, complete lines are collected in a bounded
    buffer holding the *n* most recent lines. Once EOF is hit the buffered
    lines are yielded, and from then on every new complete line is yielded as
    it appears. Yields None whenever no complete line is currently available,
    so callers can interleave keepalive/poll logic. Never returns on its own.

    Bug fix: the buffer condition was ``> n`` (pop after exceeding), which
    retained n+1 lines; it now keeps at most *n*.
    """
    with open(filepath) as file:
        reached_eof = False
        buffer = []
        line = ""
        while True:
            tmp = file.readline()
            if tmp:
                # Accumulate partial reads until we have a full line.
                line += tmp
                if line.endswith("\n"):
                    if reached_eof:
                        yield line
                    else:
                        if len(buffer) >= n:
                            buffer.pop(0)
                        buffer.append(line)
                    line = ""
            else:
                if not reached_eof:
                    reached_eof = True
                    yield from buffer
                # Nothing new: let the caller decide how to wait.
                yield None
@app.websocket("/notifications")
async def ws_tail_log(websocket: WebSocket):
    # Pushes every admin notification to the connected websocket client.
    # NOTE(review): this function shares its name with the /log/{task_id}
    # handler below (the later def shadows this one at module level). The route
    # still works because FastAPI registers handlers at decoration time, but
    # the name should be unique (e.g. ws_notifications).
    await websocket.accept()
    try:
        # Wait for the client's initial message before subscribing.
        await websocket.receive_text()
        async with Subscribe(notifications) as ob:
            async for notification in ob.notifications():
                await websocket.send_json(notification)
                print(notification)  # NOTE(review): looks like leftover debug output
    except ConnectionClosed:
        return
@app.websocket("/log/{task_id}")
async def ws_tail_log(websocket: WebSocket, task_id: str, n: int):
    """Stream the last *n* lines, then live updates, of a task's log file."""
    log_file = os.path.join(LOG_FOLDER, f"sist2-{task_id}.log")

    await websocket.accept()
    try:
        # The client sends one message to signal that it is ready.
        await websocket.receive_text()
    except ConnectionClosed:
        return

    # NOTE(review): tail() never terminates, so the outer `while True` is
    # effectively redundant; also tail() does blocking file I/O inside the
    # event loop — fine for small local logs, but it can stall other requests.
    while True:
        for line in tail(log_file, n):
            try:
                if line:
                    await websocket.send_text(line)
                else:
                    # No new complete line: keepalive ping, then back off.
                    await websocket.send_json({"ping": ""})
                    await asyncio.sleep(0.1)
            except ConnectionClosed:
                return
def main():
    # Serve the admin API and UI on all interfaces.
    uvicorn.run(app, port=WEBSERVER_PORT, host="0.0.0.0")
def initialize_db():
    """Seed a fresh state.db with the schema version and a default frontend."""
    db["sist2_admin"]["info"] = {"version": DB_SCHEMA_VERSION}
    db["frontends"]["default"] = {"frontend": Sist2Frontend.create_default("default")}
    logger.info("Initialized database.")
def start_frontends():
    """Start every frontend marked auto_start that has at least one job."""
    for row in db["frontends"]:
        frontend: Sist2Frontend = row["frontend"]
        if frontend.auto_start and frontend.jobs:
            start_frontend_(frontend)
if __name__ == '__main__':
    # First run: create the schema marker and the default frontend.
    if not db["sist2_admin"]["info"]:
        initialize_db()
    elif db["sist2_admin"]["info"]["version"] != DB_SCHEMA_VERSION:
        # Refuse to run against a state.db written by an incompatible version.
        print("Database has incompatible schema version! Delete state.db to continue.")
        exit(-1)

    start_frontends()
    # Background thread that triggers scheduled jobs (cron expressions).
    cron.initialize(db, _run_job)

    logger.info("Started sist2-admin. Hello!")
    main()

View File

@@ -0,0 +1,30 @@
import os
import logging
import sys
from logging import StreamHandler
from logging.handlers import RotatingFileHandler
# Rotate the admin log file once it reaches 1 MiB.
MAX_LOG_SIZE = 1 * 1024 * 1024

# Binary path and writable data directory; both overridable via the
# environment (defaults match the Docker image layout).
SIST2_BINARY = os.environ.get("SIST2_BINARY", "/root/sist2")
DATA_FOLDER = os.environ.get("DATA_FOLDER", "/sist2-admin/")
LOG_FOLDER = os.path.join(DATA_FOLDER, "logs")

WEBSERVER_PORT = 8080

os.makedirs(LOG_FOLDER, exist_ok=True)
os.makedirs(DATA_FOLDER, exist_ok=True)

# NOTE(review): instantiating logging.Logger directly bypasses the logging
# manager — logging.getLogger("sist2-admin") elsewhere would return a
# *different* logger. Handlers below still work; confirm this is intentional.
logger = logging.Logger("sist2-admin")

_log_file = os.path.join(LOG_FOLDER, "sist2-admin.log")
_log_fmt = "%(asctime)s [%(levelname)s] %(message)s"
_log_formatter = logging.Formatter(_log_fmt, datefmt='%Y-%m-%d %H:%M:%S')

# Log to stdout and to a rotating file (one backup kept).
console_handler = StreamHandler(sys.stdout)
console_handler.setFormatter(_log_formatter)
file_handler = RotatingFileHandler(_log_file, mode="a", maxBytes=MAX_LOG_SIZE, backupCount=1)
file_handler.setFormatter(_log_formatter)

logger.addHandler(console_handler)
logger.addHandler(file_handler)

View File

@@ -0,0 +1,33 @@
from threading import Thread
import pycron
import time
from hexlib.db import PersistentState
from config import logger
from jobs import Sist2Job
def _check_schedule(db: PersistentState, run_job):
    """Run every schedule-enabled job whose cron expression matches right now."""
    for row in db["jobs"]:
        job: Sist2Job = row["job"]
        if job.schedule_enabled and pycron.is_now(job.cron_expression):
            logger.info(f"Submit scan task to queue for [{job.name}]")
            run_job(job)
def _cron_thread(db, run_job):
    # Align the first check with the start of the next wall-clock minute.
    time.sleep(60 - (time.time() % 60))
    start = time.time()
    while True:
        _check_schedule(db, run_job)
        # Sleep until the next minute boundary relative to `start` so the
        # runtime of _check_schedule does not accumulate as drift.
        time.sleep(60 - ((time.time() - start) % 60))
def initialize(db, run_job):
    """Spawn the daemon thread that evaluates job schedules once per minute."""
    Thread(
        target=_cron_thread,
        args=(db, run_job),
        daemon=True,
        name="timer",
    ).start()

View File

@@ -0,0 +1,315 @@
import json
import logging
import os.path
import shutil
import signal
import uuid
from datetime import datetime
from enum import Enum
from hashlib import md5
from logging import FileHandler
from threading import Lock, Thread
from time import sleep
from uuid import uuid4, UUID
from hexlib.db import PersistentState
from pydantic import BaseModel, validator
from config import logger, LOG_FOLDER
from notifications import Notifications
from sist2 import ScanOptions, IndexOptions, Sist2, Sist2Index
from state import RUNNING_FRONTENDS
from web import Sist2Frontend
class JobStatus(Enum):
    """Lifecycle state of a job, persisted together with the job."""

    CREATED = "created"
    STARTED = "started"
    INDEXED = "indexed"
    FAILED = "failed"
class Sist2Job(BaseModel):
    """Persistent configuration and bookkeeping for one scan+index job.

    Improvement: removed the redundant ``__init__`` that only forwarded to
    ``BaseModel.__init__`` — behavior is unchanged.
    """
    name: str
    scan_options: ScanOptions
    index_options: IndexOptions
    cron_expression: str
    schedule_enabled: bool = False

    # Paths to the previous and most recent completed index directories.
    previous_index: str = None
    last_index: str = None
    last_index_date: datetime = None
    status: JobStatus = JobStatus("created")
    last_modified: datetime
    etag: str = None
    # When True, the next scan ignores last_index and rescans from scratch.
    do_full_scan: bool = False

    @staticmethod
    def create_default(name: str):
        """Build a job with default options (scan "/", daily at midnight)."""
        return Sist2Job(
            name=name,
            scan_options=ScanOptions(path="/"),
            index_options=IndexOptions(),
            last_modified=datetime.now(),
            cron_expression="0 0 * * *"
        )

    # etag is always recomputed from the fields that affect scan/index output,
    # regardless of any value supplied by the caller.
    @validator("etag", always=True)
    def validate_etag(cls, value, values):
        s = values["name"] + values["scan_options"].json() + values["index_options"].json() + values["cron_expression"]
        return md5(s.encode()).hexdigest()
class Sist2TaskProgress:
    """Progress snapshot of a running task, built from sist2 JSON log events."""

    def __init__(self, done: int = 0, count: int = 0, index_size: int = 0, tn_size: int = 0, waiting: bool = False):
        self.done = done
        self.count = count
        self.index_size = index_size
        # The `tn_size` (thumbnail store size) argument is exposed as store_size.
        self.store_size = tn_size
        self.waiting = waiting

    def percent(self):
        """Completion ratio in [0, 1]; 0 while the total count is unknown."""
        if not self.count:
            return 0
        return self.done / self.count
class Sist2Task:
    """Base class for queued units of work (scan / index)."""

    def __init__(self, job: "Sist2Job", display_name: str, depends_on: uuid.UUID = None):
        self.job = job
        self.display_name = display_name
        self.progress = Sist2TaskProgress()
        self.id = uuid4()
        # pid of the sist2 subprocess once it is started (used for kill).
        self.pid = None
        self.started = None
        self.ended = None
        # id of a task that must complete before this one may run.
        self.depends_on = depends_on

        # Dedicated per-task log file, named after the task id.
        self._logger = logging.Logger(name=f"{self.id}")
        self._logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"sist2-{self.id}.log")))

    def json(self):
        """JSON-friendly summary used by the task API endpoints."""
        return dict(
            id=self.id,
            job=self.job,
            display_name=self.display_name,
            progress=self.progress,
            started=self.started,
            ended=self.ended,
            depends_on=self.depends_on,
        )

    def log_callback(self, log_json):
        """Consume one JSON log event: progress events update self.progress,
        everything else is appended to the task's log file."""
        if "progress" in log_json:
            self.progress = Sist2TaskProgress(**log_json["progress"])
        elif self._logger:
            self._logger.info(json.dumps(log_json))

    def run(self, sist2: "Sist2", db: "PersistentState"):
        """Record the start time; subclasses perform the actual work."""
        self.started = datetime.now()
        logger.info(f"Started task {self.display_name}")
class Sist2ScanTask(Sist2Task):
    """Runs `sist2 scan` for a job and records the resulting index."""

    def run(self, sist2: Sist2, db: PersistentState):
        super().run(sist2, db)

        self.job.scan_options.name = self.job.name

        # Reuse the previous index for an incremental scan, unless a full scan
        # was requested or the old index directory is gone.
        if self.job.last_index and os.path.exists(self.job.last_index) and not self.job.do_full_scan:
            self.job.scan_options.incremental = self.job.last_index
        else:
            self.job.scan_options.incremental = None

        def set_pid(pid):
            # Saved so TaskQueue.kill_task() can signal the subprocess.
            self.pid = pid

        return_code = sist2.scan(self.job.scan_options, logs_cb=self.log_callback, set_pid_cb=set_pid)
        self.ended = datetime.now()

        if return_code != 0:
            self._logger.error(json.dumps({"sist2-admin": f"Process returned non-zero exit code ({return_code})"}))
            logger.info(f"Task {self.display_name} failed ({return_code})")
        else:
            index = Sist2Index(self.job.scan_options.output)

            # Save latest index
            self.job.previous_index = self.job.last_index
            self.job.last_index = index.path
            self.job.last_index_date = datetime.now()
            self.job.do_full_scan = False
            db["jobs"][self.job.name] = {"job": self.job}

            self._logger.info(json.dumps({"sist2-admin": f"Save last_index={self.job.last_index}"}))

        logger.info(f"Completed {self.display_name} ({return_code=})")

        return return_code
class Sist2IndexTask(Sist2Task):
    """Runs `sist2 index` for a job once its scan task has completed."""

    def __init__(self, job: Sist2Job, display_name: str, depends_on: Sist2Task):
        super().__init__(job, display_name, depends_on=depends_on.id)

    def run(self, sist2: Sist2, db: PersistentState):
        super().run(sist2, db)

        self.job.index_options.path = self.job.scan_options.output
        return_code = sist2.index(self.job.index_options, logs_cb=self.log_callback)
        self.ended = datetime.now()
        duration = self.ended - self.started

        ok = return_code == 0
        if ok:
            # Remove old index
            if self.job.previous_index is not None:
                self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index=}"}))
                try:
                    shutil.rmtree(self.job.previous_index)
                except FileNotFoundError:
                    pass

            self.restart_running_frontends(db, sist2)

        # Update status
        self.job.status = JobStatus("indexed") if ok else JobStatus("failed")
        db["jobs"][self.job.name] = {"job": self.job}

        self._logger.info(json.dumps({"sist2-admin": f"Sist2Scan task finished {return_code=}, {duration=}"}))
        logger.info(f"Completed {self.display_name} ({return_code=})")

        return return_code

    def restart_running_frontends(self, db: PersistentState, sist2: Sist2):
        """Restart every running frontend so it serves the new index paths."""
        for frontend_name, pid in RUNNING_FRONTENDS.items():
            frontend: Sist2Frontend = db["frontends"][frontend_name]["frontend"]

            os.kill(pid, signal.SIGTERM)
            try:
                os.wait()
            except ChildProcessError:
                pass

            # Bug fix: this previously assigned a one-shot `map` iterator to a
            # List[str] pydantic field; materialize it with list(...) — this
            # also matches start_frontend_ in the app module.
            frontend.web_options.indices = list(map(lambda j: db["jobs"][j]["job"].last_index, frontend.jobs))
            pid = sist2.web(frontend.web_options, frontend.name)
            RUNNING_FRONTENDS[frontend_name] = pid

            self._logger.info(json.dumps({"sist2-admin": f"Restart frontend {pid=} {frontend_name=}"}))
class TaskQueue:
    """Serial task runner: at most one task executes at a time; completions
    and their exit codes are recorded in the persistent "task_done" table."""

    def __init__(self, sist2: Sist2, db: PersistentState, notifications: Notifications):
        self._lock = Lock()
        self._sist2 = sist2
        self._db = db
        self._notifications = notifications
        # task id -> {"task": Sist2Task, "thread": Thread} for running tasks.
        self._tasks = {}
        # Submitted tasks waiting for their turn and/or their dependency.
        self._queue = []
        # Manual counter used as a semaphore (max 1 concurrent task).
        self._sem = 0

        self._thread = Thread(target=self._check_new_task, daemon=True)
        self._thread.start()

    def _tasks_failed(self):
        # ids of completed tasks that exited with a non-zero return code.
        done = set()

        for row in self._db["task_done"].sql("WHERE return_code != 0"):
            done.add(uuid.UUID(row["id"]))

        return done

    def _tasks_done(self):
        # ids of all completed tasks, regardless of outcome.
        done = set()

        for row in self._db["task_done"]:
            done.add(uuid.UUID(row["id"]))

        return done

    def _check_new_task(self):
        # Scheduler loop: once per second, start the first runnable queued task.
        while True:
            with self._lock:
                for task in list(self._queue):
                    task: Sist2Task
                    if self._sem >= 1:
                        # A task is already running; try again on the next tick.
                        break

                    if not task.depends_on or task.depends_on in self._tasks_done():
                        self._queue.remove(task)

                        if task.depends_on in self._tasks_failed():
                            # The task which we depend on failed, continue
                            continue

                        self._sem += 1

                        t = Thread(target=self._run_task, args=(task,))
                        self._tasks[task.id] = {
                            "task": task,
                            "thread": t,
                        }
                        t.start()
                        break
            sleep(1)

    def tasks(self):
        """Snapshot of the currently running tasks."""
        return list(map(lambda t: t["task"], self._tasks.values()))

    def kill_task(self, task_id):
        """SIGTERM the subprocess of a running task; False when not found."""
        task = self._tasks.get(UUID(task_id))

        if task:
            pid = task["task"].pid
            logger.info(f"Killing task {task_id} (pid={pid})")
            os.kill(pid, signal.SIGTERM)
            return True

        return False

    def _run_task(self, task: Sist2Task):
        # Worker thread body: run the task, record the result, notify the UI.
        task_result = task.run(self._sist2, self._db)

        with self._lock:
            del self._tasks[task.id]
            self._sem -= 1
            self._db["task_done"][task.id] = {
                "ended": task.ended,
                "started": task.started,
                "name": task.display_name,
                "return_code": task_result
            }

        if isinstance(task, Sist2IndexTask):
            self._notifications.notify({
                "message": "notifications.indexCompleted",
                "job": task.job.name
            })

    def submit(self, task: Sist2Task):
        """Queue a task; the scheduler thread starts it when it is eligible."""
        logger.info(f"Submitted task to queue {task.display_name}")

        with self._lock:
            self._queue.append(task)

View File

@@ -0,0 +1,40 @@
import asyncio
from typing import List
class Notifications:
    """Minimal synchronous observer registry for admin notifications."""

    def __init__(self):
        self._subscribers: List[Subscribe] = []

    def subscribe(self, ob):
        """Register an observer; it must expose notify(dict)."""
        self._subscribers.append(ob)

    def unsubscribe(self, ob):
        """Remove a previously registered observer."""
        self._subscribers.remove(ob)

    def notify(self, notification: dict):
        """Fan a notification out to every current subscriber."""
        for subscriber in self._subscribers:
            subscriber.notify(notification)
class Subscribe:
    """Async context manager that buffers notifications while subscribed."""

    def __init__(self, notifications: "Notifications"):
        self._queue = []
        self._notifications = notifications

    async def __aenter__(self):
        self._notifications.subscribe(self)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        self._notifications.unsubscribe(self)

    def notify(self, notification: dict):
        # Called synchronously (possibly from another thread); drained by
        # the notifications() generator.
        self._queue.append(notification)

    async def notifications(self):
        """Async generator yielding queued notifications, polling when empty."""
        while True:
            if self._queue:
                yield self._queue.pop(0)
            else:
                await asyncio.sleep(0.1)

View File

@@ -0,0 +1,307 @@
import datetime
import json
import logging
import os.path
import traceback
from datetime import datetime
from io import TextIOWrapper
from logging import FileHandler
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile
from threading import Thread
from typing import List
from pydantic import BaseModel
from config import logger, LOG_FOLDER
class Sist2Version:
    """Parsed "major.minor.patch" version string of a sist2 index."""

    def __init__(self, version: str):
        self._version = version
        # Raises ValueError unless there are exactly three numeric components.
        self.major, self.minor, self.patch = map(int, version.split("."))

    def __str__(self):
        return f"{self.major}.{self.minor}.{self.patch}"
class WebOptions(BaseModel):
    """Options for the `sist2 web` subcommand (one frontend process).

    Improvement: removed the redundant ``__init__`` that only forwarded to
    ``BaseModel.__init__`` — behavior is unchanged.
    """
    indices: List[str] = []
    es_url: str = "http://elasticsearch:9200"
    es_insecure_ssl: bool = False
    # NOTE(review): es_index is declared but never emitted by args() — confirm
    # whether `sist2 web` needs an --es-index flag.
    es_index: str = "sist2"
    bind: str = "0.0.0.0:4090"
    auth: str = None
    tag_auth: str = None
    tagline: str = "Lightning-fast file system indexer and search tool"
    dev: bool = False
    lang: str = "en"

    def args(self):
        """Build the sist2 CLI argument list for these options."""
        args = ["web", f"--es-url={self.es_url}", f"--bind={self.bind}",
                f"--tagline={self.tagline}", f"--lang={self.lang}"]

        if self.es_insecure_ssl:
            args.append("--es-insecure-ssl")
        if self.auth:
            args.append(f"--auth={self.auth}")
        if self.tag_auth:
            # NOTE(review): underscore flag (--tag_auth) is inconsistent with
            # the dashed flags above — confirm against the sist2 CLI.
            args.append(f"--tag_auth={self.tag_auth}")
        if self.dev:
            args.append("--dev")

        # Index paths are passed as trailing positional arguments.
        args.extend(self.indices)

        return args
class IndexOptions(BaseModel):
    """Options for the `sist2 index` subcommand.

    Improvement: removed the redundant ``__init__`` that only forwarded to
    ``BaseModel.__init__`` — behavior is unchanged.
    """
    path: str = None
    threads: int = 1
    es_url: str = "http://elasticsearch:9200"
    es_insecure_ssl: bool = False
    es_index: str = "sist2"
    incremental_index: bool = False
    # Inline painless script; Sist2.index() writes it to a temp file and
    # passes that path via script_file.
    script: str = ""
    script_file: str = None
    batch_size: int = 100

    def args(self):
        """Build the sist2 CLI argument list for these options."""
        args = ["index", self.path, f"--threads={self.threads}", f"--es-url={self.es_url}",
                f"--es-index={self.es_index}", f"--batch-size={self.batch_size}"]

        if self.script_file:
            args.append(f"--script-file={self.script_file}")
        if self.es_insecure_ssl:
            args.append("--es-insecure-ssl")
        if self.incremental_index:
            args.append("--incremental-index")

        return args
# Valid values for ScanOptions.archive (how archive files are traversed).
ARCHIVE_SKIP = "skip"
ARCHIVE_LIST = "list"
ARCHIVE_SHALLOW = "shallow"
ARCHIVE_RECURSE = "recurse"
class ScanOptions(BaseModel):
    """Options for the `sist2 scan` subcommand.

    Changes from the original: the redundant no-op ``__init__`` was removed,
    and ``ocr_lang`` is annotated ``str`` instead of ``bool`` — it holds a
    language code (interpolated into ``--ocr-lang=...``), and a bool
    annotation would make pydantic coerce/reject real language strings.
    """
    path: str
    threads: int = 1
    mem_throttle: int = 0
    thumbnail_quality: float = 1.0
    # NOTE(review): thumbnail_size is declared but never emitted by args() —
    # confirm whether a --thumbnail-size flag is missing.
    thumbnail_size: int = 500
    thumbnail_count: int = 1
    content_size: int = 32768
    depth: int = -1
    archive: str = ARCHIVE_RECURSE
    archive_passphrase: str = None
    ocr_lang: str = None
    ocr_images: bool = False
    ocr_ebooks: bool = False
    exclude: str = None
    fast: bool = False
    treemap_threshold: float = 0.0005
    mem_buffer: int = 2000
    read_subtitles: bool = False
    fast_epub: bool = False
    checksums: bool = False
    # The fields below are filled in at run time (Sist2.scan / Sist2ScanTask),
    # not by the user:
    incremental: str = None
    output: str = None
    name: str = None
    rewrite_url: str = None
    list_file: str = None

    def args(self):
        """Build the sist2 CLI argument list for these options."""
        args = ["scan", self.path, f"--threads={self.threads}", f"--mem-throttle={self.mem_throttle}",
                f"--thumbnail-quality={self.thumbnail_quality}", f"--thumbnail-count={self.thumbnail_count}",
                f"--content-size={self.content_size}", f"--output={self.output}", f"--depth={self.depth}",
                f"--archive={self.archive}", f"--mem-buffer={self.mem_buffer}"]

        if self.incremental:
            args.append(f"--incremental={self.incremental}")
        if self.rewrite_url:
            args.append(f"--rewrite-url={self.rewrite_url}")
        if self.name:
            args.append(f"--name={self.name}")
        if self.archive_passphrase:
            args.append(f"--archive-passphrase={self.archive_passphrase}")
        if self.ocr_lang:
            args.append(f"--ocr-lang={self.ocr_lang}")
        if self.ocr_ebooks:
            args.append("--ocr-ebooks")
        if self.ocr_images:
            args.append("--ocr-images")
        if self.exclude:
            args.append(f"--exclude={self.exclude}")
        if self.fast:
            args.append("--fast")
        if self.treemap_threshold:
            args.append(f"--treemap-threshold={self.treemap_threshold}")
        if self.read_subtitles:
            args.append("--read-subtitles")
        if self.fast_epub:
            args.append("--fast-epub")
        if self.checksums:
            args.append("--checksums")
        if self.list_file:
            # NOTE(review): underscore flag (--list_file) is inconsistent with
            # the dashed flags above — confirm against the sist2 CLI.
            args.append(f"--list_file={self.list_file}")

        return args
class Sist2Index:
    """Read-only view of an on-disk .sist2 index (parses descriptor.json)."""

    def __init__(self, path):
        self.path = path
        with open(os.path.join(path, "descriptor.json")) as f:
            self._descriptor = json.load(f)

    def to_json(self):
        """Summary dict used in API responses."""
        return {
            "path": self.path,
            "version": self.version(),
            "timestamp": self.timestamp(),
            "name": self.name(),
        }

    def version(self) -> "Sist2Version":
        return Sist2Version(self._descriptor["version"])

    def timestamp(self) -> datetime:
        return datetime.fromtimestamp(self._descriptor["timestamp"])

    def name(self) -> str:
        return self._descriptor["name"]
class Sist2:
    """Wrapper that launches the sist2 binary (scan / index / web) as
    subprocesses and forwards its JSON log output to callbacks."""

    def __init__(self, bin_path: str, data_directory: str):
        self._bin_path = bin_path
        self._data_dir = data_directory

    def index(self, options: IndexOptions, logs_cb):
        # Inline scripts are written to a temp file and passed via
        # --script-file (delete=False: the subprocess reads it later).
        if options.script:
            with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".painless", delete=False) as f:
                f.write(options.script)
            options.script_file = f.name
        else:
            options.script_file = None

        args = [
            self._bin_path,
            *options.args(),
            "--json-logs",
            "--very-verbose"
        ]

        proc = Popen(args, stdout=PIPE, stderr=PIPE)

        # stderr is drained on a separate thread so that neither pipe can fill
        # up and block the subprocess.
        t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
        t_stderr.start()

        self._consume_logs_stdout(logs_cb, proc)
        t_stderr.join()

        return proc.returncode

    def scan(self, options: ScanOptions, logs_cb, set_pid_cb):
        # NOTE(review): the output directory name embeds str(datetime.now()),
        # which contains spaces and colons — confirm downstream tooling and
        # filesystems accept such paths.
        output_dir = os.path.join(
            self._data_dir,
            f"scan-{datetime.now()}.sist2"
        )
        options.output = output_dir

        args = [
            self._bin_path,
            *options.args(),
            "--json-logs",
            "--very-verbose"
        ]

        logs_cb({"sist2-admin": f"Starting sist2 command with args {args}"})

        proc = Popen(args, stdout=PIPE, stderr=PIPE)
        # Report the pid so the caller can kill the scan on request.
        set_pid_cb(proc.pid)

        t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
        t_stderr.start()

        self._consume_logs_stdout(logs_cb, proc)
        t_stderr.join()

        return proc.returncode

    @staticmethod
    def _consume_logs_stderr(logs_cb, proc):
        # stderr lines are forwarded verbatim (not parsed as JSON).
        pipe_wrapper = TextIOWrapper(proc.stderr, encoding="utf8")
        try:
            for line in pipe_wrapper:
                if line.strip() == "":
                    continue
                logs_cb({"stderr": line})
        finally:
            proc.wait()
            pipe_wrapper.close()

    @staticmethod
    def _consume_logs_stdout(logs_cb, proc):
        # Each non-empty stdout line is expected to be one JSON log event.
        pipe_wrapper = TextIOWrapper(proc.stdout, encoding="utf8")
        try:
            for line in pipe_wrapper:
                if line.strip() == "":
                    continue
                log_object = json.loads(line)
                logs_cb(log_object)
        except Exception as e:
            # Malformed output or a failing callback: kill the process rather
            # than continue with a desynchronized log stream.
            proc.kill()
            try:
                print(line)
            except NameError:
                # `line` is unbound when the very first read raised.
                pass
            print(traceback.format_exc())
        finally:
            pass
            # proc.wait()
            # pipe_wrapper.close()

    def web(self, options: WebOptions, name: str):
        # Launch a long-lived frontend process; unlike scan/index this does
        # not wait for completion and returns the pid immediately.
        args = [
            self._bin_path,
            *options.args()
        ]

        # Frontend output goes to its own log file, named after the frontend.
        web_logger = logging.Logger(name=f"sist2-frontend-{name}")
        web_logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"frontend-{name}.log")))

        def logs_cb(message):
            web_logger.info(json.dumps(message))

        logger.info(f"Starting frontend {' '.join(args)}")

        proc = Popen(args, stdout=PIPE, stderr=PIPE)

        t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
        t_stderr.start()
        t_stdout = Thread(target=self._consume_logs_stdout, args=(logs_cb, proc))
        t_stdout.start()

        return proc.pid

View File

@@ -0,0 +1,50 @@
from typing import Dict
from hexlib.db import Table
import pickle
from tesseract import get_tesseract_langs
# name -> pid of the sist2 web processes spawned by this admin instance.
RUNNING_FRONTENDS: Dict[str, int] = {}

# Queried once at import time from the tesseract CLI.
TESSERACT_LANGS = get_tesseract_langs()

# Bump when the layout of state.db changes; checked at startup.
DB_SCHEMA_VERSION = "1"

# NOTE(review): import placed mid-file; should live with the imports at the top.
from pydantic import BaseModel
def _serialize(item):
    """Pickle pydantic models for storage; pass every other value through."""
    if isinstance(item, BaseModel):
        return pickle.dumps(item)
    if isinstance(item, bytes):
        # Raw bytes would be indistinguishable from pickled payloads on read.
        raise Exception("FIXME: bytes in PickleTable")
    return item
def _deserialize(item):
if isinstance(item, bytes):
return pickle.loads(item)
return item
class PickleTable(Table):
    """hexlib Table whose row values are transparently pickled/unpickled."""

    def __getitem__(self, item):
        row = super().__getitem__(item)
        if not row:
            return row
        return {k: _deserialize(v) for k, v in row.items()}

    def __setitem__(self, key, value):
        encoded = {k: _serialize(v) for k, v in value.items()}
        super().__setitem__(key, encoded)

    def __iter__(self):
        for row in super().__iter__():
            yield {k: _deserialize(v) for k, v in row.items()}

    def sql(self, where_clause, *params):
        for row in super().sql(where_clause, *params):
            yield {k: _deserialize(v) for k, v in row.items()}

View File

@@ -0,0 +1,14 @@
import subprocess
def get_tesseract_langs():
res = subprocess.check_output([
"tesseract",
"--list-langs"
]).decode()
languages = res.split("\n")[1:]
return list(filter(lambda lang: lang and lang != "osd", languages))

View File

@@ -0,0 +1,29 @@
import os.path
from typing import List
from pydantic import BaseModel
from sist2 import WebOptions
class Sist2Frontend(BaseModel):
    """Configuration for one sist2 web UI process."""
    name: str
    # Names of the jobs whose latest indices this frontend serves.
    jobs: List[str]
    web_options: WebOptions
    # Transient flag filled in by the API layer; not meaningful when stored.
    running: bool = False
    auto_start: bool = False
    enable_monitoring: bool = True
    extra_query_args: str = ""
    custom_url: str = None

    def get_log_path(self, log_folder: str):
        """Path of this frontend's log file inside *log_folder*."""
        return os.path.join(log_folder, f"frontend-{self.name}.log")

    @staticmethod
    def create_default(name: str):
        """Build a frontend with default web options and no jobs attached."""
        return Sist2Frontend(
            name=name,
            jobs=[],
            web_options=WebOptions(),
        )