mirror of
https://github.com/simon987/sist2.git
synced 2025-04-04 07:52:59 +00:00
329 lines
9.5 KiB
Python
329 lines
9.5 KiB
Python
import datetime
|
|
import json
|
|
import logging
|
|
import os.path
|
|
import traceback
|
|
from datetime import datetime
|
|
from io import TextIOWrapper
|
|
from logging import FileHandler
|
|
from subprocess import Popen, PIPE
|
|
from tempfile import NamedTemporaryFile
|
|
from threading import Thread
|
|
from typing import List
|
|
|
|
from pydantic import BaseModel
|
|
|
|
from config import logger, LOG_FOLDER
|
|
|
|
|
|
class Sist2Version:
|
|
def __init__(self, version: str):
|
|
self._version = version
|
|
|
|
self.major, self.minor, self.patch = [int(x) for x in version.split(".")]
|
|
|
|
def __str__(self):
|
|
return f"{self.major}.{self.minor}.{self.patch}"
|
|
|
|
|
|
class WebOptions(BaseModel):
|
|
indices: List[str] = []
|
|
es_url: str = "http://elasticsearch:9200"
|
|
es_insecure_ssl: bool = False
|
|
es_index: str = "sist2"
|
|
bind: str = "0.0.0.0:4090"
|
|
auth: str = None
|
|
tag_auth: str = None
|
|
tagline: str = "Lightning-fast file system indexer and search tool"
|
|
dev: bool = False
|
|
lang: str = "en"
|
|
auth0_audience: str = None
|
|
auth0_domain: str = None
|
|
auth0_client_id: str = None
|
|
auth0_public_key: str = None
|
|
auth0_public_key_file: str = None
|
|
|
|
def __init__(self, **kwargs):
|
|
super().__init__(**kwargs)
|
|
|
|
def args(self):
|
|
args = ["web", f"--es-url={self.es_url}", f"--bind={self.bind}",
|
|
f"--tagline={self.tagline}", f"--lang={self.lang}"]
|
|
|
|
if self.auth0_audience:
|
|
args.append(f"--auth0-audience={self.auth0_audience}")
|
|
if self.auth0_domain:
|
|
args.append(f"--auth0-domain={self.auth0_domain}")
|
|
if self.auth0_client_id:
|
|
args.append(f"--auth0-client-id={self.auth0_client_id}")
|
|
if self.auth0_public_key_file:
|
|
args.append(f"--auth0-public-key-file={self.auth0_public_key_file}")
|
|
if self.es_insecure_ssl:
|
|
args.append(f"--es-insecure-ssl")
|
|
if self.auth:
|
|
args.append(f"--auth={self.auth}")
|
|
if self.tag_auth:
|
|
args.append(f"--tag_auth={self.tag_auth}")
|
|
if self.dev:
|
|
args.append(f"--dev")
|
|
|
|
args.extend(self.indices)
|
|
|
|
return args
|
|
|
|
|
|
class IndexOptions(BaseModel):
|
|
path: str = None
|
|
threads: int = 1
|
|
es_url: str = "http://elasticsearch:9200"
|
|
es_insecure_ssl: bool = False
|
|
es_index: str = "sist2"
|
|
incremental_index: bool = False
|
|
script: str = ""
|
|
script_file: str = None
|
|
batch_size: int = 100
|
|
|
|
def __init__(self, **kwargs):
|
|
super().__init__(**kwargs)
|
|
|
|
def args(self):
|
|
|
|
args = ["index", self.path, f"--threads={self.threads}", f"--es-url={self.es_url}",
|
|
f"--es-index={self.es_index}", f"--batch-size={self.batch_size}"]
|
|
|
|
if self.script_file:
|
|
args.append(f"--script-file={self.script_file}")
|
|
if self.es_insecure_ssl:
|
|
args.append(f"--es-insecure-ssl")
|
|
if self.incremental_index:
|
|
args.append(f"--incremental-index")
|
|
|
|
return args
|
|
|
|
|
|
ARCHIVE_SKIP = "skip"
|
|
ARCHIVE_LIST = "list"
|
|
ARCHIVE_SHALLOW = "shallow"
|
|
ARCHIVE_RECURSE = "recurse"
|
|
|
|
|
|
class ScanOptions(BaseModel):
|
|
path: str
|
|
threads: int = 1
|
|
mem_throttle: int = 0
|
|
thumbnail_quality: int = 2
|
|
thumbnail_size: int = 500
|
|
thumbnail_count: int = 1
|
|
content_size: int = 32768
|
|
depth: int = -1
|
|
archive: str = ARCHIVE_RECURSE
|
|
archive_passphrase: str = None
|
|
ocr_lang: str = None
|
|
ocr_images: bool = False
|
|
ocr_ebooks: bool = False
|
|
exclude: str = None
|
|
fast: bool = False
|
|
treemap_threshold: float = 0.0005
|
|
mem_buffer: int = 2000
|
|
read_subtitles: bool = False
|
|
fast_epub: bool = False
|
|
checksums: bool = False
|
|
incremental: str = None
|
|
output: str = None
|
|
name: str = None
|
|
rewrite_url: str = None
|
|
list_file: str = None
|
|
|
|
def __init__(self, **kwargs):
|
|
super().__init__(**kwargs)
|
|
|
|
def args(self):
|
|
args = ["scan", self.path, f"--threads={self.threads}", f"--mem-throttle={self.mem_throttle}",
|
|
f"--thumbnail-quality={self.thumbnail_quality}", f"--thumbnail-count={self.thumbnail_count}",
|
|
f"--content-size={self.content_size}", f"--output={self.output}", f"--depth={self.depth}",
|
|
f"--archive={self.archive}", f"--mem-buffer={self.mem_buffer}"]
|
|
|
|
if self.incremental:
|
|
args.append(f"--incremental={self.incremental}")
|
|
if self.rewrite_url:
|
|
args.append(f"--rewrite-url={self.rewrite_url}")
|
|
if self.name:
|
|
args.append(f"--name={self.name}")
|
|
if self.archive_passphrase:
|
|
args.append(f"--archive-passphrase={self.archive_passphrase}")
|
|
if self.ocr_lang:
|
|
args.append(f"--ocr-lang={self.ocr_lang}")
|
|
if self.ocr_ebooks:
|
|
args.append(f"--ocr-ebooks")
|
|
if self.ocr_images:
|
|
args.append(f"--ocr-images")
|
|
if self.exclude:
|
|
args.append(f"--exclude={self.exclude}")
|
|
if self.fast:
|
|
args.append(f"--fast")
|
|
if self.treemap_threshold:
|
|
args.append(f"--treemap-threshold={self.treemap_threshold}")
|
|
if self.read_subtitles:
|
|
args.append(f"--read-subtitles")
|
|
if self.fast_epub:
|
|
args.append(f"--fast-epub")
|
|
if self.checksums:
|
|
args.append(f"--checksums")
|
|
if self.list_file:
|
|
args.append(f"--list_file={self.list_file}")
|
|
|
|
return args
|
|
|
|
|
|
class Sist2Index:
|
|
def __init__(self, path):
|
|
self.path = path
|
|
|
|
with open(os.path.join(path, "descriptor.json")) as f:
|
|
self._descriptor = json.load(f)
|
|
|
|
def to_json(self):
|
|
return {
|
|
"path": self.path,
|
|
"version": self.version(),
|
|
"timestamp": self.timestamp(),
|
|
"name": self.name()
|
|
}
|
|
|
|
def version(self) -> Sist2Version:
|
|
return Sist2Version(self._descriptor["version"])
|
|
|
|
def timestamp(self) -> datetime:
|
|
return datetime.fromtimestamp(self._descriptor["timestamp"])
|
|
|
|
def name(self) -> str:
|
|
return self._descriptor["name"]
|
|
|
|
|
|
class Sist2:
|
|
|
|
def __init__(self, bin_path: str, data_directory: str):
|
|
self._bin_path = bin_path
|
|
self._data_dir = data_directory
|
|
|
|
def index(self, options: IndexOptions, logs_cb):
|
|
|
|
if options.script:
|
|
with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".painless", delete=False) as f:
|
|
f.write(options.script)
|
|
options.script_file = f.name
|
|
else:
|
|
options.script_file = None
|
|
|
|
args = [
|
|
self._bin_path,
|
|
*options.args(),
|
|
"--json-logs",
|
|
"--very-verbose"
|
|
]
|
|
proc = Popen(args, stdout=PIPE, stderr=PIPE)
|
|
|
|
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
|
|
t_stderr.start()
|
|
|
|
self._consume_logs_stdout(logs_cb, proc)
|
|
|
|
t_stderr.join()
|
|
|
|
return proc.returncode
|
|
|
|
def scan(self, options: ScanOptions, logs_cb, set_pid_cb):
|
|
|
|
output_dir = os.path.join(
|
|
self._data_dir,
|
|
f"scan-{datetime.now()}.sist2"
|
|
)
|
|
options.output = output_dir
|
|
|
|
args = [
|
|
self._bin_path,
|
|
*options.args(),
|
|
"--json-logs",
|
|
"--very-verbose"
|
|
]
|
|
|
|
logs_cb({"sist2-admin": f"Starting sist2 command with args {args}"})
|
|
|
|
proc = Popen(args, stdout=PIPE, stderr=PIPE)
|
|
|
|
set_pid_cb(proc.pid)
|
|
|
|
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
|
|
t_stderr.start()
|
|
|
|
self._consume_logs_stdout(logs_cb, proc)
|
|
|
|
t_stderr.join()
|
|
|
|
return proc.returncode
|
|
|
|
@staticmethod
|
|
def _consume_logs_stderr(logs_cb, proc):
|
|
pipe_wrapper = TextIOWrapper(proc.stderr, encoding="utf8", errors="ignore")
|
|
try:
|
|
for line in pipe_wrapper:
|
|
if line.strip() == "":
|
|
continue
|
|
logs_cb({"stderr": line})
|
|
finally:
|
|
proc.wait()
|
|
pipe_wrapper.close()
|
|
|
|
@staticmethod
|
|
def _consume_logs_stdout(logs_cb, proc):
|
|
pipe_wrapper = TextIOWrapper(proc.stdout, encoding="utf8", errors="ignore")
|
|
try:
|
|
for line in pipe_wrapper:
|
|
if line.strip() == "":
|
|
continue
|
|
log_object = json.loads(line)
|
|
logs_cb(log_object)
|
|
except Exception as e:
|
|
proc.kill()
|
|
try:
|
|
print(line)
|
|
except NameError:
|
|
pass
|
|
print(traceback.format_exc())
|
|
finally:
|
|
pass
|
|
# proc.wait()
|
|
# pipe_wrapper.close()
|
|
|
|
def web(self, options: WebOptions, name: str):
|
|
|
|
if options.auth0_public_key:
|
|
with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".txt", delete=False) as f:
|
|
f.write(options.auth0_public_key)
|
|
options.auth0_public_key_file = f.name
|
|
else:
|
|
options.auth0_public_key_file = None
|
|
|
|
args = [
|
|
self._bin_path,
|
|
*options.args()
|
|
]
|
|
|
|
web_logger = logging.Logger(name=f"sist2-frontend-{name}")
|
|
web_logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"frontend-{name}.log")))
|
|
|
|
def logs_cb(message):
|
|
web_logger.info(json.dumps(message))
|
|
|
|
logger.info(f"Starting frontend {' '.join(args)}")
|
|
|
|
proc = Popen(args, stdout=PIPE, stderr=PIPE)
|
|
|
|
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
|
|
t_stderr.start()
|
|
|
|
t_stdout = Thread(target=self._consume_logs_stdout, args=(logs_cb, proc))
|
|
t_stdout.start()
|
|
|
|
return proc.pid
|