# Mirror of https://github.com/simon987/sist2.git
# Synced 2025-04-04 07:52:59 +00:00
# 352 lines, 11 KiB, Python
import datetime
|
|
import json
|
|
import logging
|
|
import os.path
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
from io import TextIOWrapper
|
|
from logging import FileHandler
|
|
from subprocess import Popen, PIPE
|
|
from tempfile import NamedTemporaryFile
|
|
from threading import Thread
|
|
from typing import List
|
|
|
|
from pydantic import BaseModel
|
|
|
|
from config import logger, LOG_FOLDER, DATA_FOLDER
|
|
|
|
|
|
class Sist2Version:
    """A ``major.minor.patch`` version string split into integer parts."""

    def __init__(self, version: str):
        """Parse *version*.

        Args:
            version: Dot-separated version made of exactly three integers.

        Raises:
            ValueError: If a component is not an integer, or if there are
                not exactly three components.
        """
        # The raw string is kept as given; the parsed parts drive __str__.
        self._version = version

        components = [int(piece) for piece in version.split(".")]
        self.major, self.minor, self.patch = components

    def __str__(self):
        """Return the version rebuilt from its parsed integer components."""
        return ".".join(str(part) for part in (self.major, self.minor, self.patch))
|
|
|
|
|
|
class SearchBackendType(Enum):
    """Closed set of search backend kinds, keyed by their string value."""

    # Member order is part of the enum's iteration order; keep it stable.
    SQLITE = "sqlite"
    ELASTICSEARCH = "elasticsearch"
|
|
|
|
|
|
class Sist2SearchBackend(BaseModel):
    """Settings for one search backend.

    ``search_index`` is read by ``IndexOptions.args`` and ``WebOptions.args``
    when ``backend_type`` is sqlite; the ``es_*``, ``threads`` and
    ``batch_size`` fields are read on the Elasticsearch path.
    """

    backend_type: SearchBackendType = SearchBackendType.ELASTICSEARCH
    name: str

    # File name of the sqlite search index; consumers join it onto DATA_FOLDER.
    search_index: str = ""

    es_url: str = "http://elasticsearch:9200"
    es_insecure_ssl: bool = False
    es_index: str = "sist2"
    threads: int = 1
    batch_size: int = 70

    @staticmethod
    def create_default(name: str, backend_type: SearchBackendType = SearchBackendType.ELASTICSEARCH):
        """Build a backend with default settings for *name*.

        The search index file name is derived from *name*, with every ``/``
        replaced by ``_``.
        """
        index_file = f"search-index-{name.replace('/', '_')}.sist2"
        return Sist2SearchBackend(
            name=name,
            search_index=index_file,
            backend_type=backend_type,
        )
|
|
|
|
|
|
class IndexOptions(BaseModel):
    """Options for pushing a scanned index into a search backend."""

    path: str = None
    incremental_index: bool = True
    search_backend: str = None

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def args(self, search_backend):
        """Return the sist2 sub-command and flags for indexing ``self.path``.

        Args:
            search_backend: Backend settings; ``backend_type`` selects between
                the ``sqlite-index`` and ``index`` sub-commands.

        Returns:
            Argument list (without the binary path).
        """
        # self.path is resolved relative to DATA_FOLDER.
        index_root = os.path.join(DATA_FOLDER, self.path)

        if search_backend.backend_type == SearchBackendType.SQLITE:
            sqlite_index_path = os.path.join(DATA_FOLDER, search_backend.search_index)
            return ["sqlite-index", index_root, "--search-index", sqlite_index_path]

        command = [
            "index",
            index_root,
            f"--threads={search_backend.threads}",
            f"--es-url={search_backend.es_url}",
            f"--es-index={search_backend.es_index}",
            f"--batch-size={search_backend.batch_size}",
        ]

        # NOTE(review): the source this was restored from had lost its
        # indentation; both optional flags are assumed to belong to the
        # Elasticsearch branch only -- confirm against upstream.
        if search_backend.es_insecure_ssl:
            command.append("--es-insecure-ssl")
        if self.incremental_index:
            command.append("--incremental-index")

        return command
|
|
|
|
|
|
# Named values for ScanOptions.archive, which is forwarded verbatim to the
# scan command as --archive=<value>.
ARCHIVE_SKIP = "skip"
ARCHIVE_LIST = "list"
ARCHIVE_SHALLOW = "shallow"
ARCHIVE_RECURSE = "recurse"
|
|
|
|
|
|
class ScanOptions(BaseModel):
    """Options for a ``sist2 scan`` run.

    Each field is translated into the matching command-line flag by
    :meth:`args`.
    """

    path: str
    threads: int = 1
    thumbnail_quality: int = 50
    thumbnail_size: int = 552
    thumbnail_count: int = 1
    content_size: int = 32768
    depth: int = -1
    archive: str = ARCHIVE_RECURSE
    archive_passphrase: str = None
    ocr_lang: str = None
    ocr_images: bool = False
    ocr_ebooks: bool = False
    exclude: str = None
    fast: bool = False
    treemap_threshold: float = 0.0005
    mem_buffer: int = 2000
    read_subtitles: bool = False
    fast_epub: bool = False
    checksums: bool = False
    incremental: bool = True
    optimize_index: bool = False
    output: str = None
    name: str = None
    rewrite_url: str = None
    list_file: str = None

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def args(self):
        """Return the ``scan`` sub-command and flags for these options.

        ``output`` must be set before calling: it is joined onto DATA_FOLDER,
        and ``os.path.join`` raises ``TypeError`` for ``None``.

        Returns:
            Argument list (without the binary path). Always-present flags
            come first, followed by optional flags whose option value is
            truthy.
        """
        output_path = os.path.join(DATA_FOLDER, self.output)

        args = [
            "scan",
            self.path,
            f"--threads={self.threads}",
            f"--thumbnail-quality={self.thumbnail_quality}",
            f"--thumbnail-count={self.thumbnail_count}",
            f"--thumbnail-size={self.thumbnail_size}",
            f"--content-size={self.content_size}",
            f"--output={output_path}",
            f"--depth={self.depth}",
            f"--archive={self.archive}",
            f"--mem-buffer={self.mem_buffer}",
        ]

        # (option value, flag) pairs; a flag is emitted only when its value is
        # truthy, so e.g. treemap_threshold == 0 omits --treemap-threshold.
        optional_flags = (
            (self.incremental, "--incremental"),
            (self.optimize_index, "--optimize-index"),
            (self.rewrite_url, f"--rewrite-url={self.rewrite_url}"),
            (self.name, f"--name={self.name}"),
            (self.archive_passphrase, f"--archive-passphrase={self.archive_passphrase}"),
            (self.ocr_lang, f"--ocr-lang={self.ocr_lang}"),
            (self.ocr_ebooks, "--ocr-ebooks"),
            (self.ocr_images, "--ocr-images"),
            (self.exclude, f"--exclude={self.exclude}"),
            (self.fast, "--fast"),
            (self.treemap_threshold, f"--treemap-threshold={self.treemap_threshold}"),
            (self.read_subtitles, "--read-subtitles"),
            (self.fast_epub, "--fast-epub"),
            (self.checksums, "--checksums"),
            # Fixed: this was "--list_file"; sist2 options are hyphenated
            # ("--list-file"), like every other flag built here.
            (self.list_file, f"--list-file={self.list_file}"),
        )
        args.extend(flag for value, flag in optional_flags if value)

        return args
|
|
|
|
|
|
class Sist2Index:
    """Read-only view over an index directory's ``descriptor.json``."""

    def __init__(self, path):
        """Load ``descriptor.json`` from the directory *path*.

        Raises whatever ``open`` / ``json.load`` raise when the descriptor is
        missing or is not valid JSON.
        """
        self.path = path

        descriptor_path = os.path.join(path, "descriptor.json")
        with open(descriptor_path) as descriptor_file:
            self._descriptor = json.load(descriptor_file)

    def to_json(self):
        """Return a dict summarising the index.

        ``version`` is a ``Sist2Version`` and ``timestamp`` a ``datetime``;
        they are not converted to strings here.
        """
        return dict(
            path=self.path,
            version=self.version(),
            timestamp=self.timestamp(),
            name=self.name(),
        )

    def version(self) -> Sist2Version:
        """Return the descriptor's ``version`` field, parsed."""
        raw_version = self._descriptor["version"]
        return Sist2Version(raw_version)

    def timestamp(self) -> datetime:
        """Return the descriptor's ``timestamp`` field as a local datetime."""
        epoch_seconds = self._descriptor["timestamp"]
        return datetime.fromtimestamp(epoch_seconds)

    def name(self) -> str:
        """Return the descriptor's ``name`` field."""
        return self._descriptor["name"]
|
|
|
|
|
|
class WebOptions(BaseModel):
    """Options for a ``sist2 web`` frontend process."""

    indices: List[str] = []

    search_backend: str = "elasticsearch"

    bind: str = "0.0.0.0:4090"
    auth: str = None
    tag_auth: str = None
    tagline: str = "Lightning-fast file system indexer and search tool"
    dev: bool = False
    lang: str = "en"
    auth0_audience: str = None
    auth0_domain: str = None
    auth0_client_id: str = None
    # The key text itself is never put on the command line; only
    # auth0_public_key_file is turned into a flag by args().
    auth0_public_key: str = None
    auth0_public_key_file: str = None

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def args(self, search_backend: Sist2SearchBackend):
        """Return the ``web`` sub-command and flags for these options.

        Args:
            search_backend: Backend settings; ``backend_type`` decides
                whether the sqlite search index or the Elasticsearch
                connection flags are emitted.

        Returns:
            Argument list (without the binary path), ending with the entries
            of ``indices``.
        """
        command = [
            "web",
            f"--bind={self.bind}",
            f"--tagline={self.tagline}",
            f"--lang={self.lang}",
        ]

        if search_backend.backend_type == SearchBackendType.SQLITE:
            sqlite_index_path = os.path.join(DATA_FOLDER, search_backend.search_index)
            command.append(f"--search-index={sqlite_index_path}")
        else:
            command.append(f"--es-url={search_backend.es_url}")
            command.append(f"--es-index={search_backend.es_index}")
            if search_backend.es_insecure_ssl:
                command.append("--es-insecure-ssl")

        # (option value, flag) pairs; a flag is emitted only for truthy values.
        optional_flags = (
            (self.auth0_audience, f"--auth0-audience={self.auth0_audience}"),
            (self.auth0_domain, f"--auth0-domain={self.auth0_domain}"),
            (self.auth0_client_id, f"--auth0-client-id={self.auth0_client_id}"),
            (self.auth0_public_key_file, f"--auth0-public-key-file={self.auth0_public_key_file}"),
            (self.auth, f"--auth={self.auth}"),
            (self.tag_auth, f"--tag-auth={self.tag_auth}"),
            (self.dev, "--dev"),
        )
        for value, flag in optional_flags:
            if value:
                command.append(flag)

        command.extend(self.indices)

        return command
|
|
|
|
|
|
class Sist2:
    """Launches the sist2 binary as a subprocess and relays its log output."""

    def __init__(self, bin_path: str, data_directory: str):
        """Remember the binary path and the data directory."""
        self.bin_path = bin_path
        self._data_dir = data_directory

    def index(self, options: IndexOptions, search_backend: Sist2SearchBackend, logs_cb):
        """Run an index command and block until it exits.

        Args:
            options: Index options; ``options.args(search_backend)`` supplies
                the sub-command and flags.
            search_backend: Backend the index is pushed to.
            logs_cb: Callable receiving one dict per log message.

        Returns:
            The process return code.
        """
        return self._run_to_completion(options.args(search_backend), logs_cb)

    def scan(self, options: ScanOptions, logs_cb, set_pid_cb):
        """Run a scan command and block until it exits.

        When ``options.output`` is None it is set (on *options* itself) to a
        name derived from ``options.name`` and the current UTC time.

        Args:
            options: Scan options; ``options.args()`` supplies the flags.
            logs_cb: Callable receiving one dict per log message.
            set_pid_cb: Callable receiving the child's pid right after launch.

        Returns:
            The process return code.
        """
        if options.output is None:
            options.output = f"scan-{options.name.replace('/', '_')}-{datetime.utcnow()}.sist2"

        return self._run_to_completion(options.args(), logs_cb, set_pid_cb)

    def _run_to_completion(self, command_args, logs_cb, set_pid_cb=None):
        """Start sist2 with *command_args*, relay its logs, return its exit code."""
        args = [
            self.bin_path,
            *command_args,
            "--json-logs",
            "--very-verbose",
        ]

        logs_cb({"sist2-admin": f"Starting sist2 command with args {args}"})

        proc = Popen(args, stdout=PIPE, stderr=PIPE)

        if set_pid_cb is not None:
            set_pid_cb(proc.pid)

        # stderr is drained on a separate thread while stdout is consumed on
        # the calling thread.
        t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
        t_stderr.start()

        self._consume_logs_stdout(logs_cb, proc)

        # The stderr consumer calls proc.wait(), so returncode is populated
        # once it has been joined.
        t_stderr.join()

        return proc.returncode

    @staticmethod
    def _consume_logs_stderr(logs_cb, proc):
        """Forward each non-blank stderr line as ``{"stderr": line}``.

        Always waits for the process and closes the pipe, even if *logs_cb*
        raises.
        """
        pipe_wrapper = TextIOWrapper(proc.stderr, encoding="utf8", errors="ignore")
        try:
            for line in pipe_wrapper:
                if line.strip() == "":
                    continue
                logs_cb({"stderr": line})
        finally:
            proc.wait()
            pipe_wrapper.close()

    @staticmethod
    def _consume_logs_stdout(logs_cb, proc):
        """Decode each non-blank stdout line as JSON and pass it to *logs_cb*.

        A line that fails to decode (or whose callback raises) is reported
        through *logs_cb* under the ``"sist2-admin"`` key instead.
        """
        pipe_wrapper = TextIOWrapper(proc.stdout, encoding="utf8", errors="ignore")
        try:
            for line in pipe_wrapper:
                try:
                    if line.strip() == "":
                        continue
                    log_object = json.loads(line)
                    logs_cb(log_object)
                except Exception as e:
                    try:
                        logs_cb({"sist2-admin": f"Could not decode log line: {line}; {e}"})
                    except NameError:
                        # NOTE(review): only NameError is swallowed here; any
                        # other error from logs_cb still propagates.
                        pass
        finally:
            # Close the pipe like the stderr consumer does; previously the
            # stdout wrapper was never closed.
            pipe_wrapper.close()

    def web(self, options: WebOptions, search_backend: Sist2SearchBackend, name: str):
        """Start a web frontend in the background and return its pid.

        If ``options.auth0_public_key`` is set, it is written to a temporary
        file (which is not deleted here) and ``options.auth0_public_key_file``
        is pointed at it; otherwise ``auth0_public_key_file`` is reset to None.
        Process output is written as JSON lines to ``frontend-<name>.log``
        under LOG_FOLDER.

        Args:
            options: Web options; mutated as described above.
            search_backend: Backend the frontend queries.
            name: Frontend name, used for the logger and log file names.

        Returns:
            The pid of the started process; this method does not wait for it.
        """
        if options.auth0_public_key:
            with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".txt", delete=False) as f:
                f.write(options.auth0_public_key)
            options.auth0_public_key_file = f.name
        else:
            options.auth0_public_key_file = None

        args = [
            self.bin_path,
            *options.args(search_backend),
        ]

        web_logger = logging.Logger(name=f"sist2-frontend-{name}")
        web_logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"frontend-{name}.log")))

        def logs_cb(message):
            web_logger.info(json.dumps(message))

        logger.info(f"Starting frontend {' '.join(args)}")

        proc = Popen(args, stdout=PIPE, stderr=PIPE)

        # Both pipes are drained on background threads so this call returns
        # immediately.
        t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
        t_stderr.start()

        t_stdout = Thread(target=self._consume_logs_stdout, args=(logs_cb, proc))
        t_stdout.start()

        return proc.pid
|