Update sist2-admin for 3.x.x, more fixes

This commit is contained in:
simon987 2023-04-10 19:45:08 -04:00
parent 6182338f29
commit 01490d1cbf
14 changed files with 874 additions and 78 deletions

View File

@ -21,7 +21,7 @@ RUN cd sist2-admin/frontend/ && npm install && npm run build
RUN mkdir build && cd build && cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
RUN cd build && make -j$(nproc)
RUN strip build/sist2 || mv sist2_debug build/sist2
RUN strip build/sist2 || mv build/sist2_debug build/sist2
FROM --platform="linux/amd64" ubuntu@sha256:965fbcae990b0467ed5657caceaec165018ef44a4d2d46c7cdea80a9dff0d1ea
@ -33,7 +33,7 @@ ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"]
RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libasan5 libmagic1 python3 \
python3-pip git tesseract-ocr libpq-dev && rm -rf /var/lib/apt/lists/*
python3-pip git tesseract-ocr && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \

View File

@ -3,13 +3,20 @@ MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/
ADD . /build/
RUN cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2
RUN mkdir build && cd build && cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
RUN cd build && make -j$(nproc)
RUN strip build/sist2 || mv build/sist2_debug build/sist2
FROM --platform="linux/arm64/v8" ubuntu:20.04
FROM --platform=linux/arm64/v8 ubuntu@sha256:537da24818633b45fcb65e5285a68c3ec1f3db25f5ae5476a7757bc8dfae92a3
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
WORKDIR /root
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"]
RUN apt update && apt install -y curl libasan5 libmagic1 tesseract-ocr python3-pip python3 git && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
@ -18,11 +25,16 @@ RUN mkdir -p /usr/share/tessdata && \
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
curl -o /usr/share/tessdata/osd.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/osd.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata &&\
curl -o /usr/share/tessdata/deu.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/deu.traineddata &&\
curl -o /usr/share/tessdata/equ.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/equ.traineddata &&\
curl -o /usr/share/tessdata/chi_sim.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/chi_sim.traineddata
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
# sist2
COPY --from=build /build/build/sist2 /root/sist2
ENTRYPOINT ["/root/sist2"]
COPY --from=build /build/sist2 /root/sist2
# sist2-admin
COPY sist2-admin/requirements.txt sist2-admin/
RUN python3 -m pip install --no-cache -r sist2-admin/requirements.txt
COPY --from=build /build/sist2-admin/ sist2-admin/

View File

@ -28,16 +28,22 @@ export default {
return this.$store.state.jobDesktopNotificationMap[this.job.name];
}
},
methods: {
mounted() {
this.cronValid = this.checkCron(this.job.cron_expression)
},
methods: {
checkCron(expression) {
return /((((\d+,)+\d+|(\d+([/-])\d+)|\d+|\*) ?){5,7})/.test(expression);
},
updateNotifications(value) {
this.$store.dispatch("setJobDesktopNotification", {
job: this.job.name,
enabled: value
})
});
},
update() {
if (this.job.schedule_enabled) {
this.cronValid = /((((\d+,)+\d+|(\d+([/-])\d+)|\d+|\*) ?){5,7})/.test(this.job.cron_expression);
this.cronValid = this.checkCron(this.job.cron_expression);
} else {
this.cronValid = undefined;
}

View File

@ -6,9 +6,6 @@
<label>{{ $t("scanOptions.threads") }}</label>
<b-form-input type="number" min="1" v-model="options.threads" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.memThrottle") }}</label>
<b-form-input type="number" min="0" v-model="options.mem_throttle" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.thumbnailQuality") }}</label>
<b-form-input type="number" min="1" max="31" v-model="options.thumbnail_quality" @change="update()"></b-form-input>
@ -70,8 +67,9 @@
{{ $t("scanOptions.readSubtitles") }}
</b-form-checkbox>
<label>{{ $t("scanOptions.memBuffer") }}</label>
<b-form-input type="number" min="0" v-model="options.mem_buffer" @change="update()"></b-form-input>
<b-form-checkbox v-model="options.optimize_index" @change="update()">
{{ $t("scanOptions.optimizeIndex") }}
</b-form-checkbox>
<label>{{ $t("scanOptions.treemapThreshold") }}</label>
<b-form-input type="number" min="0" v-model="options.treemap_threshold" @change="update()"></b-form-input>

View File

@ -56,6 +56,10 @@ export default {
tagline: "Tagline in navbar",
auth: "Basic auth in user:password format",
tagAuth: "Basic auth in user:password format for tagging",
auth0Audience: "Auth0 audience",
auth0Domain: "Auth0 domain",
auth0ClientId: "Auth0 client ID",
auth0PublicKey: "Auth0 public key",
},
scanOptions: {
title: "Scanning options",
@ -80,7 +84,8 @@ export default {
checksums: "Calculate file checksums when scanning",
readSubtitles: "Read subtitles from media files",
memBuffer: "Maximum memory buffer size per thread in MiB for files inside archives",
treemapThreshold: "Relative size threshold for treemap"
treemapThreshold: "Relative size threshold for treemap",
optimizeIndex: "Defragment index file after scan to reduce its file size."
},
indexOptions: {
title: "Indexing options",

File diff suppressed because it is too large Load Diff

View File

@ -251,7 +251,7 @@ def check_es_version(es_url: str, insecure: bool):
def start_frontend_(frontend: Sist2Frontend):
frontend.web_options.indices = list(map(lambda j: db["jobs"][j].last_index, frontend.jobs))
frontend.web_options.indices = list(map(lambda j: db["jobs"][j].index_path, frontend.jobs))
pid = sist2.web(frontend.web_options, frontend.name)
RUNNING_FRONTENDS[frontend.name] = pid
@ -378,6 +378,9 @@ if __name__ == '__main__':
if db["sist2_admin"]["info"]["version"] == "1":
logger.info("Migrating to v2 database schema")
migrate_v1_to_v2(db)
if db["sist2_admin"]["info"]["version"] == "2":
logger.error("Cannot migrate database from v2 to v3. Delete state.db to proceed.")
exit(-1)
start_frontends()
cron.initialize(db, _run_job)

View File

@ -1,23 +1,21 @@
import json
import logging
import os.path
import shutil
import signal
import uuid
from datetime import datetime
from enum import Enum
from hashlib import md5
from logging import FileHandler
from threading import Lock, Thread
from time import sleep
from uuid import uuid4, UUID
from hexlib.db import PersistentState
from pydantic import BaseModel, validator
from pydantic import BaseModel
from config import logger, LOG_FOLDER
from notifications import Notifications
from sist2 import ScanOptions, IndexOptions, Sist2, Sist2Index
from sist2 import ScanOptions, IndexOptions, Sist2
from state import RUNNING_FRONTENDS
from web import Sist2Frontend
@ -38,7 +36,8 @@ class Sist2Job(BaseModel):
schedule_enabled: bool = False
previous_index: str = None
last_index: str = None
index_path: str = None
previous_index_path: str = None
last_index_date: datetime = None
status: JobStatus = JobStatus("created")
last_modified: datetime
@ -124,10 +123,10 @@ class Sist2ScanTask(Sist2Task):
self.job.scan_options.name = self.job.name
if self.job.last_index and os.path.exists(self.job.last_index) and not self.job.do_full_scan:
self.job.scan_options.incremental = self.job.last_index
if self.job.index_path is not None and not self.job.do_full_scan:
self.job.scan_options.output = self.job.index_path
else:
self.job.scan_options.incremental = None
self.job.scan_options.output = None
def set_pid(pid):
self.pid = pid
@ -139,19 +138,26 @@ class Sist2ScanTask(Sist2Task):
self._logger.error(json.dumps({"sist2-admin": f"Process returned non-zero exit code ({return_code})"}))
logger.info(f"Task {self.display_name} failed ({return_code})")
else:
index = Sist2Index(self.job.scan_options.output)
# Save latest index
self.job.previous_index = self.job.last_index
self.job.last_index = index.path
self.job.index_path = self.job.scan_options.output
self.job.last_index_date = datetime.now()
self.job.do_full_scan = False
db["jobs"][self.job.name] = self.job
self._logger.info(json.dumps({"sist2-admin": f"Save last_index={self.job.last_index}"}))
self._logger.info(json.dumps({"sist2-admin": f"Save last_index_date={self.job.last_index_date}"}))
logger.info(f"Completed {self.display_name} ({return_code=})")
# Remove old index
if return_code == 0:
if self.job.previous_index_path is not None and self.job.previous_index_path != self.job.index_path:
self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index_path=}"}))
try:
os.remove(self.job.previous_index_path)
except FileNotFoundError:
pass
self.job.previous_index_path = self.job.index_path
db["jobs"][self.job.name] = self.job
return return_code
@ -173,18 +179,11 @@ class Sist2IndexTask(Sist2Task):
ok = return_code == 0
if ok:
# Remove old index
if self.job.previous_index is not None:
self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index=}"}))
try:
shutil.rmtree(self.job.previous_index)
except FileNotFoundError:
pass
self.restart_running_frontends(db, sist2)
# Update status
self.job.status = JobStatus("indexed") if ok else JobStatus("failed")
self.job.previous_index_path = self.job.index_path
db["jobs"][self.job.name] = self.job
self._logger.info(json.dumps({"sist2-admin": f"Sist2Scan task finished {return_code=}, {duration=}"}))
@ -198,13 +197,16 @@ class Sist2IndexTask(Sist2Task):
frontend = db["frontends"][frontend_name]
frontend: Sist2Frontend
os.kill(pid, signal.SIGTERM)
try:
os.kill(pid, signal.SIGTERM)
except ProcessLookupError:
pass
try:
os.wait()
except ChildProcessError:
pass
frontend.web_options.indices = map(lambda j: db["jobs"][j].last_index, frontend.jobs)
frontend.web_options.indices = map(lambda j: db["jobs"][j].index_path, frontend.jobs)
pid = sist2.web(frontend.web_options, frontend.name)
RUNNING_FRONTENDS[frontend_name] = pid

View File

@ -2,7 +2,6 @@ import datetime
import json
import logging
import os.path
import traceback
from datetime import datetime
from io import TextIOWrapper
from logging import FileHandler
@ -78,10 +77,10 @@ class IndexOptions(BaseModel):
es_url: str = "http://elasticsearch:9200"
es_insecure_ssl: bool = False
es_index: str = "sist2"
incremental_index: bool = False
incremental_index: bool = True
script: str = ""
script_file: str = None
batch_size: int = 100
batch_size: int = 70
def __init__(self, **kwargs):
super().__init__(**kwargs)
@ -110,9 +109,8 @@ ARCHIVE_RECURSE = "recurse"
class ScanOptions(BaseModel):
path: str
threads: int = 1
mem_throttle: int = 0
thumbnail_quality: int = 2
thumbnail_size: int = 500
thumbnail_size: int = 552
thumbnail_count: int = 1
content_size: int = 32768
depth: int = -1
@ -128,7 +126,8 @@ class ScanOptions(BaseModel):
read_subtitles: bool = False
fast_epub: bool = False
checksums: bool = False
incremental: str = None
incremental: bool = True
optimize_index: bool = False
output: str = None
name: str = None
rewrite_url: str = None
@ -138,14 +137,15 @@ class ScanOptions(BaseModel):
super().__init__(**kwargs)
def args(self):
args = ["scan", self.path, f"--threads={self.threads}", f"--mem-throttle={self.mem_throttle}",
f"--thumbnail-quality={self.thumbnail_quality}", f"--thumbnail-count={self.thumbnail_count}",
f"--thumbnail-size={self.thumbnail_size}", f"--content-size={self.content_size}",
f"--output={self.output}", f"--depth={self.depth}", f"--archive={self.archive}",
f"--mem-buffer={self.mem_buffer}"]
args = ["scan", self.path, f"--threads={self.threads}", f"--thumbnail-quality={self.thumbnail_quality}",
f"--thumbnail-count={self.thumbnail_count}", f"--thumbnail-size={self.thumbnail_size}",
f"--content-size={self.content_size}", f"--output={self.output}", f"--depth={self.depth}",
f"--archive={self.archive}", f"--mem-buffer={self.mem_buffer}"]
if self.incremental:
args.append(f"--incremental={self.incremental}")
args.append(f"--incremental")
if self.optimize_index:
args.append(f"--optimize-index")
if self.rewrite_url:
args.append(f"--rewrite-url={self.rewrite_url}")
if self.name:
@ -235,11 +235,11 @@ class Sist2:
def scan(self, options: ScanOptions, logs_cb, set_pid_cb):
output_dir = os.path.join(
self._data_dir,
f"scan-{options.name.replace('/', '_')}-{datetime.now()}.sist2"
)
options.output = output_dir
if options.output is None:
options.output = os.path.join(
self._data_dir,
f"scan-{options.name.replace('/', '_')}-{datetime.now()}.sist2"
)
args = [
self._bin_path,

View File

@ -10,7 +10,7 @@ RUNNING_FRONTENDS: Dict[str, int] = {}
TESSERACT_LANGS = get_tesseract_langs()
DB_SCHEMA_VERSION = "2"
DB_SCHEMA_VERSION = "3"
from pydantic import BaseModel
@ -76,4 +76,4 @@ def migrate_v1_to_v2(db: PersistentState):
db["sist2_admin"]["info"] = {
"version": "2"
}
}

View File

@ -75,7 +75,7 @@ export default {
}
return (this.currentThumbnailNum === 0)
? `t/${doc._source.index}/${doc._id}`
: `t/${doc._source.index}/${doc._id}${String(thumbnailNum).padStart(4, "0")}`;
: `t/${doc._source.index}/${doc._id}/${String(thumbnailNum).padStart(4, "0")}`;
},
humanTime: humanTime,
onThumbnailClick() {

View File

@ -277,10 +277,6 @@ void tpool_destroy(tpool_t *pool) {
database_close(ProcData.ipc_db, FALSE);
int count = 0;
LOG_DEBUGF("tpool.c", "Destroyed %d jobs", count);
pthread_mutex_lock(&pool->shm->mutex);
pthread_cond_broadcast(&pool->shm->ipc_ctx.has_work_cond);
pthread_mutex_unlock(&pool->shm->mutex);

View File

@ -1,7 +1,6 @@
#include "serve.h"
#include "src/sist.h"
//#include "src/io/store.h"
#include "src/index/elastic.h"
#include "src/index/web.h"
#include "src/auth0/auth0_c_api.h"
@ -145,7 +144,7 @@ void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) {
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
memcpy(arg_num, hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2, 4);
memcpy(arg_num, hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 3, 4);
int num = (int) strtol(arg_num, NULL, 10);

View File

@ -106,7 +106,7 @@ find_library(MUPDF_LIB NAMES liblibmupdf.a)
find_library(CMS_LIB NAMES lcms2)
find_library(JAS_LIB NAMES jasper)
find_library(GUMBO_LIB NAMES gumbo)
find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/)
find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/11/)
find_package(Leptonica CONFIG REQUIRED)
find_package(FFMPEG REQUIRED)
find_package(libraw CONFIG REQUIRED)