Compare commits

...

8 Commits

Author SHA1 Message Date
f984baf7fd Fix #373 2023-06-10 10:59:25 -04:00
ce242d1053 Fix #372 2023-06-09 08:16:10 -04:00
71deab7fa2 Merge pull request #371 from dpieski/patch-3 (Only remove files with job_name.) 2023-06-08 18:02:20 -04:00
Andrew b0462f9378 Only remove files with job_name. 2023-06-08 11:50:39 -05:00
ca845d80e8 Version bump 2023-06-07 20:40:45 -04:00
e2025df2c0 sist2-admin: don't set status to failed when using debug binary 2023-06-07 20:40:11 -04:00
7eb064162e close db connection before loop #346 2023-06-07 20:25:13 -04:00
7bc4b73e43 Use relative paths in sist2-admin #369 2023-06-07 19:59:50 -04:00
8 changed files with 46 additions and 34 deletions

View File

@@ -134,7 +134,7 @@ export default {
duration: this.taskDuration(row), duration: this.taskDuration(row),
time: moment.utc(row.started).local().format("dd, MMM Do YYYY, HH:mm:ss"), time: moment.utc(row.started).local().format("dd, MMM Do YYYY, HH:mm:ss"),
logs: null, logs: null,
status: row.return_code === 0 ? "ok" : "failed", status: [0,1].includes(row.return_code) ? "ok" : "failed",
_row: row _row: row
})); }));
}); });

View File

@@ -275,7 +275,10 @@ def check_es_version(es_url: str, insecure: bool):
def start_frontend_(frontend: Sist2Frontend): def start_frontend_(frontend: Sist2Frontend):
frontend.web_options.indices = list(map(lambda j: db["jobs"][j].index_path, frontend.jobs)) frontend.web_options.indices = [
os.path.join(DATA_FOLDER, db["jobs"][j].index_path)
for j in frontend.jobs
]
backend_name = frontend.web_options.search_backend backend_name = frontend.web_options.search_backend
search_backend = db["search_backends"][backend_name] search_backend = db["search_backends"][backend_name]
@@ -354,7 +357,7 @@ def delete_search_backend(name: str):
del db["search_backends"][name] del db["search_backends"][name]
try: try:
os.remove(backend.search_index) os.remove(os.path.join(DATA_FOLDER, backend.search_index))
except: except:
pass pass

View File

@@ -10,7 +10,9 @@ from jobs import Sist2Job
def _check_schedule(db: PersistentState, run_job): def _check_schedule(db: PersistentState, run_job):
for job in db["jobs"]: jobs = list(db["jobs"])
for job in jobs:
job: Sist2Job job: Sist2Job
if job.schedule_enabled: if job.schedule_enabled:

View File

@@ -13,7 +13,7 @@ from uuid import uuid4, UUID
from hexlib.db import PersistentState from hexlib.db import PersistentState
from pydantic import BaseModel from pydantic import BaseModel
from config import logger, LOG_FOLDER from config import logger, LOG_FOLDER, DATA_FOLDER
from notifications import Notifications from notifications import Notifications
from sist2 import ScanOptions, IndexOptions, Sist2 from sist2 import ScanOptions, IndexOptions, Sist2
from state import RUNNING_FRONTENDS, get_log_files_to_remove, delete_log_file from state import RUNNING_FRONTENDS, get_log_files_to_remove, delete_log_file
@@ -131,7 +131,9 @@ class Sist2ScanTask(Sist2Task):
return_code = sist2.scan(self.job.scan_options, logs_cb=self.log_callback, set_pid_cb=set_pid) return_code = sist2.scan(self.job.scan_options, logs_cb=self.log_callback, set_pid_cb=set_pid)
self.ended = datetime.utcnow() self.ended = datetime.utcnow()
if return_code != 0: is_ok = return_code in (0, 1)
if not is_ok:
self._logger.error(json.dumps({"sist2-admin": f"Process returned non-zero exit code ({return_code})"})) self._logger.error(json.dumps({"sist2-admin": f"Process returned non-zero exit code ({return_code})"}))
logger.info(f"Task {self.display_name} failed ({return_code})") logger.info(f"Task {self.display_name} failed ({return_code})")
else: else:
@@ -144,7 +146,7 @@ class Sist2ScanTask(Sist2Task):
logger.info(f"Completed {self.display_name} ({return_code=})") logger.info(f"Completed {self.display_name} ({return_code=})")
# Remove old index # Remove old index
if return_code == 0: if is_ok:
if self.job.previous_index_path is not None and self.job.previous_index_path != self.job.index_path: if self.job.previous_index_path is not None and self.job.previous_index_path != self.job.index_path:
self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index_path=}"})) self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index_path=}"}))
try: try:
@@ -218,7 +220,10 @@ class Sist2IndexTask(Sist2Task):
logger.debug(f"Fetched search backend options for {backend_name}") logger.debug(f"Fetched search backend options for {backend_name}")
frontend.web_options.indices = map(lambda j: db["jobs"][j].index_path, frontend.jobs) frontend.web_options.indices = [
os.path.join(DATA_FOLDER, db["jobs"][j].index_path)
for j in frontend.jobs
]
pid = sist2.web(frontend.web_options, search_backend, frontend.name) pid = sist2.web(frontend.web_options, search_backend, frontend.name)
RUNNING_FRONTENDS[frontend_name] = pid RUNNING_FRONTENDS[frontend_name] = pid
@@ -244,7 +249,7 @@ class TaskQueue:
def _tasks_failed(self): def _tasks_failed(self):
done = set() done = set()
for row in self._db["task_done"].sql("WHERE return_code != 0"): for row in self._db["task_done"].sql("WHERE return_code NOT IN (0,1)"):
done.add(uuid.UUID(row["id"])) done.add(uuid.UUID(row["id"]))
return done return done

View File

@@ -49,7 +49,7 @@ class Sist2SearchBackend(BaseModel):
def create_default(name: str, backend_type: SearchBackendType = SearchBackendType("elasticsearch")): def create_default(name: str, backend_type: SearchBackendType = SearchBackendType("elasticsearch")):
return Sist2SearchBackend( return Sist2SearchBackend(
name=name, name=name,
search_index=os.path.join(DATA_FOLDER, f"search-index-{name.replace('/', '_')}.sist2"), search_index=f"search-index-{name.replace('/', '_')}.sist2",
backend_type=backend_type backend_type=backend_type
) )
@@ -63,10 +63,13 @@ class IndexOptions(BaseModel):
super().__init__(**kwargs) super().__init__(**kwargs)
def args(self, search_backend): def args(self, search_backend):
absolute_path = os.path.join(DATA_FOLDER, self.path)
if search_backend.backend_type == SearchBackendType("sqlite"): if search_backend.backend_type == SearchBackendType("sqlite"):
args = ["sqlite-index", self.path, "--search-index", search_backend.search_index] search_index_absolute = os.path.join(DATA_FOLDER, search_backend.search_index)
args = ["sqlite-index", absolute_path, "--search-index", search_index_absolute]
else: else:
args = ["index", self.path, f"--threads={search_backend.threads}", args = ["index", absolute_path, f"--threads={search_backend.threads}",
f"--es-url={search_backend.es_url}", f"--es-url={search_backend.es_url}",
f"--es-index={search_backend.es_index}", f"--es-index={search_backend.es_index}",
f"--batch-size={search_backend.batch_size}"] f"--batch-size={search_backend.batch_size}"]
@@ -118,9 +121,12 @@ class ScanOptions(BaseModel):
super().__init__(**kwargs) super().__init__(**kwargs)
def args(self): def args(self):
output_path = os.path.join(DATA_FOLDER, self.output)
args = ["scan", self.path, f"--threads={self.threads}", f"--thumbnail-quality={self.thumbnail_quality}", args = ["scan", self.path, f"--threads={self.threads}", f"--thumbnail-quality={self.thumbnail_quality}",
f"--thumbnail-count={self.thumbnail_count}", f"--thumbnail-size={self.thumbnail_size}", f"--thumbnail-count={self.thumbnail_count}", f"--thumbnail-size={self.thumbnail_size}",
f"--content-size={self.content_size}", f"--output={self.output}", f"--depth={self.depth}", f"--content-size={self.content_size}", f"--output={output_path}", f"--depth={self.depth}",
f"--archive={self.archive}", f"--mem-buffer={self.mem_buffer}"] f"--archive={self.archive}", f"--mem-buffer={self.mem_buffer}"]
if self.incremental: if self.incremental:
@@ -181,6 +187,7 @@ class Sist2Index:
def name(self) -> str: def name(self) -> str:
return self._descriptor["name"] return self._descriptor["name"]
class WebOptions(BaseModel): class WebOptions(BaseModel):
indices: List[str] = [] indices: List[str] = []
@@ -206,7 +213,8 @@ class WebOptions(BaseModel):
f"--lang={self.lang}"] f"--lang={self.lang}"]
if search_backend.backend_type == SearchBackendType("sqlite"): if search_backend.backend_type == SearchBackendType("sqlite"):
args.append(f"--search-index={search_backend.search_index}") search_index_absolute = os.path.join(DATA_FOLDER, search_backend.search_index)
args.append(f"--search-index={search_index_absolute}")
else: else:
args.append(f"--es-url={search_backend.es_url}") args.append(f"--es-url={search_backend.es_url}")
args.append(f"--es-index={search_backend.es_index}") args.append(f"--es-index={search_backend.es_index}")
@@ -232,10 +240,11 @@ class WebOptions(BaseModel):
return args return args
class Sist2: class Sist2:
def __init__(self, bin_path: str, data_directory: str): def __init__(self, bin_path: str, data_directory: str):
self._bin_path = bin_path self.bin_path = bin_path
self._data_dir = data_directory self._data_dir = data_directory
def index(self, options: IndexOptions, search_backend: Sist2SearchBackend, logs_cb): def index(self, options: IndexOptions, search_backend: Sist2SearchBackend, logs_cb):
@@ -248,7 +257,7 @@ class Sist2:
search_backend.script_file = None search_backend.script_file = None
args = [ args = [
self._bin_path, self.bin_path,
*options.args(search_backend), *options.args(search_backend),
"--json-logs", "--json-logs",
"--very-verbose" "--very-verbose"
@@ -269,13 +278,10 @@ class Sist2:
def scan(self, options: ScanOptions, logs_cb, set_pid_cb): def scan(self, options: ScanOptions, logs_cb, set_pid_cb):
if options.output is None: if options.output is None:
options.output = os.path.join( options.output = f"scan-{options.name.replace('/', '_')}-{datetime.utcnow()}.sist2"
self._data_dir,
f"scan-{options.name.replace('/', '_')}-{datetime.utcnow()}.sist2"
)
args = [ args = [
self._bin_path, self.bin_path,
*options.args(), *options.args(),
"--json-logs", "--json-logs",
"--very-verbose" "--very-verbose"
@@ -333,7 +339,7 @@ class Sist2:
options.auth0_public_key_file = None options.auth0_public_key_file = None
args = [ args = [
self._bin_path, self.bin_path,
*options.args(search_backend) *options.args(search_backend)
] ]
@@ -354,5 +360,3 @@ class Sist2:
t_stdout.start() t_stdout.start()
return proc.pid return proc.pid

View File

@@ -65,8 +65,8 @@ def get_log_files_to_remove(db: PersistentState, job_name: str, n: int):
if row["name"].endswith(f"[{job_name}]"): if row["name"].endswith(f"[{job_name}]"):
counter += 1 counter += 1
if counter > n: if counter > n:
to_remove.append(row) to_remove.append(row)
return to_remove return to_remove

View File

@@ -53,9 +53,9 @@ file_type_t get_file_type(unsigned int mime, size_t size, const char *filepath)
} else if (IS_FONT(mime)) { } else if (IS_FONT(mime)) {
return FILETYPE_FONT; return FILETYPE_FONT;
} else if (ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && ( } else if (ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
IS_ARC(mime) || IS_ARC(mime) ||
(IS_ARC_FILTER(mime) && should_parse_filtered_file(filepath)) (IS_ARC_FILTER(mime) && should_parse_filtered_file(filepath))
)) { )) {
return FILETYPE_ARCHIVE; return FILETYPE_ARCHIVE;
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(mime)) { } else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(mime)) {
return FILETYPE_OOXML; return FILETYPE_OOXML;
@@ -155,19 +155,17 @@ void parse(parse_job_t *job) {
doc->meta_head = NULL; doc->meta_head = NULL;
doc->meta_tail = NULL; doc->meta_tail = NULL;
doc->size = job->vfile.st_size; doc->size = job->vfile.st_size;
doc->mtime = job->vfile.mtime; doc->mtime = MAX(job->vfile.mtime, 0);
doc->mime = get_mime(job); doc->mime = get_mime(job);
generate_doc_id(doc->filepath + ScanCtx.index.desc.root_len, doc->doc_id); generate_doc_id(doc->filepath + ScanCtx.index.desc.root_len, doc->doc_id);
if (doc->mime == GET_MIME_ERROR_FATAL) { if (doc->mime == GET_MIME_ERROR_FATAL) {
CLOSE_FILE(job->vfile) CLOSE_FILE(job->vfile)
free(doc); free(doc);
return; return;
} }
if (database_mark_document(ProcData.index_db, doc->doc_id, doc->mtime)) { if (database_mark_document(ProcData.index_db, doc->doc_id, doc->mtime)) {
CLOSE_FILE(job->vfile) CLOSE_FILE(job->vfile)
free(doc); free(doc);
return; return;

View File

@@ -51,11 +51,11 @@
#include <ctype.h> #include <ctype.h>
#include "git_hash.h" #include "git_hash.h"
#define VERSION "3.1.1" #define VERSION "3.1.2"
static const char *const Version = VERSION; static const char *const Version = VERSION;
static const int VersionMajor = 3; static const int VersionMajor = 3;
static const int VersionMinor = 1; static const int VersionMinor = 1;
static const int VersionPatch = 1; static const int VersionPatch = 2;
#ifndef SIST_PLATFORM #ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown #define SIST_PLATFORM unknown