mirror of
https://github.com/simon987/sist2.git
synced 2025-04-10 14:06:45 +00:00
Fix thumbnail copying for incremental index, fix incremental index when there are no new updates, add option for JSON logs output
This commit is contained in:
parent
4ec54c9a32
commit
c18557e360
@ -75,6 +75,7 @@ typedef struct {
|
||||
int verbose;
|
||||
int very_verbose;
|
||||
int no_color;
|
||||
int json_logs;
|
||||
} LogCtx_t;
|
||||
|
||||
typedef struct {
|
||||
|
@ -500,9 +500,6 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_
|
||||
json_str = realloc(json_str, json_str_len + 1);
|
||||
*(json_str + json_str_len) = '\n';
|
||||
|
||||
zstd_write_string(json_str, json_str_len + 1);
|
||||
free(json_str);
|
||||
|
||||
// Copy tn store contents
|
||||
size_t buf_len;
|
||||
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
|
||||
@ -510,6 +507,26 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_
|
||||
store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
// Also copy additional thumbnails
|
||||
if (cJSON_GetObjectItem(document, "thumbnail") != NULL) {
|
||||
const int thumbnail_count = cJSON_GetObjectItem(document, "thumbnail")->valueint;
|
||||
|
||||
for (int i = 1; i < thumbnail_count; i++) {
|
||||
char tn_key[SIST_DOC_ID_LEN + sizeof(char) * 4];
|
||||
|
||||
snprintf(tn_key, sizeof(tn_key), "%s%04d", doc_id, i);
|
||||
|
||||
buf = store_read(IncrementalCopySourceStore, tn_key, sizeof(tn_key), &buf_len);
|
||||
if (buf_len != 0) {
|
||||
store_write(IncrementalCopyDestinationStore, tn_key, sizeof(tn_key), buf, buf_len);
|
||||
free(buf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
zstd_write_string(json_str, json_str_len + 1);
|
||||
free(json_str);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -87,7 +87,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
||||
}
|
||||
|
||||
if (db_full) {
|
||||
LOG_INFOF("store.c", "Updating mdb mapsize to %lu bytes", store->size)
|
||||
LOG_DEBUGF("store.c", "Updating mdb mapsize to %lu bytes", store->size)
|
||||
|
||||
if (should_abort_transaction) {
|
||||
mdb_txn_abort(txn);
|
||||
@ -116,7 +116,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
|
||||
store->path, mdb_strerror(ret), ret,
|
||||
put_ret, put_ret_retry);
|
||||
}
|
||||
LOG_INFOF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
|
||||
LOG_DEBUGF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
|
||||
} else if (put_ret != 0) {
|
||||
LOG_ERROR("store.c", mdb_strerror(put_ret))
|
||||
}
|
||||
|
47
src/log.c
47
src/log.c
@ -1,4 +1,5 @@
|
||||
#include "log.h"
|
||||
#include "ctx.h"
|
||||
|
||||
#include <pthread.h>
|
||||
#include <stdarg.h>
|
||||
@ -30,6 +31,30 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
|
||||
strftime(datetime, sizeof(datetime), "%Y-%m-%d %H:%M:%S", &result);
|
||||
|
||||
int log_len;
|
||||
if (LogCtx.json_logs) {
|
||||
vsnprintf(log_str, sizeof(log_str), format, ap);
|
||||
|
||||
cJSON *log_str_json = cJSON_CreateString(log_str);
|
||||
char *log_str_json_str = cJSON_PrintUnformatted(log_str_json);
|
||||
|
||||
cJSON *filepath_json = cJSON_CreateString(filepath);
|
||||
char *filepath_json_str = cJSON_PrintUnformatted(filepath_json);
|
||||
|
||||
log_len = snprintf(
|
||||
log_str, sizeof(log_str),
|
||||
"{\"thread\":\"%04llX\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
|
||||
pid, datetime, log_levels[level], filepath_json_str, log_str_json_str
|
||||
);
|
||||
|
||||
cJSON_Delete(filepath_json);
|
||||
cJSON_Delete(log_str_json);
|
||||
free(log_str_json_str);
|
||||
free(filepath_json_str);
|
||||
|
||||
write(STDOUT_FILENO, log_str, log_len);
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_tty) {
|
||||
log_len = snprintf(
|
||||
log_str, sizeof(log_str),
|
||||
@ -97,6 +122,28 @@ void sist_log(const char *filepath, int level, char *str) {
|
||||
strftime(datetime, sizeof(datetime), "%Y-%m-%d %H:%M:%S", &result);
|
||||
|
||||
int log_len;
|
||||
|
||||
if (LogCtx.json_logs) {
|
||||
cJSON *log_str_json = cJSON_CreateString(str);
|
||||
char *log_str_json_str = cJSON_PrintUnformatted(log_str_json);
|
||||
|
||||
cJSON *filepath_json = cJSON_CreateString(filepath);
|
||||
char *filepath_json_str = cJSON_PrintUnformatted(filepath_json);
|
||||
|
||||
log_len = snprintf(
|
||||
log_str, sizeof(log_str),
|
||||
"{\"thread\":\"%04llX\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
|
||||
pid, datetime, log_levels[level], filepath_json_str, log_str_json_str
|
||||
);
|
||||
|
||||
cJSON_Delete(log_str_json);
|
||||
cJSON_Delete(filepath_json);
|
||||
free(log_str_json_str);
|
||||
free(filepath_json_str);
|
||||
|
||||
write(STDOUT_FILENO, log_str, log_len);
|
||||
return;
|
||||
}
|
||||
if (is_tty) {
|
||||
log_len = snprintf(
|
||||
log_str, sizeof(log_str),
|
||||
|
2
src/magic_generated.c
vendored
2
src/magic_generated.c
vendored
File diff suppressed because one or more lines are too long
@ -328,7 +328,7 @@ void load_incremental_index(const scan_args_t *args) {
|
||||
file_path,
|
||||
args->incremental,
|
||||
incremental_read(ScanCtx.original_table, file_path, &original_desc),
|
||||
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
|
||||
LOG_DEBUG("main.c", "The base index for incremental scan does not have a main index"),
|
||||
TRUE
|
||||
);
|
||||
|
||||
@ -635,6 +635,7 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
|
||||
OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging"),
|
||||
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
|
||||
OPT_BOOLEAN(0, "json-logs", &LogCtx.json_logs, "Output logs in JSON format."),
|
||||
|
||||
OPT_GROUP("Scan options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
|
@ -49,7 +49,7 @@
|
||||
#include <ctype.h>
|
||||
#include "git_hash.h"
|
||||
|
||||
#define VERSION "2.12.2"
|
||||
#define VERSION "2.13.0"
|
||||
static const char *const Version = VERSION;
|
||||
|
||||
#ifndef SIST_PLATFORM
|
||||
|
37
src/tpool.c
37
src/tpool.c
@ -34,6 +34,7 @@ typedef struct tpool {
|
||||
|
||||
int free_arg;
|
||||
int stop;
|
||||
int waiting;
|
||||
|
||||
int print_progress;
|
||||
|
||||
@ -121,22 +122,22 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
|
||||
* see: https://github.com/htop-dev/htop/blob/f782f821f7f8081cb43bbad1c37f32830a260a81/linux/LinuxProcessList.c
|
||||
*/
|
||||
__always_inline
|
||||
static size_t _get_total_mem(tpool_t* pool) {
|
||||
FILE* statmfile = fopen("/proc/self/statm", "r");
|
||||
static size_t _get_total_mem(tpool_t *pool) {
|
||||
FILE *statmfile = fopen("/proc/self/statm", "r");
|
||||
if (!statmfile)
|
||||
return 0;
|
||||
return 0;
|
||||
|
||||
long int dummy, dummy2, dummy3, dummy4, dummy5, dummy6;
|
||||
long int m_resident;
|
||||
|
||||
int r = fscanf(statmfile, "%ld %ld %ld %ld %ld %ld %ld",
|
||||
&dummy, /* m_virt */
|
||||
&m_resident,
|
||||
&dummy2, /* m_share */
|
||||
&dummy3, /* m_trs */
|
||||
&dummy4, /* unused since Linux 2.6; always 0 */
|
||||
&dummy5, /* m_drs */
|
||||
&dummy6); /* unused since Linux 2.6; always 0 */
|
||||
&dummy, /* m_virt */
|
||||
&m_resident,
|
||||
&dummy2, /* m_share */
|
||||
&dummy3, /* m_trs */
|
||||
&dummy4, /* unused since Linux 2.6; always 0 */
|
||||
&dummy5, /* m_drs */
|
||||
&dummy6); /* unused since Linux 2.6; always 0 */
|
||||
fclose(statmfile);
|
||||
|
||||
if (r == 7) {
|
||||
@ -174,7 +175,7 @@ static void *tpool_worker(void *arg) {
|
||||
if (work != NULL) {
|
||||
stuck_notified = 0;
|
||||
throttle_ms = 0;
|
||||
while(!pool->stop && pool->mem_limit > 0 && _get_total_mem(pool) >= pool->mem_limit) {
|
||||
while (!pool->stop && pool->mem_limit > 0 && _get_total_mem(pool) >= pool->mem_limit) {
|
||||
if (!stuck_notified && throttle_ms >= 90000) {
|
||||
// notify the pool that this thread is stuck.
|
||||
pthread_mutex_lock(&(pool->work_mutex));
|
||||
@ -215,7 +216,13 @@ static void *tpool_worker(void *arg) {
|
||||
}
|
||||
|
||||
if (pool->print_progress) {
|
||||
progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
|
||||
if (LogCtx.json_logs) {
|
||||
progress_bar_print_json(pool->done_cnt, pool->work_cnt, ScanCtx.stat_tn_size,
|
||||
ScanCtx.stat_index_size, pool->waiting);
|
||||
} else {
|
||||
progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size,
|
||||
ScanCtx.stat_index_size);
|
||||
}
|
||||
}
|
||||
|
||||
if (pool->work_head == NULL) {
|
||||
@ -238,6 +245,9 @@ static void *tpool_worker(void *arg) {
|
||||
void tpool_wait(tpool_t *pool) {
|
||||
LOG_DEBUG("tpool.c", "Waiting for worker threads to finish")
|
||||
pthread_mutex_lock(&(pool->work_mutex));
|
||||
|
||||
pool->waiting = TRUE;
|
||||
|
||||
while (TRUE) {
|
||||
if (pool->done_cnt < pool->work_cnt) {
|
||||
pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
|
||||
@ -250,7 +260,7 @@ void tpool_wait(tpool_t *pool) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (pool->print_progress) {
|
||||
if (pool->print_progress && !LogCtx.json_logs) {
|
||||
progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
|
||||
}
|
||||
pthread_mutex_unlock(&(pool->work_mutex));
|
||||
@ -312,6 +322,7 @@ tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int pri
|
||||
pool->throttle_stuck_cnt = 0;
|
||||
pool->mem_limit = mem_limit;
|
||||
pool->stop = FALSE;
|
||||
pool->waiting = FALSE;
|
||||
pool->free_arg = free_arg;
|
||||
pool->cleanup_func = cleanup_func;
|
||||
pool->threads = calloc(sizeof(pthread_t), thread_cnt);
|
||||
|
15
src/util.c
15
src/util.c
@ -86,6 +86,21 @@ char *expandpath(const char *path) {
|
||||
|
||||
int PrintingProgressBar = 0;
|
||||
|
||||
#define BOOLEAN_STRING(x) ((x) == 0 ? "false" : "true")

/**
 * Write one JSON progress record, terminated by a newline, to standard output.
 *
 * The record has the form
 * {"progress": {"done":N,"count":N,"tn_size":N,"index_size":N,"waiting":true|false}}
 *
 * @param done       value emitted for the "done" field
 * @param count      value emitted for the "count" field
 * @param tn_size    value emitted for the "tn_size" field
 * @param index_size value emitted for the "index_size" field
 * @param waiting    emitted as false when zero, true otherwise
 */
void progress_bar_print_json(size_t done, size_t count, size_t tn_size, size_t index_size, int waiting) {

    char log_str[1024];

    // %zu is the conversion that matches size_t; %lu only happens to work
    // on platforms where size_t and unsigned long have the same width.
    const int log_len = snprintf(
            log_str, sizeof(log_str),
            "{\"progress\": {\"done\":%zu,\"count\":%zu,\"tn_size\":%zu,\"index_size\":%zu,\"waiting\":%s}}\n",
            done, count, tn_size, index_size, BOOLEAN_STRING(waiting)
    );

    // snprintf returns a negative value on error and the untruncated length
    // on overflow; neither may be used as a byte count for log_str.
    if (log_len <= 0 || (size_t) log_len >= sizeof(log_str)) {
        return;
    }

    // write() may accept fewer bytes than requested; keep going so that a
    // consumer never receives half a record.
    size_t written = 0;
    while (written < (size_t) log_len) {
        const ssize_t ret = write(STDOUT_FILENO, log_str + written, (size_t) log_len - written);
        if (ret <= 0) {
            return;
        }
        written += (size_t) ret;
    }
}
|
||||
|
||||
void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
|
||||
|
||||
static int last_val = -1;
|
||||
|
@ -19,6 +19,7 @@ dyn_buffer_t url_escape(char *str);
|
||||
|
||||
extern int PrintingProgressBar;
|
||||
|
||||
void progress_bar_print_json(size_t done, size_t count, size_t tn_size, size_t index_size, int waiting);
|
||||
void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
|
||||
|
||||
GHashTable *incremental_get_table();
|
||||
|
@ -20,6 +20,13 @@ static struct mg_http_serve_opts DefaultServeOpts = {
|
||||
};
|
||||
|
||||
|
||||
/**
 * Render addr as text by handing it to mg_ntoa.
 *
 * The text is placed in a buffer with static storage duration, so every call
 * reuses (and overwrites) the same INET6_ADDRSTRLEN bytes; use or copy the
 * result before calling again.
 *
 * @param addr address to format
 * @return whatever mg_ntoa returns for the static buffer
 */
__always_inline
static char *address_to_string(struct mg_addr *addr) {
    static char text[INET6_ADDRSTRLEN];
    char *formatted = mg_ntoa(addr, text, sizeof(text));

    return formatted;
}
|
||||
|
||||
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
|
||||
mg_printf(
|
||||
nc,
|
||||
@ -592,6 +599,11 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
|
||||
}
|
||||
}
|
||||
|
||||
LOG_DEBUGF("serve.c", "<%s> GET %s",
|
||||
address_to_string(&(nc->rem)),
|
||||
hm->uri
|
||||
)
|
||||
|
||||
if (mg_http_match_uri(hm, "/")) {
|
||||
search_index(nc, hm);
|
||||
} else if (mg_http_match_uri(hm, "/favicon.ico")) {
|
||||
|
8
src/web/static_generated.c
vendored
8
src/web/static_generated.c
vendored
File diff suppressed because one or more lines are too long
2
third-party/libscan/CMakeLists.txt
vendored
2
third-party/libscan/CMakeLists.txt
vendored
@ -109,7 +109,7 @@ find_library(MUPDF_LIB NAMES liblibmupdf.a)
|
||||
find_library(CMS_LIB NAMES lcms2)
|
||||
find_library(JAS_LIB NAMES jasper)
|
||||
find_library(GUMBO_LIB NAMES gumbo)
|
||||
find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/)
|
||||
find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/)
|
||||
|
||||
|
||||
target_compile_options(
|
||||
|
2
third-party/libscan/libscan/ocr/ocr.h
vendored
2
third-party/libscan/libscan/ocr/ocr.h
vendored
@ -28,6 +28,8 @@ ocr_extract_text(const char *tesseract_path, const char *tesseract_lang,
|
||||
TessBaseAPI *api = TessBaseAPICreate();
|
||||
TessBaseAPIInit3(api, tesseract_path, tesseract_lang);
|
||||
|
||||
TessBaseAPISetPageSegMode(api, PSM_AUTO_OSD);
|
||||
|
||||
TessBaseAPISetImage(api, img_buf, img_w, img_h, img_bpp, img_stride);
|
||||
TessBaseAPISetSourceResolution(api, img_xres);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user