Fix thumbnail copying for incremental index, fix incremental index when there are no new updates, add option for JSON log output

This commit is contained in:
simon987 2022-11-23 20:45:47 -05:00
parent 4ec54c9a32
commit c18557e360
14 changed files with 133 additions and 26 deletions

View File

@ -75,6 +75,7 @@ typedef struct {
int verbose;
int very_verbose;
int no_color;
int json_logs;
} LogCtx_t;
typedef struct {

View File

@ -500,9 +500,6 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_
json_str = realloc(json_str, json_str_len + 1);
*(json_str + json_str_len) = '\n';
zstd_write_string(json_str, json_str_len + 1);
free(json_str);
// Copy tn store contents
size_t buf_len;
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
@ -510,6 +507,26 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_
store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
free(buf);
}
// Also copy additional thumbnails
if (cJSON_GetObjectItem(document, "thumbnail") != NULL) {
const int thumbnail_count = cJSON_GetObjectItem(document, "thumbnail")->valueint;
for (int i = 1; i < thumbnail_count; i++) {
char tn_key[SIST_DOC_ID_LEN + sizeof(char) * 4];
snprintf(tn_key, sizeof(tn_key), "%s%04d", doc_id, i);
buf = store_read(IncrementalCopySourceStore, tn_key, sizeof(tn_key), &buf_len);
if (buf_len != 0) {
store_write(IncrementalCopyDestinationStore, tn_key, sizeof(tn_key), buf, buf_len);
free(buf);
}
}
}
zstd_write_string(json_str, json_str_len + 1);
free(json_str);
}
}

View File

@ -87,7 +87,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
}
if (db_full) {
LOG_INFOF("store.c", "Updating mdb mapsize to %lu bytes", store->size)
LOG_DEBUGF("store.c", "Updating mdb mapsize to %lu bytes", store->size)
if (should_abort_transaction) {
mdb_txn_abort(txn);
@ -116,7 +116,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
store->path, mdb_strerror(ret), ret,
put_ret, put_ret_retry);
}
LOG_INFOF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
LOG_DEBUGF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
} else if (put_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}

View File

@ -1,4 +1,5 @@
#include "log.h"
#include "ctx.h"
#include <pthread.h>
#include <stdarg.h>
@ -30,6 +31,30 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
strftime(datetime, sizeof(datetime), "%Y-%m-%d %H:%M:%S", &result);
int log_len;
if (LogCtx.json_logs) {
vsnprintf(log_str, sizeof(log_str), format, ap);
cJSON *log_str_json = cJSON_CreateString(log_str);
char *log_str_json_str = cJSON_PrintUnformatted(log_str_json);
cJSON *filepath_json = cJSON_CreateString(filepath);
char *filepath_json_str = cJSON_PrintUnformatted(filepath_json);
log_len = snprintf(
log_str, sizeof(log_str),
"{\"thread\":\"%04llX\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
pid, datetime, log_levels[level], filepath_json_str, log_str_json_str
);
cJSON_Delete(filepath_json);
cJSON_Delete(log_str_json);
free(log_str_json_str);
free(filepath_json_str);
write(STDOUT_FILENO, log_str, log_len);
return;
}
if (is_tty) {
log_len = snprintf(
log_str, sizeof(log_str),
@ -97,6 +122,28 @@ void sist_log(const char *filepath, int level, char *str) {
strftime(datetime, sizeof(datetime), "%Y-%m-%d %H:%M:%S", &result);
int log_len;
if (LogCtx.json_logs) {
cJSON *log_str_json = cJSON_CreateString(str);
char *log_str_json_str = cJSON_PrintUnformatted(log_str_json);
cJSON *filepath_json = cJSON_CreateString(filepath);
char *filepath_json_str = cJSON_PrintUnformatted(filepath_json);
log_len = snprintf(
log_str, sizeof(log_str),
"{\"thread\":\"%04llX\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
pid, datetime, log_levels[level], filepath_json_str, log_str_json_str
);
cJSON_Delete(log_str_json);
cJSON_Delete(filepath_json);
free(log_str_json_str);
free(filepath_json_str);
write(STDOUT_FILENO, log_str, log_len);
return;
}
if (is_tty) {
log_len = snprintf(
log_str, sizeof(log_str),

File diff suppressed because one or more lines are too long

View File

@ -328,7 +328,7 @@ void load_incremental_index(const scan_args_t *args) {
file_path,
args->incremental,
incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
LOG_DEBUG("main.c", "The base index for incremental scan does not have a main index"),
TRUE
);
@ -635,6 +635,7 @@ int main(int argc, const char *argv[]) {
OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging"),
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
OPT_BOOLEAN(0, "json-logs", &LogCtx.json_logs, "Output logs in JSON format."),
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),

View File

@ -49,7 +49,7 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "2.12.2"
#define VERSION "2.13.0"
static const char *const Version = VERSION;
#ifndef SIST_PLATFORM

View File

@ -34,6 +34,7 @@ typedef struct tpool {
int free_arg;
int stop;
int waiting;
int print_progress;
@ -121,22 +122,22 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
* see: https://github.com/htop-dev/htop/blob/f782f821f7f8081cb43bbad1c37f32830a260a81/linux/LinuxProcessList.c
*/
__always_inline
static size_t _get_total_mem(tpool_t* pool) {
FILE* statmfile = fopen("/proc/self/statm", "r");
static size_t _get_total_mem(tpool_t *pool) {
FILE *statmfile = fopen("/proc/self/statm", "r");
if (!statmfile)
return 0;
return 0;
long int dummy, dummy2, dummy3, dummy4, dummy5, dummy6;
long int m_resident;
int r = fscanf(statmfile, "%ld %ld %ld %ld %ld %ld %ld",
&dummy, /* m_virt */
&m_resident,
&dummy2, /* m_share */
&dummy3, /* m_trs */
&dummy4, /* unused since Linux 2.6; always 0 */
&dummy5, /* m_drs */
&dummy6); /* unused since Linux 2.6; always 0 */
&dummy, /* m_virt */
&m_resident,
&dummy2, /* m_share */
&dummy3, /* m_trs */
&dummy4, /* unused since Linux 2.6; always 0 */
&dummy5, /* m_drs */
&dummy6); /* unused since Linux 2.6; always 0 */
fclose(statmfile);
if (r == 7) {
@ -174,7 +175,7 @@ static void *tpool_worker(void *arg) {
if (work != NULL) {
stuck_notified = 0;
throttle_ms = 0;
while(!pool->stop && pool->mem_limit > 0 && _get_total_mem(pool) >= pool->mem_limit) {
while (!pool->stop && pool->mem_limit > 0 && _get_total_mem(pool) >= pool->mem_limit) {
if (!stuck_notified && throttle_ms >= 90000) {
// notify the pool that this thread is stuck.
pthread_mutex_lock(&(pool->work_mutex));
@ -215,7 +216,13 @@ static void *tpool_worker(void *arg) {
}
if (pool->print_progress) {
progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
if (LogCtx.json_logs) {
progress_bar_print_json(pool->done_cnt, pool->work_cnt, ScanCtx.stat_tn_size,
ScanCtx.stat_index_size, pool->waiting);
} else {
progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size,
ScanCtx.stat_index_size);
}
}
if (pool->work_head == NULL) {
@ -238,6 +245,9 @@ static void *tpool_worker(void *arg) {
void tpool_wait(tpool_t *pool) {
LOG_DEBUG("tpool.c", "Waiting for worker threads to finish")
pthread_mutex_lock(&(pool->work_mutex));
pool->waiting = TRUE;
while (TRUE) {
if (pool->done_cnt < pool->work_cnt) {
pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
@ -250,7 +260,7 @@ void tpool_wait(tpool_t *pool) {
}
}
}
if (pool->print_progress) {
if (pool->print_progress && !LogCtx.json_logs) {
progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
}
pthread_mutex_unlock(&(pool->work_mutex));
@ -312,6 +322,7 @@ tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int pri
pool->throttle_stuck_cnt = 0;
pool->mem_limit = mem_limit;
pool->stop = FALSE;
pool->waiting = FALSE;
pool->free_arg = free_arg;
pool->cleanup_func = cleanup_func;
pool->threads = calloc(sizeof(pthread_t), thread_cnt);

View File

@ -86,6 +86,21 @@ char *expandpath(const char *path) {
int PrintingProgressBar = 0;
#define BOOLEAN_STRING(x) ((x) == 0 ? "false" : "true")

/**
 * Write one progress record to stdout as a single-line JSON object.
 *
 * Output shape:
 *   {"progress": {"done":N,"count":N,"tn_size":N,"index_size":N,"waiting":true|false}}
 *
 * @param done       value emitted under "done"
 * @param count      value emitted under "count"
 * @param tn_size    value emitted under "tn_size"
 * @param index_size value emitted under "index_size"
 * @param waiting    emitted as false when 0, true for any other value
 */
void progress_bar_print_json(size_t done, size_t count, size_t tn_size, size_t index_size, int waiting) {
    char log_str[1024];

    // %zu is the conversion for size_t; %lu only matches on platforms where
    // size_t happens to be unsigned long.
    int log_len = snprintf(
            log_str, sizeof(log_str),
            "{\"progress\": {\"done\":%zu,\"count\":%zu,\"tn_size\":%zu,\"index_size\":%zu,\"waiting\":%s}}\n",
            done, count, tn_size, index_size, BOOLEAN_STRING(waiting)
    );
    if (log_len < 0) {
        // Formatting failed: there is nothing meaningful to emit.
        return;
    }
    // snprintf returns the length the full output would have had, so clamp it
    // to what is actually stored in the buffer before handing it to write().
    if ((size_t) log_len >= sizeof(log_str)) {
        log_len = (int) sizeof(log_str) - 1;
    }

    // The record is emitted with a single write() call; a failed or short
    // write is deliberately not retried (progress output is best effort).
    ssize_t written = write(STDOUT_FILENO, log_str, (size_t) log_len);
    (void) written;
}
void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
static int last_val = -1;

View File

@ -19,6 +19,7 @@ dyn_buffer_t url_escape(char *str);
extern int PrintingProgressBar;
// Writes one progress record to stdout as a single-line JSON object with the
// keys "done", "count", "tn_size", "index_size" and a boolean "waiting"
// (false when waiting == 0, true otherwise).
void progress_bar_print_json(size_t done, size_t count, size_t tn_size, size_t index_size, int waiting);
void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
GHashTable *incremental_get_table();

View File

@ -20,6 +20,13 @@ static struct mg_http_serve_opts DefaultServeOpts = {
};
/**
 * Format a mongoose address as text via mg_ntoa().
 *
 * The text is written into a function-local static buffer of
 * INET6_ADDRSTRLEN bytes, so the contents are overwritten by the next call.
 *
 * @param addr address to format
 * @return whatever mg_ntoa() returns for the static buffer
 */
__always_inline
static char *address_to_string(struct mg_addr *addr) {
    static char text[INET6_ADDRSTRLEN];

    char *formatted = mg_ntoa(addr, text, sizeof(text));
    return formatted;
}
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
mg_printf(
nc,
@ -592,6 +599,11 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
}
}
LOG_DEBUGF("serve.c", "<%s> GET %s",
address_to_string(&(nc->rem)),
hm->uri
)
if (mg_http_match_uri(hm, "/")) {
search_index(nc, hm);
} else if (mg_http_match_uri(hm, "/favicon.ico")) {

File diff suppressed because one or more lines are too long

View File

@ -109,7 +109,7 @@ find_library(MUPDF_LIB NAMES liblibmupdf.a)
find_library(CMS_LIB NAMES lcms2)
find_library(JAS_LIB NAMES jasper)
find_library(GUMBO_LIB NAMES gumbo)
find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/)
find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/)
target_compile_options(

View File

@ -28,6 +28,8 @@ ocr_extract_text(const char *tesseract_path, const char *tesseract_lang,
TessBaseAPI *api = TessBaseAPICreate();
TessBaseAPIInit3(api, tesseract_path, tesseract_lang);
TessBaseAPISetPageSegMode(api, PSM_AUTO_OSD);
TessBaseAPISetImage(api, img_buf, img_w, img_h, img_bpp, img_stride);
TessBaseAPISetSourceResolution(api, img_xres);