mirror of
https://github.com/simon987/sist2.git
synced 2025-12-19 02:09:06 +00:00
use sqlite to save index, major thread pool refactor
This commit is contained in:
437
src/main.c
437
src/main.c
@@ -5,8 +5,6 @@
|
||||
#include <locale.h>
|
||||
|
||||
#include "cli.h"
|
||||
#include "io/serialize.h"
|
||||
#include "io/store.h"
|
||||
#include "tpool.h"
|
||||
#include "io/walk.h"
|
||||
#include "index/elastic.h"
|
||||
@@ -16,10 +14,9 @@
|
||||
#include "auth0/auth0_c_api.h"
|
||||
|
||||
#include <signal.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "stats.h"
|
||||
#include "src/database/database.h"
|
||||
|
||||
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
|
||||
|
||||
@@ -46,30 +43,31 @@ void sig_handler(int signum) {
|
||||
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
|
||||
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
|
||||
|
||||
if (ScanCtx.dbg_current_files != NULL) {
|
||||
GHashTableIter iter;
|
||||
g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
|
||||
|
||||
void *key;
|
||||
void *value;
|
||||
while (g_hash_table_iter_next(&iter, &key, &value)) {
|
||||
parse_job_t *job = value;
|
||||
|
||||
if (isatty(STDERR_FILENO)) {
|
||||
LOG_DEBUGF(
|
||||
"*SIGNAL HANDLER*",
|
||||
"Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
|
||||
31 + ((unsigned int) key) % 7, key, job->filepath
|
||||
);
|
||||
} else {
|
||||
LOG_DEBUGF(
|
||||
"*SIGNAL HANDLER*",
|
||||
"THREAD [%04llX] was working on job %s",
|
||||
key, job->filepath
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: Print debug info
|
||||
// if (ScanCtx.dbg_current_files != NULL) {
|
||||
// GHashTableIter iter;
|
||||
// g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
|
||||
//
|
||||
// void *key;
|
||||
// void *value;
|
||||
// while (g_hash_table_iter_next(&iter, &key, &value)) {
|
||||
// parse_job_t *job = value;
|
||||
//
|
||||
// if (isatty(STDERR_FILENO)) {
|
||||
// LOG_DEBUGF(
|
||||
// "*SIGNAL HANDLER*",
|
||||
// "Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
|
||||
// 31 + ((unsigned int) key) % 7, key, job->filepath
|
||||
// );
|
||||
// } else {
|
||||
// LOG_DEBUGF(
|
||||
// "*SIGNAL HANDLER*",
|
||||
// "THREAD [%04llX] was working on job %s",
|
||||
// key, job->filepath
|
||||
// );
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
if (ScanCtx.pool != NULL) {
|
||||
tpool_dump_debug_info(ScanCtx.pool);
|
||||
@@ -82,18 +80,18 @@ void sig_handler(int signum) {
|
||||
LOG_INFO(
|
||||
"*SIGNAL HANDLER*",
|
||||
"Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
|
||||
)
|
||||
);
|
||||
LOG_INFO(
|
||||
"*SIGNAL HANDLER*",
|
||||
"sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
|
||||
)
|
||||
);
|
||||
|
||||
#ifndef SIST_DEBUG
|
||||
LOG_WARNING(
|
||||
"*SIGNAL HANDLER*",
|
||||
"You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
|
||||
"releases page to provide additionnal information when submitting a bug report."
|
||||
)
|
||||
);
|
||||
#endif
|
||||
|
||||
if (signum == SIGSEGV && sigsegv_handler != NULL) {
|
||||
@@ -105,36 +103,59 @@ void sig_handler(int signum) {
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
void init_dir(const char *dirpath, scan_args_t *args) {
|
||||
char path[PATH_MAX];
|
||||
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
|
||||
void database_scan_begin(scan_args_t *args) {
|
||||
index_descriptor_t *desc = &ScanCtx.index.desc;
|
||||
|
||||
time(&ScanCtx.index.desc.timestamp);
|
||||
strcpy(ScanCtx.index.desc.version, Version);
|
||||
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
|
||||
database_t *db = database_create(args->output, INDEX_DATABASE);
|
||||
|
||||
if (args->incremental) {
|
||||
// Update existing descriptor
|
||||
database_open(db);
|
||||
index_descriptor_t *original_desc = database_read_index_descriptor(db);
|
||||
|
||||
// copy original index id
|
||||
strcpy(desc->id, original_desc->id);
|
||||
|
||||
if (original_desc->version_major != VersionMajor) {
|
||||
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc->version, Version);
|
||||
}
|
||||
|
||||
strcpy(original_desc->root, desc->root);
|
||||
original_desc->root_len = desc->root_len;
|
||||
strcpy(original_desc->rewrite_url, desc->rewrite_url);
|
||||
strcpy(original_desc->name, desc->name);
|
||||
|
||||
time(&original_desc->timestamp);
|
||||
|
||||
database_write_index_descriptor(db, original_desc);
|
||||
free(original_desc);
|
||||
|
||||
database_incremental_scan_begin(db);
|
||||
|
||||
if (args->incremental != NULL) {
|
||||
// copy old index id
|
||||
char descriptor_path[PATH_MAX];
|
||||
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
|
||||
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
|
||||
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
|
||||
} else {
|
||||
// Create new descriptor
|
||||
|
||||
time(&desc->timestamp);
|
||||
strcpy(desc->version, Version);
|
||||
desc->version_major = VersionMajor;
|
||||
desc->version_minor = VersionMinor;
|
||||
desc->version_patch = VersionPatch;
|
||||
|
||||
// generate new index id based on timestamp
|
||||
unsigned char index_md5[MD5_DIGEST_LENGTH];
|
||||
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
|
||||
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
|
||||
|
||||
database_initialize(db);
|
||||
database_open(db);
|
||||
database_write_index_descriptor(db, desc);
|
||||
}
|
||||
|
||||
write_index_descriptor(path, &ScanCtx.index.desc);
|
||||
database_close(db, FALSE);
|
||||
}
|
||||
|
||||
void scan_print_header() {
|
||||
LOG_INFOF("main.c", "sist2 v%s", Version)
|
||||
}
|
||||
|
||||
void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
|
||||
store_write(ScanCtx.index.store, key, key_len, buf, buf_len);
|
||||
void write_thumbnail_callback(char *key, int num, void *buf, size_t buf_len) {
|
||||
database_write_thumbnail(ProcData.index_db, key, num, buf, buf_len);
|
||||
}
|
||||
|
||||
void _log(const char *filepath, int level, char *str) {
|
||||
@@ -177,11 +198,8 @@ void _logf(const char *filepath, int level, char *format, ...) {
|
||||
}
|
||||
|
||||
void initialize_scan_context(scan_args_t *args) {
|
||||
|
||||
ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL);
|
||||
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
|
||||
// TODO: shared
|
||||
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
|
||||
pthread_mutex_init(&ScanCtx.copy_table_mu, NULL);
|
||||
|
||||
ScanCtx.calculate_checksums = args->calculate_checksums;
|
||||
|
||||
@@ -189,7 +207,7 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.arc_ctx.mode = args->archive_mode;
|
||||
ScanCtx.arc_ctx.log = _log;
|
||||
ScanCtx.arc_ctx.logf = _logf;
|
||||
ScanCtx.arc_ctx.parse = (parse_callback_t) parse_job;
|
||||
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
|
||||
if (args->archive_passphrase != NULL) {
|
||||
strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
|
||||
} else {
|
||||
@@ -199,12 +217,12 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
// Comic
|
||||
ScanCtx.comic_ctx.log = _log;
|
||||
ScanCtx.comic_ctx.logf = _logf;
|
||||
ScanCtx.comic_ctx.store = _store;
|
||||
ScanCtx.comic_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.comic_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.comic_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.comic_ctx.tn_qscale = args->tn_quality;
|
||||
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
|
||||
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
|
||||
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string("application/x-cbr");
|
||||
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string("application/x-cbz");
|
||||
|
||||
// Ebook
|
||||
ScanCtx.ebook_ctx.content_size = args->content_size;
|
||||
@@ -216,7 +234,7 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
}
|
||||
ScanCtx.ebook_ctx.log = _log;
|
||||
ScanCtx.ebook_ctx.logf = _logf;
|
||||
ScanCtx.ebook_ctx.store = _store;
|
||||
ScanCtx.ebook_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
|
||||
ScanCtx.ebook_ctx.tn_qscale = args->tn_quality;
|
||||
|
||||
@@ -224,7 +242,7 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.font_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.font_ctx.log = _log;
|
||||
ScanCtx.font_ctx.logf = _logf;
|
||||
ScanCtx.font_ctx.store = _store;
|
||||
ScanCtx.font_ctx.store = write_thumbnail_callback;
|
||||
|
||||
// Media
|
||||
ScanCtx.media_ctx.tn_qscale = args->tn_quality;
|
||||
@@ -232,7 +250,7 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.media_ctx.tn_count = args->tn_count;
|
||||
ScanCtx.media_ctx.log = _log;
|
||||
ScanCtx.media_ctx.logf = _logf;
|
||||
ScanCtx.media_ctx.store = _store;
|
||||
ScanCtx.media_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024;
|
||||
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
|
||||
ScanCtx.media_ctx.read_subtitles = args->tn_count;
|
||||
@@ -248,7 +266,7 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.ooxml_ctx.content_size = args->content_size;
|
||||
ScanCtx.ooxml_ctx.log = _log;
|
||||
ScanCtx.ooxml_ctx.logf = _logf;
|
||||
ScanCtx.ooxml_ctx.store = _store;
|
||||
ScanCtx.ooxml_ctx.store = write_thumbnail_callback;
|
||||
|
||||
// MOBI
|
||||
ScanCtx.mobi_ctx.content_size = args->content_size;
|
||||
@@ -264,8 +282,8 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.msdoc_ctx.content_size = args->content_size;
|
||||
ScanCtx.msdoc_ctx.log = _log;
|
||||
ScanCtx.msdoc_ctx.logf = _logf;
|
||||
ScanCtx.msdoc_ctx.store = _store;
|
||||
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/msword");
|
||||
ScanCtx.msdoc_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string("application/msword");
|
||||
|
||||
ScanCtx.threads = args->threads;
|
||||
ScanCtx.depth = args->depth;
|
||||
@@ -283,174 +301,67 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.raw_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.raw_ctx.log = _log;
|
||||
ScanCtx.raw_ctx.logf = _logf;
|
||||
ScanCtx.raw_ctx.store = _store;
|
||||
ScanCtx.raw_ctx.store = write_thumbnail_callback;
|
||||
|
||||
// Wpd
|
||||
ScanCtx.wpd_ctx.content_size = args->content_size;
|
||||
ScanCtx.wpd_ctx.log = _log;
|
||||
ScanCtx.wpd_ctx.logf = _logf;
|
||||
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/wordperfect");
|
||||
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string("application/wordperfect");
|
||||
|
||||
// Json
|
||||
ScanCtx.json_ctx.content_size = args->content_size;
|
||||
ScanCtx.json_ctx.log = _log;
|
||||
ScanCtx.json_ctx.logf = _logf;
|
||||
ScanCtx.json_ctx.json_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/json");
|
||||
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson");
|
||||
ScanCtx.json_ctx.json_mime = mime_get_mime_by_string("application/json");
|
||||
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string("application/ndjson");
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads an existing index as the baseline for incremental scanning.
|
||||
* 1. load old index files (original+main) => original_table
|
||||
* 2. allocate empty table => copy_table
|
||||
* 3. allocate empty table => new_table
|
||||
* the original_table/copy_table/new_table will be populated in parsing/parse.c:parse
|
||||
* and consumed in main.c:save_incremental_index
|
||||
*
|
||||
* Note: the existing index may or may not be of incremental index form.
|
||||
*/
|
||||
void load_incremental_index(const scan_args_t *args) {
|
||||
char file_path[PATH_MAX];
|
||||
|
||||
ScanCtx.original_table = incremental_get_table();
|
||||
ScanCtx.copy_table = incremental_get_table();
|
||||
ScanCtx.new_table = incremental_get_table();
|
||||
|
||||
char descriptor_path[PATH_MAX];
|
||||
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
|
||||
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
|
||||
|
||||
if (strcmp(original_desc.version, Version) != 0) {
|
||||
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
|
||||
}
|
||||
|
||||
READ_INDICES(
|
||||
file_path,
|
||||
args->incremental,
|
||||
incremental_read(ScanCtx.original_table, file_path, &original_desc),
|
||||
LOG_DEBUG("main.c", "The base index for incremental scan does not have a main index"),
|
||||
TRUE
|
||||
);
|
||||
|
||||
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
|
||||
}
|
||||
|
||||
/**
|
||||
* Saves an incremental index.
|
||||
* Before calling this function, the scanner should have finished writing the main index.
|
||||
* 1. Build original_table - new_table => delete_table
|
||||
* 2. Incrementally copy from old index files [(original+main) /\ copy_table] => index_original.ndjson.zst & store
|
||||
*/
|
||||
void save_incremental_index(scan_args_t *args) {
|
||||
char dst_path[PATH_MAX];
|
||||
char store_path[PATH_MAX];
|
||||
char file_path[PATH_MAX];
|
||||
char del_path[PATH_MAX];
|
||||
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
|
||||
snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path);
|
||||
store_t *source = store_create(store_path, STORE_SIZE_TN);
|
||||
|
||||
LOG_INFOF("main.c", "incremental_delete: original size = %u, copy size = %u, new size = %u",
|
||||
g_hash_table_size(ScanCtx.original_table),
|
||||
g_hash_table_size(ScanCtx.copy_table),
|
||||
g_hash_table_size(ScanCtx.new_table));
|
||||
snprintf(del_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path);
|
||||
READ_INDICES(file_path, args->incremental,
|
||||
incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table),
|
||||
perror("incremental_delete"), 1);
|
||||
writer_cleanup();
|
||||
|
||||
READ_INDICES(file_path, args->incremental,
|
||||
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table),
|
||||
perror("incremental_copy"), 1);
|
||||
writer_cleanup();
|
||||
|
||||
store_destroy(source);
|
||||
|
||||
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
|
||||
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
|
||||
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
|
||||
store_copy(source_tags, dst_path);
|
||||
store_destroy(source_tags);
|
||||
}
|
||||
|
||||
/**
|
||||
* An index can be either incremental or non-incremental (initial index).
|
||||
* For an initial index, there is only the "main" index.
|
||||
* For an incremental index, there are, additionally:
|
||||
* - An "original" index, referencing all files unchanged since the previous index.
|
||||
* - A "delete" index, referencing all files that exist in the previous index, but deleted since then.
|
||||
* Therefore, for an incremental index, "main"+"original" covers all the current files in the live filesystem,
|
||||
* and is orthogonal to the "delete" index. When building an incremental index upon an old incremental index,
|
||||
* the old "delete" index can be safely ignored.
|
||||
*/
|
||||
void sist2_scan(scan_args_t *args) {
|
||||
|
||||
ScanCtx.mime_table = mime_get_mime_table();
|
||||
ScanCtx.ext_table = mime_get_ext_table();
|
||||
|
||||
initialize_scan_context(args);
|
||||
|
||||
init_dir(ScanCtx.index.path, args);
|
||||
database_scan_begin(args);
|
||||
|
||||
char store_path[PATH_MAX];
|
||||
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
|
||||
ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
|
||||
LOG_INFOF("main.c", "sist2 v%s", Version);
|
||||
|
||||
snprintf(store_path, PATH_MAX, "%smeta", ScanCtx.index.path);
|
||||
ScanCtx.index.meta_store = store_create(store_path, STORE_SIZE_META);
|
||||
|
||||
scan_print_header();
|
||||
|
||||
if (args->incremental != NULL) {
|
||||
load_incremental_index(args);
|
||||
}
|
||||
|
||||
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, FALSE);
|
||||
tpool_start(ScanCtx.writer_pool);
|
||||
|
||||
ScanCtx.pool = tpool_create(ScanCtx.threads, thread_cleanup, TRUE);
|
||||
ScanCtx.pool = tpool_create(ScanCtx.threads, TRUE);
|
||||
tpool_start(ScanCtx.pool);
|
||||
|
||||
if (args->list_path) {
|
||||
// Scan using file list
|
||||
int list_ret = iterate_file_list(args->list_file);
|
||||
if (list_ret != 0) {
|
||||
LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret)
|
||||
LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret);
|
||||
}
|
||||
} else {
|
||||
// Scan directory recursively
|
||||
int walk_ret = walk_directory_tree(ScanCtx.index.desc.root);
|
||||
if (walk_ret == -1) {
|
||||
LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno)
|
||||
LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno);
|
||||
}
|
||||
}
|
||||
|
||||
tpool_wait(ScanCtx.pool);
|
||||
tpool_destroy(ScanCtx.pool);
|
||||
|
||||
tpool_wait(ScanCtx.writer_pool);
|
||||
tpool_destroy(ScanCtx.writer_pool);
|
||||
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count);
|
||||
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count);
|
||||
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count);
|
||||
LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size);
|
||||
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size);
|
||||
|
||||
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
|
||||
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
|
||||
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
|
||||
LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size)
|
||||
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size)
|
||||
database_t *db = database_create(args->output, INDEX_DATABASE);
|
||||
database_open(db);
|
||||
|
||||
if (args->incremental != NULL) {
|
||||
save_incremental_index(args);
|
||||
if (args->incremental != FALSE) {
|
||||
database_incremental_scan_end(db);
|
||||
}
|
||||
|
||||
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
|
||||
|
||||
store_destroy(ScanCtx.index.store);
|
||||
store_destroy(ScanCtx.index.meta_store);
|
||||
database_generate_stats(db, args->treemap_threshold);
|
||||
database_close(db, TRUE);
|
||||
}
|
||||
|
||||
void sist2_index(index_args_t *args) {
|
||||
char file_path[PATH_MAX];
|
||||
|
||||
IndexCtx.es_url = args->es_url;
|
||||
IndexCtx.es_index = args->es_index;
|
||||
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
|
||||
@@ -461,91 +372,69 @@ void sist2_index(index_args_t *args) {
|
||||
elastic_init(args->force_reset, args->es_mappings, args->es_settings);
|
||||
}
|
||||
|
||||
char descriptor_path[PATH_MAX];
|
||||
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
|
||||
database_t *db = database_create(args->index_path, INDEX_DATABASE);
|
||||
database_open(db);
|
||||
index_descriptor_t *desc = database_read_index_descriptor(db);
|
||||
database_close(db, FALSE);
|
||||
|
||||
index_descriptor_t desc = read_index_descriptor(descriptor_path);
|
||||
LOG_DEBUGF("main.c", "Index version %s", desc->version);
|
||||
|
||||
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
|
||||
|
||||
if (strcmp(desc.version, Version) != 0) {
|
||||
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", desc.version, Version)
|
||||
if (desc->version_major != VersionMajor) {
|
||||
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", desc->version, Version);
|
||||
}
|
||||
|
||||
DIR *dir = opendir(args->index_path);
|
||||
if (dir == NULL) {
|
||||
LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
|
||||
}
|
||||
|
||||
char path_tmp[PATH_MAX];
|
||||
snprintf(path_tmp, sizeof(path_tmp), "%stags", args->index_path);
|
||||
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
|
||||
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
|
||||
|
||||
snprintf(path_tmp, sizeof(path_tmp), "%smeta", args->index_path);
|
||||
IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
|
||||
IndexCtx.meta = store_read_all(IndexCtx.meta_store);
|
||||
|
||||
index_func f;
|
||||
if (args->print) {
|
||||
f = print_json;
|
||||
} else {
|
||||
f = index_json;
|
||||
}
|
||||
|
||||
IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, args->print == 0);
|
||||
IndexCtx.pool = tpool_create(args->threads, args->print == FALSE);
|
||||
tpool_start(IndexCtx.pool);
|
||||
|
||||
READ_INDICES(file_path, args->index_path, {
|
||||
read_index(file_path, desc.id, desc.type, f);
|
||||
LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type);
|
||||
}, {}, !args->incremental);
|
||||
int cnt = 0;
|
||||
|
||||
// Only read the _delete index if we're sending data to ES
|
||||
if (!args->print) {
|
||||
snprintf(file_path, PATH_MAX, "%s_index_delete.list.zst", args->index_path);
|
||||
if (0 == access(file_path, R_OK)) {
|
||||
read_lines(file_path, (line_processor_t) {
|
||||
.data = NULL,
|
||||
.func = delete_document
|
||||
});
|
||||
LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type)
|
||||
db = database_create(args->index_path, INDEX_DATABASE);
|
||||
database_open(db);
|
||||
database_iterator_t *iterator = database_create_document_iterator(db);
|
||||
database_document_iter_foreach(json, iterator) {
|
||||
const char *doc_id = cJSON_GetObjectItem(json, "_id")->valuestring;
|
||||
if (args->print) {
|
||||
print_json(json, doc_id);
|
||||
} else {
|
||||
index_json(json, doc_id);
|
||||
cnt +=1;
|
||||
}
|
||||
}
|
||||
|
||||
closedir(dir);
|
||||
free(iterator);
|
||||
database_close(db, FALSE);
|
||||
|
||||
// Only read the _delete index if we're sending data to ES
|
||||
if (!args->print) {
|
||||
// TODO: (delete_list iterator)
|
||||
}
|
||||
|
||||
tpool_wait(IndexCtx.pool);
|
||||
|
||||
tpool_destroy(IndexCtx.pool);
|
||||
|
||||
if (IndexCtx.needs_es_connection) {
|
||||
finish_indexer(args->script, args->async_script, desc.id);
|
||||
finish_indexer(args->script, args->async_script, desc->id);
|
||||
}
|
||||
|
||||
store_destroy(IndexCtx.tag_store);
|
||||
store_destroy(IndexCtx.meta_store);
|
||||
g_hash_table_remove_all(IndexCtx.tags);
|
||||
g_hash_table_destroy(IndexCtx.tags);
|
||||
free(desc);
|
||||
}
|
||||
|
||||
void sist2_exec_script(exec_args_t *args) {
|
||||
|
||||
LogCtx.verbose = TRUE;
|
||||
|
||||
char descriptor_path[PATH_MAX];
|
||||
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
|
||||
index_descriptor_t desc = read_index_descriptor(descriptor_path);
|
||||
|
||||
IndexCtx.es_url = args->es_url;
|
||||
IndexCtx.es_index = args->es_index;
|
||||
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
|
||||
IndexCtx.needs_es_connection = TRUE;
|
||||
|
||||
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
|
||||
database_t *db = database_create(args->index_path, INDEX_DATABASE);
|
||||
database_open(db);
|
||||
|
||||
execute_update_script(args->script, args->async_script, desc.id);
|
||||
index_descriptor_t *desc = database_read_index_descriptor(db);
|
||||
LOG_DEBUGF("main.c", "Index version %s", desc->version);
|
||||
|
||||
execute_update_script(args->script, args->async_script, desc->id);
|
||||
free(args->script);
|
||||
database_close(db, FALSE);
|
||||
}
|
||||
|
||||
void sist2_web(web_args_t *args) {
|
||||
@@ -569,23 +458,17 @@ void sist2_web(web_args_t *args) {
|
||||
|
||||
for (int i = 0; i < args->index_count; i++) {
|
||||
char *abs_path = abspath(args->indices[i]);
|
||||
if (abs_path == NULL) {
|
||||
return;
|
||||
}
|
||||
char path_tmp[PATH_MAX];
|
||||
|
||||
snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
|
||||
WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
|
||||
|
||||
snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
|
||||
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
|
||||
WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);
|
||||
|
||||
snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
|
||||
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
|
||||
|
||||
strcpy(WebCtx.indices[i].path, abs_path);
|
||||
LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name)
|
||||
|
||||
WebCtx.indices[i].db = database_create(abs_path, INDEX_DATABASE);
|
||||
database_open(WebCtx.indices[i].db);
|
||||
|
||||
index_descriptor_t *desc = database_read_index_descriptor(WebCtx.indices[i].db);
|
||||
WebCtx.indices[i].desc = *desc;
|
||||
free(desc);
|
||||
|
||||
LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name);
|
||||
free(abs_path);
|
||||
}
|
||||
|
||||
@@ -600,7 +483,7 @@ void sist2_web(web_args_t *args) {
|
||||
* Negative number -> Raise error
|
||||
* Specified a valid number -> Continue as normal
|
||||
*/
|
||||
int set_to_negative_if_value_is_zero(struct argparse *self, const struct argparse_option *option) {
|
||||
int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct argparse_option *option) {
|
||||
int specified_value = *(int *) option->value;
|
||||
|
||||
if (specified_value == 0) {
|
||||
@@ -613,6 +496,7 @@ int set_to_negative_if_value_is_zero(struct argparse *self, const struct argpars
|
||||
}
|
||||
}
|
||||
|
||||
#include <zlib.h>
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
// sigsegv_handler = signal(SIGSEGV, sig_handler);
|
||||
@@ -645,8 +529,8 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_GROUP("Scan options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_INTEGER('q', "thumbnail-quality", &scan_args->tn_quality,
|
||||
"Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
|
||||
"Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
|
||||
OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size,
|
||||
"Thumbnail size, in pixels. DEFAULT=500",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size),
|
||||
@@ -656,7 +540,8 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_INTEGER(0, "content-size", &scan_args->content_size,
|
||||
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size),
|
||||
OPT_STRING(0, "incremental", &scan_args->incremental,
|
||||
OPT_BOOLEAN(0, "incremental", &scan_args->incremental,
|
||||
// TODO: Update help string
|
||||
"Reuse an existing index and only scan modified files."),
|
||||
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
|
||||
OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
|
||||
@@ -692,7 +577,8 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_GROUP("Index options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
|
||||
"Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
|
||||
@@ -701,20 +587,22 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
|
||||
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
|
||||
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
|
||||
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 70"),
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
|
||||
"(You must use this option the first time you use the index command)"),
|
||||
|
||||
OPT_GROUP("Web options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
|
||||
"Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
|
||||
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
|
||||
OPT_STRING(0, "auth0-audience", &web_args->auth0_audience, "API audience/identifier"),
|
||||
OPT_STRING(0, "auth0-domain", &web_args->auth0_domain, "Application domain"),
|
||||
OPT_STRING(0, "auth0-client-id", &web_args->auth0_client_id, "Application client ID"),
|
||||
OPT_STRING(0, "auth0-public-key-file", &web_args->auth0_public_key_path, "Path to Auth0 public key file extracted from <domain>/pem"),
|
||||
OPT_STRING(0, "auth0-public-key-file", &web_args->auth0_public_key_path,
|
||||
"Path to Auth0 public key file extracted from <domain>/pem"),
|
||||
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
|
||||
OPT_STRING(0, "tagline", &web_args->tagline, "Tagline in navbar"),
|
||||
OPT_BOOLEAN(0, "dev", &web_args->dev, "Serve html & js files from disk (for development)"),
|
||||
@@ -722,7 +610,8 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
OPT_GROUP("Exec-script options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
|
||||
"Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||
@@ -800,7 +689,7 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
} else {
|
||||
argparse_usage(&argparse);
|
||||
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
|
||||
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user