use sqlite to save index, major thread pool refactor

This commit is contained in:
2023-04-03 21:39:50 -04:00
parent ca973d63a4
commit fc36f33d52
62 changed files with 3630 additions and 4673 deletions

View File

@@ -5,8 +5,6 @@
#include <locale.h>
#include "cli.h"
#include "io/serialize.h"
#include "io/store.h"
#include "tpool.h"
#include "io/walk.h"
#include "index/elastic.h"
@@ -16,10 +14,9 @@
#include "auth0/auth0_c_api.h"
#include <signal.h>
#include <unistd.h>
#include <sys/mman.h>
#include <pthread.h>
#include "stats.h"
#include "src/database/database.h"
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
@@ -46,30 +43,31 @@ void sig_handler(int signum) {
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
if (ScanCtx.dbg_current_files != NULL) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
void *key;
void *value;
while (g_hash_table_iter_next(&iter, &key, &value)) {
parse_job_t *job = value;
if (isatty(STDERR_FILENO)) {
LOG_DEBUGF(
"*SIGNAL HANDLER*",
"Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
31 + ((unsigned int) key) % 7, key, job->filepath
);
} else {
LOG_DEBUGF(
"*SIGNAL HANDLER*",
"THREAD [%04llX] was working on job %s",
key, job->filepath
);
}
}
}
// TODO: Print debug info
// if (ScanCtx.dbg_current_files != NULL) {
// GHashTableIter iter;
// g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
//
// void *key;
// void *value;
// while (g_hash_table_iter_next(&iter, &key, &value)) {
// parse_job_t *job = value;
//
// if (isatty(STDERR_FILENO)) {
// LOG_DEBUGF(
// "*SIGNAL HANDLER*",
// "Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
// 31 + ((unsigned int) key) % 7, key, job->filepath
// );
// } else {
// LOG_DEBUGF(
// "*SIGNAL HANDLER*",
// "THREAD [%04llX] was working on job %s",
// key, job->filepath
// );
// }
// }
// }
if (ScanCtx.pool != NULL) {
tpool_dump_debug_info(ScanCtx.pool);
@@ -82,18 +80,18 @@ void sig_handler(int signum) {
LOG_INFO(
"*SIGNAL HANDLER*",
"Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
)
);
LOG_INFO(
"*SIGNAL HANDLER*",
"sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
)
);
#ifndef SIST_DEBUG
LOG_WARNING(
"*SIGNAL HANDLER*",
"You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
"releases page to provide additionnal information when submitting a bug report."
)
);
#endif
if (signum == SIGSEGV && sigsegv_handler != NULL) {
@@ -105,36 +103,59 @@ void sig_handler(int signum) {
exit(-1);
}
void init_dir(const char *dirpath, scan_args_t *args) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
void database_scan_begin(scan_args_t *args) {
index_descriptor_t *desc = &ScanCtx.index.desc;
time(&ScanCtx.index.desc.timestamp);
strcpy(ScanCtx.index.desc.version, Version);
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
database_t *db = database_create(args->output, INDEX_DATABASE);
if (args->incremental) {
// Update existing descriptor
database_open(db);
index_descriptor_t *original_desc = database_read_index_descriptor(db);
// copy original index id
strcpy(desc->id, original_desc->id);
if (original_desc->version_major != VersionMajor) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc->version, Version);
}
strcpy(original_desc->root, desc->root);
original_desc->root_len = desc->root_len;
strcpy(original_desc->rewrite_url, desc->rewrite_url);
strcpy(original_desc->name, desc->name);
time(&original_desc->timestamp);
database_write_index_descriptor(db, original_desc);
free(original_desc);
database_incremental_scan_begin(db);
if (args->incremental != NULL) {
// copy old index id
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
} else {
// Create new descriptor
time(&desc->timestamp);
strcpy(desc->version, Version);
desc->version_major = VersionMajor;
desc->version_minor = VersionMinor;
desc->version_patch = VersionPatch;
// generate new index id based on timestamp
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
database_initialize(db);
database_open(db);
database_write_index_descriptor(db, desc);
}
write_index_descriptor(path, &ScanCtx.index.desc);
database_close(db, FALSE);
}
void scan_print_header() {
LOG_INFOF("main.c", "sist2 v%s", Version)
}
void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
store_write(ScanCtx.index.store, key, key_len, buf, buf_len);
void write_thumbnail_callback(char *key, int num, void *buf, size_t buf_len) {
database_write_thumbnail(ProcData.index_db, key, num, buf, buf_len);
}
void _log(const char *filepath, int level, char *str) {
@@ -177,11 +198,8 @@ void _logf(const char *filepath, int level, char *format, ...) {
}
void initialize_scan_context(scan_args_t *args) {
ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL);
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
// TODO: shared
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
pthread_mutex_init(&ScanCtx.copy_table_mu, NULL);
ScanCtx.calculate_checksums = args->calculate_checksums;
@@ -189,7 +207,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.arc_ctx.mode = args->archive_mode;
ScanCtx.arc_ctx.log = _log;
ScanCtx.arc_ctx.logf = _logf;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse_job;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
if (args->archive_passphrase != NULL) {
strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
} else {
@@ -199,12 +217,12 @@ void initialize_scan_context(scan_args_t *args) {
// Comic
ScanCtx.comic_ctx.log = _log;
ScanCtx.comic_ctx.logf = _logf;
ScanCtx.comic_ctx.store = _store;
ScanCtx.comic_ctx.store = write_thumbnail_callback;
ScanCtx.comic_ctx.enable_tn = args->tn_count > 0;
ScanCtx.comic_ctx.tn_size = args->tn_size;
ScanCtx.comic_ctx.tn_qscale = args->tn_quality;
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string("application/x-cbr");
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string("application/x-cbz");
// Ebook
ScanCtx.ebook_ctx.content_size = args->content_size;
@@ -216,7 +234,7 @@ void initialize_scan_context(scan_args_t *args) {
}
ScanCtx.ebook_ctx.log = _log;
ScanCtx.ebook_ctx.logf = _logf;
ScanCtx.ebook_ctx.store = _store;
ScanCtx.ebook_ctx.store = write_thumbnail_callback;
ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
ScanCtx.ebook_ctx.tn_qscale = args->tn_quality;
@@ -224,7 +242,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.font_ctx.enable_tn = args->tn_count > 0;
ScanCtx.font_ctx.log = _log;
ScanCtx.font_ctx.logf = _logf;
ScanCtx.font_ctx.store = _store;
ScanCtx.font_ctx.store = write_thumbnail_callback;
// Media
ScanCtx.media_ctx.tn_qscale = args->tn_quality;
@@ -232,7 +250,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.media_ctx.tn_count = args->tn_count;
ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.store = write_thumbnail_callback;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024;
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
ScanCtx.media_ctx.read_subtitles = args->tn_count;
@@ -248,7 +266,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.ooxml_ctx.content_size = args->content_size;
ScanCtx.ooxml_ctx.log = _log;
ScanCtx.ooxml_ctx.logf = _logf;
ScanCtx.ooxml_ctx.store = _store;
ScanCtx.ooxml_ctx.store = write_thumbnail_callback;
// MOBI
ScanCtx.mobi_ctx.content_size = args->content_size;
@@ -264,8 +282,8 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.msdoc_ctx.content_size = args->content_size;
ScanCtx.msdoc_ctx.log = _log;
ScanCtx.msdoc_ctx.logf = _logf;
ScanCtx.msdoc_ctx.store = _store;
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/msword");
ScanCtx.msdoc_ctx.store = write_thumbnail_callback;
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string("application/msword");
ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
@@ -283,174 +301,67 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.raw_ctx.tn_size = args->tn_size;
ScanCtx.raw_ctx.log = _log;
ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.store = _store;
ScanCtx.raw_ctx.store = write_thumbnail_callback;
// Wpd
ScanCtx.wpd_ctx.content_size = args->content_size;
ScanCtx.wpd_ctx.log = _log;
ScanCtx.wpd_ctx.logf = _logf;
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/wordperfect");
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string("application/wordperfect");
// Json
ScanCtx.json_ctx.content_size = args->content_size;
ScanCtx.json_ctx.log = _log;
ScanCtx.json_ctx.logf = _logf;
ScanCtx.json_ctx.json_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/json");
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson");
ScanCtx.json_ctx.json_mime = mime_get_mime_by_string("application/json");
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string("application/ndjson");
}
/**
* Loads an existing index as the baseline for incremental scanning.
* 1. load old index files (original+main) => original_table
* 2. allocate empty table => copy_table
* 3. allocate empty table => new_table
* the original_table/copy_table/new_table will be populated in parsing/parse.c:parse
* and consumed in main.c:save_incremental_index
*
* Note: the existing index may or may not be of incremental index form.
*/
void load_incremental_index(const scan_args_t *args) {
char file_path[PATH_MAX];
ScanCtx.original_table = incremental_get_table();
ScanCtx.copy_table = incremental_get_table();
ScanCtx.new_table = incremental_get_table();
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
if (strcmp(original_desc.version, Version) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
}
READ_INDICES(
file_path,
args->incremental,
incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_DEBUG("main.c", "The base index for incremental scan does not have a main index"),
TRUE
);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
/**
* Saves an incremental index.
* Before calling this function, the scanner should have finished writing the main index.
* 1. Build original_table - new_table => delete_table
* 2. Incrementally copy from old index files [(original+main) /\ copy_table] => index_original.ndjson.zst & store
*/
void save_incremental_index(scan_args_t *args) {
char dst_path[PATH_MAX];
char store_path[PATH_MAX];
char file_path[PATH_MAX];
char del_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path);
store_t *source = store_create(store_path, STORE_SIZE_TN);
LOG_INFOF("main.c", "incremental_delete: original size = %u, copy size = %u, new size = %u",
g_hash_table_size(ScanCtx.original_table),
g_hash_table_size(ScanCtx.copy_table),
g_hash_table_size(ScanCtx.new_table));
snprintf(del_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path);
READ_INDICES(file_path, args->incremental,
incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table),
perror("incremental_delete"), 1);
writer_cleanup();
READ_INDICES(file_path, args->incremental,
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table),
perror("incremental_copy"), 1);
writer_cleanup();
store_destroy(source);
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
store_copy(source_tags, dst_path);
store_destroy(source_tags);
}
/**
* An index can be either incremental or non-incremental (initial index).
* For an initial index, there is only the "main" index.
* For an incremental index, there are, additionally:
* - An "original" index, referencing all files unchanged since the previous index.
* - A "delete" index, referencing all files that exist in the previous index, but deleted since then.
* Therefore, for an incremental index, "main"+"original" covers all the current files in the live filesystem,
* and is orthognal with the "delete" index. When building an incremental index upon an old incremental index,
* the old "delete" index can be safely ignored.
*/
void sist2_scan(scan_args_t *args) {
ScanCtx.mime_table = mime_get_mime_table();
ScanCtx.ext_table = mime_get_ext_table();
initialize_scan_context(args);
init_dir(ScanCtx.index.path, args);
database_scan_begin(args);
char store_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
LOG_INFOF("main.c", "sist2 v%s", Version);
snprintf(store_path, PATH_MAX, "%smeta", ScanCtx.index.path);
ScanCtx.index.meta_store = store_create(store_path, STORE_SIZE_META);
scan_print_header();
if (args->incremental != NULL) {
load_incremental_index(args);
}
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, FALSE);
tpool_start(ScanCtx.writer_pool);
ScanCtx.pool = tpool_create(ScanCtx.threads, thread_cleanup, TRUE);
ScanCtx.pool = tpool_create(ScanCtx.threads, TRUE);
tpool_start(ScanCtx.pool);
if (args->list_path) {
// Scan using file list
int list_ret = iterate_file_list(args->list_file);
if (list_ret != 0) {
LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret)
LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret);
}
} else {
// Scan directory recursively
int walk_ret = walk_directory_tree(ScanCtx.index.desc.root);
if (walk_ret == -1) {
LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno)
LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno);
}
}
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);
tpool_wait(ScanCtx.writer_pool);
tpool_destroy(ScanCtx.writer_pool);
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count);
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count);
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count);
LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size);
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size);
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size)
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size)
database_t *db = database_create(args->output, INDEX_DATABASE);
database_open(db);
if (args->incremental != NULL) {
save_incremental_index(args);
if (args->incremental != FALSE) {
database_incremental_scan_end(db);
}
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
store_destroy(ScanCtx.index.store);
store_destroy(ScanCtx.index.meta_store);
database_generate_stats(db, args->treemap_threshold);
database_close(db, TRUE);
}
void sist2_index(index_args_t *args) {
char file_path[PATH_MAX];
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
@@ -461,91 +372,69 @@ void sist2_index(index_args_t *args) {
elastic_init(args->force_reset, args->es_mappings, args->es_settings);
}
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
database_t *db = database_create(args->index_path, INDEX_DATABASE);
database_open(db);
index_descriptor_t *desc = database_read_index_descriptor(db);
database_close(db, FALSE);
index_descriptor_t desc = read_index_descriptor(descriptor_path);
LOG_DEBUGF("main.c", "Index version %s", desc->version);
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
if (strcmp(desc.version, Version) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", desc.version, Version)
if (desc->version_major != VersionMajor) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", desc->version, Version);
}
DIR *dir = opendir(args->index_path);
if (dir == NULL) {
LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
}
char path_tmp[PATH_MAX];
snprintf(path_tmp, sizeof(path_tmp), "%stags", args->index_path);
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
snprintf(path_tmp, sizeof(path_tmp), "%smeta", args->index_path);
IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
IndexCtx.meta = store_read_all(IndexCtx.meta_store);
index_func f;
if (args->print) {
f = print_json;
} else {
f = index_json;
}
IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, args->print == 0);
IndexCtx.pool = tpool_create(args->threads, args->print == FALSE);
tpool_start(IndexCtx.pool);
READ_INDICES(file_path, args->index_path, {
read_index(file_path, desc.id, desc.type, f);
LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type);
}, {}, !args->incremental);
int cnt = 0;
// Only read the _delete index if we're sending data to ES
if (!args->print) {
snprintf(file_path, PATH_MAX, "%s_index_delete.list.zst", args->index_path);
if (0 == access(file_path, R_OK)) {
read_lines(file_path, (line_processor_t) {
.data = NULL,
.func = delete_document
});
LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type)
db = database_create(args->index_path, INDEX_DATABASE);
database_open(db);
database_iterator_t *iterator = database_create_document_iterator(db);
database_document_iter_foreach(json, iterator) {
const char *doc_id = cJSON_GetObjectItem(json, "_id")->valuestring;
if (args->print) {
print_json(json, doc_id);
} else {
index_json(json, doc_id);
cnt +=1;
}
}
closedir(dir);
free(iterator);
database_close(db, FALSE);
// Only read the _delete index if we're sending data to ES
if (!args->print) {
// TODO: (delete_list iterator)
}
tpool_wait(IndexCtx.pool);
tpool_destroy(IndexCtx.pool);
if (IndexCtx.needs_es_connection) {
finish_indexer(args->script, args->async_script, desc.id);
finish_indexer(args->script, args->async_script, desc->id);
}
store_destroy(IndexCtx.tag_store);
store_destroy(IndexCtx.meta_store);
g_hash_table_remove_all(IndexCtx.tags);
g_hash_table_destroy(IndexCtx.tags);
free(desc);
}
void sist2_exec_script(exec_args_t *args) {
LogCtx.verbose = TRUE;
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
index_descriptor_t desc = read_index_descriptor(descriptor_path);
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
IndexCtx.needs_es_connection = TRUE;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
database_t *db = database_create(args->index_path, INDEX_DATABASE);
database_open(db);
execute_update_script(args->script, args->async_script, desc.id);
index_descriptor_t *desc = database_read_index_descriptor(db);
LOG_DEBUGF("main.c", "Index version %s", desc->version);
execute_update_script(args->script, args->async_script, desc->id);
free(args->script);
database_close(db, FALSE);
}
void sist2_web(web_args_t *args) {
@@ -569,23 +458,17 @@ void sist2_web(web_args_t *args) {
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
if (abs_path == NULL) {
return;
}
char path_tmp[PATH_MAX];
snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);
snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
strcpy(WebCtx.indices[i].path, abs_path);
LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name)
WebCtx.indices[i].db = database_create(abs_path, INDEX_DATABASE);
database_open(WebCtx.indices[i].db);
index_descriptor_t *desc = database_read_index_descriptor(WebCtx.indices[i].db);
WebCtx.indices[i].desc = *desc;
free(desc);
LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name);
free(abs_path);
}
@@ -600,7 +483,7 @@ void sist2_web(web_args_t *args) {
* Negative number -> Raise error
* Specified a valid number -> Continue as normal
*/
int set_to_negative_if_value_is_zero(struct argparse *self, const struct argparse_option *option) {
int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct argparse_option *option) {
int specified_value = *(int *) option->value;
if (specified_value == 0) {
@@ -613,6 +496,7 @@ int set_to_negative_if_value_is_zero(struct argparse *self, const struct argpars
}
}
#include <zlib.h>
int main(int argc, const char *argv[]) {
// sigsegv_handler = signal(SIGSEGV, sig_handler);
@@ -645,8 +529,8 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_INTEGER('q', "thumbnail-quality", &scan_args->tn_quality,
"Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
"Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size,
"Thumbnail size, in pixels. DEFAULT=500",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size),
@@ -656,7 +540,8 @@ int main(int argc, const char *argv[]) {
OPT_INTEGER(0, "content-size", &scan_args->content_size,
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size),
OPT_STRING(0, "incremental", &scan_args->incremental,
OPT_BOOLEAN(0, "incremental", &scan_args->incremental,
// TODO: Update help string
"Reuse an existing index and only scan modified files."),
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
@@ -692,7 +577,8 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
"Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
@@ -701,20 +587,22 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 70"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"),
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
"Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
OPT_STRING(0, "auth0-audience", &web_args->auth0_audience, "API audience/identifier"),
OPT_STRING(0, "auth0-domain", &web_args->auth0_domain, "Application domain"),
OPT_STRING(0, "auth0-client-id", &web_args->auth0_client_id, "Application client ID"),
OPT_STRING(0, "auth0-public-key-file", &web_args->auth0_public_key_path, "Path to Auth0 public key file extracted from <domain>/pem"),
OPT_STRING(0, "auth0-public-key-file", &web_args->auth0_public_key_path,
"Path to Auth0 public key file extracted from <domain>/pem"),
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
OPT_STRING(0, "tagline", &web_args->tagline, "Tagline in navbar"),
OPT_BOOLEAN(0, "dev", &web_args->dev, "Serve html & js files from disk (for development)"),
@@ -722,7 +610,8 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
"Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
@@ -800,7 +689,7 @@ int main(int argc, const char *argv[]) {
} else {
argparse_usage(&argparse);
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0]);
}
printf("\n");