From 300c70883d33ce338cb2f32bd3ce84ab7c67a6ae Mon Sep 17 00:00:00 2001 From: simon987 Date: Mon, 10 Apr 2023 11:04:16 -0400 Subject: [PATCH] Fixes and cleanup --- scripts/mime.csv | 3 + src/cli.c | 2 +- src/cli.h | 1 + src/ctx.h | 7 - src/database/database.c | 68 ++- src/database/database.h | 10 +- src/io/walk.c | 10 - src/main.c | 212 +++---- src/parsing/fs_util.h | 1 - src/parsing/magic_util.c | 2 +- src/parsing/mime_generated.c | 567 ++++++++++--------- src/parsing/parse.c | 34 +- src/sist.h | 2 + src/tpool.c | 153 ++--- src/tpool.h | 2 - third-party/libscan/libscan/arc/arc.c | 8 +- third-party/libscan/libscan/comic/comic.c | 4 +- third-party/libscan/libscan/ebook/ebook.c | 38 +- third-party/libscan/libscan/font/font.c | 12 +- third-party/libscan/libscan/json/json.c | 2 +- third-party/libscan/libscan/macros.h | 16 +- third-party/libscan/libscan/media/media.c | 92 +-- third-party/libscan/libscan/mobi/scan_mobi.c | 18 +- third-party/libscan/libscan/msdoc/msdoc.c | 10 +- third-party/libscan/libscan/ooxml/ooxml.c | 30 +- third-party/libscan/libscan/raw/raw.c | 42 +- third-party/libscan/libscan/scan.h | 20 +- third-party/libscan/libscan/text/text.c | 12 +- third-party/libscan/libscan/wpd/wpd.c | 8 +- 29 files changed, 678 insertions(+), 708 deletions(-) diff --git a/scripts/mime.csv b/scripts/mime.csv index 4b32f32..e1b25df 100644 --- a/scripts/mime.csv +++ b/scripts/mime.csv @@ -1,3 +1,4 @@ +application/x-matlab-data,mat application/arj, arj application/base64, mme application/binhex, hqx @@ -346,6 +347,8 @@ text/mcf, mcf text/pascal, pas text/PGP, text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml +text/x-script.python, pyx +text/csv, application/vnd.coffeescript, coffee text/richtext, rt|rtf|rtx text/rtf, diff --git a/src/cli.c b/src/cli.c index e2540ba..9546a27 100644 --- a/src/cli.c +++ b/src/cli.c @@ -142,7 +142,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { char *abs_output = abspath(args->output); if (args->incremental && abs_output == NULL) { - LOG_WARNINGF("main.c", "Could not open original index for incremental scan: %s. Will not perform incremental scan.", abs_output); + LOG_WARNINGF("main.c", "Could not open original index for incremental scan: %s. Will not perform incremental scan.", args->output); args->incremental = FALSE; } else if (!args->incremental && abs_output != NULL) { LOG_FATALF("main.c", "Index already exists: %s. If you wish to perform incremental scan, you must specify --incremental", abs_output); diff --git a/src/cli.h b/src/cli.h index e1e039c..10d48c3 100644 --- a/src/cli.h +++ b/src/cli.h @@ -14,6 +14,7 @@ typedef struct scan_args { int content_size; int threads; int incremental; + int optimize_database; char *output; char *rewrite_url; char *name; diff --git a/src/ctx.h b/src/ctx.h index f56afd8..9e54cb0 100644 --- a/src/ctx.h +++ b/src/ctx.h @@ -38,13 +38,6 @@ typedef struct { pcre_extra *exclude_extra; int fast; - pthread_mutex_t dbg_current_files_mu; - - int dbg_failed_files_count; - int dbg_skipped_files_count; - int dbg_excluded_files_count; - pthread_mutex_t dbg_file_counts_mu; - scan_arc_ctx_t arc_ctx; scan_comic_ctx_t comic_ctx; scan_ebook_ctx_t ebook_ctx; diff --git a/src/database/database.c b/src/database/database.c index 741187c..bf867c7 100644 --- a/src/database/database.c +++ b/src/database/database.c @@ -8,7 +8,6 @@ #include - database_t *database_create(const char *filename, database_type_t type) { database_t *db = malloc(sizeof(database_t)); @@ -81,7 +80,7 @@ void database_initialize(database_t *db) { } void database_open(database_t *db) { - LOG_DEBUGF("tpool.c", "Opening database %s (%d)", db->filename, db->type); + LOG_DEBUGF("database.c", "Opening database %s (%d)", db->filename, db->type); CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db)); @@ -113,7 +112,8 @@ void database_open(database_t *db) { &db->write_document_stmt, NULL)); CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2( db->db, - "INSERT INTO thumbnail (id, num, data) VALUES (?,?,?) ON CONFLICT DO UPDATE SET data=excluded.data;", -1, + "INSERT INTO thumbnail (id, num, data) VALUES (?,?,?) ON CONFLICT DO UPDATE SET data=excluded.data;", + -1, &db->write_thumbnail_stmt, NULL)); // Create functions @@ -186,12 +186,16 @@ void database_close(database_t *db, int optimize) { if (optimize) { LOG_DEBUG("database.c", "Optimizing database"); - // TODO: This should be an optional argument -// CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "VACUUM;", NULL, NULL, NULL)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "VACUUM;", NULL, NULL, NULL)); CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA optimize;", NULL, NULL, NULL)); } sqlite3_close(db->db); + + if (db->type == IPC_PRODUCER_DATABASE) { + remove(db->filename); + } + free(db); db = NULL; } @@ -202,11 +206,14 @@ void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *r int ret = sqlite3_step(db->select_thumbnail_stmt); - // TODO: if row not found, return null - if (ret != SQLITE_ROW) { - LOG_FATALF("database.c", "FIXME: tn step returned %d", ret); + if (ret == SQLITE_DONE) { + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->select_thumbnail_stmt)); + *return_value_len = 0; + return NULL; } + CRASH_IF_STMT_FAIL(ret); + const void *blob = sqlite3_column_blob(db->select_thumbnail_stmt, 0); const int blob_size = sqlite3_column_bytes(db->select_thumbnail_stmt, 0); @@ -275,11 +282,47 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) { return desc; } +database_iterator_t *database_create_delete_list_iterator(database_t *db) { + + sqlite3_stmt *stmt; + sqlite3_prepare_v2(db->db, "SELECT id FROM delete_list;", -1, &stmt, NULL); + + database_iterator_t *iter = malloc(sizeof(database_iterator_t)); + + iter->stmt = stmt; + iter->db = db; + + return iter; +} + +char *database_delete_list_iter(database_iterator_t *iter) { + int ret = sqlite3_step(iter->stmt); + + if (ret == SQLITE_ROW) { + const char *id = (const char *) sqlite3_column_text(iter->stmt, 0); + char *id_heap = malloc(strlen(id) + 1); + strcpy(id_heap, id); + return id_heap; + } + + if (ret != SQLITE_DONE) { + LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db)); + } + + if (sqlite3_finalize(iter->stmt) != SQLITE_OK) { + LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db)); + } + + iter->stmt = NULL; + + return NULL; +} + database_iterator_t *database_create_document_iterator(database_t *db) { sqlite3_stmt *stmt; - // TODO: remove mtime, size, _id from json_data + // TODO optimization: remove mtime, size, _id from json_data sqlite3_prepare_v2(db->db, "WITH doc (j) AS (SELECT CASE" " WHEN sc.json_data IS NULL THEN" @@ -494,10 +537,10 @@ job_t *database_get_work(database_t *db, job_type_t job_type) { CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt)); pthread_mutex_unlock(&db->ipc_ctx->db_mutex); return NULL; - } else { - CRASH_IF_STMT_FAIL(ret); } + CRASH_IF_STMT_FAIL(ret); + job = malloc(sizeof(*job)); const char *line = (const char *) sqlite3_column_text(db->pop_index_job_stmt, 2); @@ -511,9 +554,6 @@ job_t *database_get_work(database_t *db, job_type_t job_type) { job->bulk_line->type = sqlite3_column_int(db->pop_index_job_stmt, 1); job->bulk_line->next = NULL; - // TODO CRASH IF NOT OK - sqlite3_step(db->pop_parse_job_stmt); - CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt)); } diff --git a/src/database/database.h b/src/database/database.h index d36f802..3c61661 100644 --- a/src/database/database.h +++ b/src/database/database.h @@ -41,7 +41,7 @@ typedef struct { pthread_mutex_t db_mutex; pthread_mutex_t index_db_mutex; pthread_cond_t has_work_cond; - char current_job[256][PATH_MAX * 2]; + char current_job[MAX_THREADS][PATH_MAX * 2]; } database_ipc_ctx_t; typedef struct database { @@ -106,6 +106,14 @@ cJSON *database_document_iter(database_iterator_t *); #define database_document_iter_foreach(element, iter) \ for (cJSON *element = database_document_iter(iter); element != NULL; element = database_document_iter(iter)) +database_iterator_t *database_create_delete_list_iterator(database_t *db); + +char * database_delete_list_iter(database_iterator_t *iter); + +#define database_delete_list_iter_foreach(element, iter) \ + for (char *element = database_delete_list_iter(iter); element != NULL; element = database_delete_list_iter(iter)) + + cJSON *database_incremental_scan_begin(database_t *db); cJSON *database_incremental_scan_end(database_t *db); diff --git a/src/io/walk.c b/src/io/walk.c index c9fa8b0..dfeb2b3 100644 --- a/src/io/walk.c +++ b/src/io/walk.c @@ -24,9 +24,6 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st LOG_DEBUGF("walk.c", "Excluded: %s", filepath); if (typeflag == FTW_F && S_ISREG(info->st_mode)) { - pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); - ScanCtx.dbg_excluded_files_count += 1; - pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); } else if (typeflag == FTW_D) { return FTW_SKIP_SUBTREE; } @@ -83,13 +80,6 @@ int iterate_file_list(void *input_file) { if (ScanCtx.exclude != NULL && EXCLUDED(absolute_path)) { LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path); - - if (S_ISREG(info.st_mode)) { - pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); - ScanCtx.dbg_excluded_files_count += 1; - pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); - } - continue; } diff --git a/src/main.c b/src/main.c index 72bdc89..7b02d10 100644 --- a/src/main.c +++ b/src/main.c @@ -18,10 +18,6 @@ #include "src/database/database.h" -#define DESCRIPTION "Lightning-fast file system indexer and search tool." - -#define EPILOG "Made by simon987 . Released under GPL-3.0" - static const char *const usage[] = { "sist2 scan [OPTION]... PATH", @@ -32,77 +28,6 @@ static const char *const usage[] = { }; -static __sighandler_t sigsegv_handler = NULL; -static __sighandler_t sigabrt_handler = NULL; - -void sig_handler(int signum) { - - LogCtx.verbose = TRUE; - LogCtx.very_verbose = TRUE; - - LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n"); - LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum)); - - // TODO: Print debug info -// if (ScanCtx.dbg_current_files != NULL) { -// GHashTableIter iter; -// g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files); -// -// void *key; -// void *value; -// while (g_hash_table_iter_next(&iter, &key, &value)) { -// parse_job_t *job = value; -// -// if (isatty(STDERR_FILENO)) { -// LOG_DEBUGF( -// "*SIGNAL HANDLER*", -// "Thread \033[%dm[%04llX]\033[0m was working on job '%s'", -// 31 + ((unsigned int) key) % 7, key, job->filepath -// ); -// } else { -// LOG_DEBUGF( -// "*SIGNAL HANDLER*", -// "THREAD [%04llX] was working on job %s", -// key, job->filepath -// ); -// } -// } -// } - - if (ScanCtx.pool != NULL) { - tpool_dump_debug_info(ScanCtx.pool); - } - - if (IndexCtx.pool != NULL) { - tpool_dump_debug_info(IndexCtx.pool); - } - - LOG_INFO( - "*SIGNAL HANDLER*", - "Please consider creating a bug report at https://github.com/simon987/sist2/issues !" - ); - LOG_INFO( - "*SIGNAL HANDLER*", - "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs" - ); - -#ifndef SIST_DEBUG - LOG_WARNING( - "*SIGNAL HANDLER*", - "You are running sist2 in release mode! Please consider downloading the debug binary from the Github " - "releases page to provide additionnal information when submitting a bug report." - ); -#endif - - if (signum == SIGSEGV && sigsegv_handler != NULL) { - sigsegv_handler(signum); - } else if (signum == SIGABRT && sigabrt_handler != NULL) { - sigabrt_handler(signum); - } - - exit(-1); -} - void database_scan_begin(scan_args_t *args) { index_descriptor_t *desc = &ScanCtx.index.desc; @@ -158,7 +83,7 @@ void write_thumbnail_callback(char *key, int num, void *buf, size_t buf_len) { database_write_thumbnail(ProcData.index_db, key, num, buf, buf_len); } -void _log(const char *filepath, int level, char *str) { +void log_callback(const char *filepath, int level, char *str) { if (level == LEVEL_FATAL) { sist_log(filepath, level, str); exit(-1); @@ -175,7 +100,7 @@ void _log(const char *filepath, int level, char *str) { } } -void _logf(const char *filepath, int level, char *format, ...) { +void logf_callback(const char *filepath, int level, char *format, ...) { va_list args; @@ -198,15 +123,13 @@ void _logf(const char *filepath, int level, char *format, ...) { } void initialize_scan_context(scan_args_t *args) { - // TODO: shared - pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL); ScanCtx.calculate_checksums = args->calculate_checksums; // Archive ScanCtx.arc_ctx.mode = args->archive_mode; - ScanCtx.arc_ctx.log = _log; - ScanCtx.arc_ctx.logf = _logf; + ScanCtx.arc_ctx.log = log_callback; + ScanCtx.arc_ctx.logf = logf_callback; ScanCtx.arc_ctx.parse = (parse_callback_t) parse; if (args->archive_passphrase != NULL) { strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase); @@ -215,8 +138,8 @@ void initialize_scan_context(scan_args_t *args) { } // Comic - ScanCtx.comic_ctx.log = _log; - ScanCtx.comic_ctx.logf = _logf; + ScanCtx.comic_ctx.log = log_callback; + ScanCtx.comic_ctx.logf = logf_callback; ScanCtx.comic_ctx.store = write_thumbnail_callback; ScanCtx.comic_ctx.enable_tn = args->tn_count > 0; ScanCtx.comic_ctx.tn_size = args->tn_size; @@ -232,24 +155,24 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang; ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path; } - ScanCtx.ebook_ctx.log = _log; - ScanCtx.ebook_ctx.logf = _logf; + ScanCtx.ebook_ctx.log = log_callback; + ScanCtx.ebook_ctx.logf = logf_callback; ScanCtx.ebook_ctx.store = write_thumbnail_callback; ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub; ScanCtx.ebook_ctx.tn_qscale = args->tn_quality; // Font ScanCtx.font_ctx.enable_tn = args->tn_count > 0; - ScanCtx.font_ctx.log = _log; - ScanCtx.font_ctx.logf = _logf; + ScanCtx.font_ctx.log = log_callback; + ScanCtx.font_ctx.logf = logf_callback; ScanCtx.font_ctx.store = write_thumbnail_callback; // Media ScanCtx.media_ctx.tn_qscale = args->tn_quality; ScanCtx.media_ctx.tn_size = args->tn_size; ScanCtx.media_ctx.tn_count = args->tn_count; - ScanCtx.media_ctx.log = _log; - ScanCtx.media_ctx.logf = _logf; + ScanCtx.media_ctx.log = log_callback; + ScanCtx.media_ctx.logf = logf_callback; ScanCtx.media_ctx.store = write_thumbnail_callback; ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024; ScanCtx.media_ctx.read_subtitles = args->read_subtitles; @@ -264,24 +187,24 @@ void initialize_scan_context(scan_args_t *args) { // OOXML ScanCtx.ooxml_ctx.enable_tn = args->tn_count > 0; ScanCtx.ooxml_ctx.content_size = args->content_size; - ScanCtx.ooxml_ctx.log = _log; - ScanCtx.ooxml_ctx.logf = _logf; + ScanCtx.ooxml_ctx.log = log_callback; + ScanCtx.ooxml_ctx.logf = logf_callback; ScanCtx.ooxml_ctx.store = write_thumbnail_callback; // MOBI ScanCtx.mobi_ctx.content_size = args->content_size; - ScanCtx.mobi_ctx.log = _log; - ScanCtx.mobi_ctx.logf = _logf; + ScanCtx.mobi_ctx.log = log_callback; + ScanCtx.mobi_ctx.logf = logf_callback; // TEXT ScanCtx.text_ctx.content_size = args->content_size; - ScanCtx.text_ctx.log = _log; - ScanCtx.text_ctx.logf = _logf; + ScanCtx.text_ctx.log = log_callback; + ScanCtx.text_ctx.logf = logf_callback; // MSDOC ScanCtx.msdoc_ctx.content_size = args->content_size; - ScanCtx.msdoc_ctx.log = _log; - ScanCtx.msdoc_ctx.logf = _logf; + ScanCtx.msdoc_ctx.log = log_callback; + ScanCtx.msdoc_ctx.logf = logf_callback; ScanCtx.msdoc_ctx.store = write_thumbnail_callback; ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string("application/msword"); @@ -299,20 +222,20 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.raw_ctx.tn_qscale = args->tn_quality; ScanCtx.raw_ctx.enable_tn = args->tn_count > 0; ScanCtx.raw_ctx.tn_size = args->tn_size; - ScanCtx.raw_ctx.log = _log; - ScanCtx.raw_ctx.logf = _logf; + ScanCtx.raw_ctx.log = log_callback; + ScanCtx.raw_ctx.logf = logf_callback; ScanCtx.raw_ctx.store = write_thumbnail_callback; // Wpd ScanCtx.wpd_ctx.content_size = args->content_size; - ScanCtx.wpd_ctx.log = _log; - ScanCtx.wpd_ctx.logf = _logf; + ScanCtx.wpd_ctx.log = log_callback; + ScanCtx.wpd_ctx.logf = logf_callback; ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string("application/wordperfect"); // Json ScanCtx.json_ctx.content_size = args->content_size; - ScanCtx.json_ctx.log = _log; - ScanCtx.json_ctx.logf = _logf; + ScanCtx.json_ctx.log = log_callback; + ScanCtx.json_ctx.logf = logf_callback; ScanCtx.json_ctx.json_mime = mime_get_mime_by_string("application/json"); ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string("application/ndjson"); } @@ -344,9 +267,6 @@ void sist2_scan(scan_args_t *args) { tpool_wait(ScanCtx.pool); tpool_destroy(ScanCtx.pool); - LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count); - LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count); - LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count); LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size); LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size); @@ -358,7 +278,7 @@ void sist2_scan(scan_args_t *args) { } database_generate_stats(db, args->treemap_threshold); - database_close(db, TRUE); + database_close(db, args->optimize_database); } void sist2_index(index_args_t *args) { @@ -397,16 +317,19 @@ void sist2_index(index_args_t *args) { print_json(json, doc_id); } else { index_json(json, doc_id); - cnt +=1; + cnt += 1; } } free(iterator); database_close(db, FALSE); - // Only read the _delete index if we're sending data to ES if (!args->print) { - // TODO: (delete_list iterator) + database_iterator_t *del_iter = database_create_delete_list_iterator(db); + database_delete_list_iter_foreach(id, del_iter) { + delete_document(id); + free(id); + } } tpool_wait(IndexCtx.pool); @@ -496,12 +419,7 @@ int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct } } -#include - int main(int argc, const char *argv[]) { -// sigsegv_handler = signal(SIGSEGV, sig_handler); -// sigabrt_handler = signal(SIGABRT, sig_handler); - setlocale(LC_ALL, ""); scan_args_t *scan_args = scan_args_create(); @@ -521,36 +439,37 @@ int main(int argc, const char *argv[]) { struct argparse_option options[] = { OPT_HELP(), - OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"), - OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging"), - OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"), + OPT_BOOLEAN('v', "version", &arg_version, "Print version and exit."), + OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging."), + OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages."), OPT_BOOLEAN(0, "json-logs", &LogCtx.json_logs, "Output logs in JSON format."), OPT_GROUP("Scan options"), - OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), + OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"), OPT_INTEGER('q', "thumbnail-quality", &scan_args->tn_quality, - "Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2", + "Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT: 2", set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality), OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size, - "Thumbnail size, in pixels. DEFAULT=500", + "Thumbnail size, in pixels. DEFAULT: 552", set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size), OPT_INTEGER(0, "thumbnail-count", &scan_args->tn_count, - "Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1", + "Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT: 1", set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_count), OPT_INTEGER(0, "content-size", &scan_args->content_size, - "Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768", + "Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT: 32768", set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size), + OPT_STRING('o', "output", &scan_args->output, "Output index file path. DEFAULT: index.sist2"), OPT_BOOLEAN(0, "incremental", &scan_args->incremental, - // TODO: Update help string - "Reuse an existing index and only scan modified files."), - OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"), + "If the output file path exists, only scan new or modified files."), + OPT_BOOLEAN(0, "optimize-index", &scan_args->optimize_database, + "Defragment index file after scan to reduce its file size."), OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."), - OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"), + OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: index"), OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. " "Use 0 to only scan files in PATH. DEFAULT: -1"), OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). " - "skip: Don't parse, list: only get file names as text, " - "shallow: Don't parse archives inside archives. DEFAULT: recurse"), + "skip: don't scan, list: only save file names as text, " + "shallow: don't scan archives inside archives. DEFAULT: recurse"), OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase, "Passphrase for encrypted archive files"), @@ -559,8 +478,8 @@ int main(int argc, const char *argv[]) { "which are installed on your machine)"), OPT_BOOLEAN(0, "ocr-images", &scan_args->ocr_images, "Enable OCR'ing of image files."), OPT_BOOLEAN(0, "ocr-ebooks", &scan_args->ocr_ebooks, "Enable OCR'ing of ebook files."), - OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"), - OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"), + OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned."), + OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type."), OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap " "(see USAGE.md). DEFAULT: 0.0005"), OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer_mib, @@ -568,19 +487,20 @@ int main(int argc, const char *argv[]) { "(see USAGE.md). DEFAULT: 2000"), OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."), OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub, - "Faster but less accurate EPUB parsing (no thumbnails, metadata)"), + "Faster but less accurate EPUB parsing (no thumbnails, metadata)."), OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."), OPT_STRING(0, "list-file", &scan_args->list_path, "Specify a list of newline-delimited paths to be scanned" " instead of normal directory traversal. Use '-' to read" " from stdin."), OPT_GROUP("Index options"), - OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), - OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"), + OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"), + OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT: http://localhost:9200"), OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."), - OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"), - OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."), + OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"), + OPT_BOOLEAN('p', "print", &index_args->print, + "Print JSON documents to stdout instead of indexing to elasticsearch."), OPT_BOOLEAN(0, "incremental-index", &index_args->incremental, "Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch."), OPT_STRING(0, "script-file", &common_script_path, "Path to user script."), @@ -588,15 +508,15 @@ int main(int argc, const char *argv[]) { OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."), OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."), OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 70"), - OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. " - "(You must use this option the first time you use the index command)"), + OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings."), OPT_GROUP("Web options"), - OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), + OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"), OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."), - OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"), - OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"), + OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"), + OPT_STRING(0, "bind", &web_args->listen_address, + "Listen for connections on this address. DEFAULT: localhost:4090"), OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"), OPT_STRING(0, "auth0-audience", &web_args->auth0_audience, "API audience/identifier"), OPT_STRING(0, "auth0-domain", &web_args->auth0_domain, "Application domain"), @@ -609,10 +529,10 @@ int main(int argc, const char *argv[]) { OPT_STRING(0, "lang", &web_args->lang, "Default UI language. Can be changed by the user"), OPT_GROUP("Exec-script options"), - OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), + OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"), OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."), - OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"), + OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"), OPT_STRING(0, "script-file", &common_script_path, "Path to user script."), OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."), @@ -621,7 +541,11 @@ int main(int argc, const char *argv[]) { struct argparse argparse; argparse_init(&argparse, options, usage, 0); - argparse_describe(&argparse, DESCRIPTION, EPILOG); + argparse_describe( + &argparse, + "\nLightning-fast file system indexer and search tool.", + "\nMade by simon987 . Released under GPL-3.0" + ); argc = argparse_parse(&argparse, argc, argv); if (arg_version) { diff --git a/src/parsing/fs_util.h b/src/parsing/fs_util.h index a3b257e..3b7c1a7 100644 --- a/src/parsing/fs_util.h +++ b/src/parsing/fs_util.h @@ -6,7 +6,6 @@ #define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));}; static int fs_read(struct vfile *f, void *buf, size_t size) { - if (f->fd == -1) { SHA1_Init(&f->sha1_ctx); diff --git a/src/parsing/magic_util.c b/src/parsing/magic_util.c index e5443a8..d8539a1 100644 --- a/src/parsing/magic_util.c +++ b/src/parsing/magic_util.c @@ -12,7 +12,7 @@ char *magic_buffer_embedded(void *buffer, size_t buffer_size) { const char *magic_buffers[1] = {magic_database_buffer,}; size_t sizes[1] = {sizeof(magic_database_buffer),}; - // TODO: check if we can reuse the magic instance + // TODO optimisation: check if we can reuse the magic instance int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1); if (load_ret != 0) { diff --git a/src/parsing/mime_generated.c b/src/parsing/mime_generated.c index 6eedeae..c68c52c 100644 --- a/src/parsing/mime_generated.c +++ b/src/parsing/mime_generated.c @@ -174,287 +174,291 @@ application_x_mach_binary=655526, application_x_mach_executable=655527, application_x_magic_cap_package_1_0=655528, application_x_mathcad=655529, -application_x_maxis_dbpf=655530, -application_x_meme=655531, -application_x_midi=655532, -application_x_mif=655533, -application_x_mix_transfer=655534, -application_x_mobipocket_ebook=655535 | 0x02000000, -application_x_ms_compress_szdd=655536, -application_x_ms_pdb=655537, -application_x_ms_reader=655538, -application_x_msaccess=655539, -application_x_n64_rom=655540, -application_x_navi_animation=655541, -application_x_navidoc=655542, -application_x_navimap=655543, -application_x_navistyle=655544, -application_x_nes_rom=655545, -application_x_netcdf=655546, -application_x_newton_compatible_pkg=655547, -application_x_nintendo_ds_rom=655548, -application_x_object=655549, -application_x_omc=655550, -application_x_omcdatamaker=655551, -application_x_omcregerator=655552, -application_x_pagemaker=655553, -application_x_pcl=655554, -application_x_pgp_keyring=655555, -application_x_pixclscript=655556, -application_x_pkcs7_certreqresp=655557, -application_x_pkcs7_signature=655558, -application_x_project=655559, -application_x_qpro=655560, -application_x_rar=655561 | 0x10000000, -application_x_rpm=655562, -application_x_sdp=655563, -application_x_sea=655564, -application_x_seelogo=655565, -application_x_setupscript=655566, -application_x_shar=655567, -application_x_sharedlib=655568, -application_x_shockwave_flash=655569, -application_x_snappy_framed=655570, -application_x_sprite=655571, -application_x_sqlite3=655572, -application_x_stargallery_thm=655573, -application_x_stuffit=655574, -application_x_sv4cpio=655575, -application_x_sv4crc=655576, -application_x_tar=655577 | 0x10000000, -application_x_tbook=655578, -application_x_terminfo=655579, -application_x_terminfo2=655580, -application_x_tex_tfm=655581, -application_x_texinfo=655582, -application_x_ustar=655583, -application_x_visio=655584, -application_x_vnd_audioexplosion_mzz=655585, -application_x_vnd_ls_xpix=655586, -application_x_vrml=655587, -application_x_wais_source=655588, -application_x_wine_extension_ini=655589, -application_x_wintalk=655590, -application_x_world=655591, -application_x_wri=655592, -application_x_x509_ca_cert=655593, -application_x_xz=655594 | 0x08000000, -application_x_zip=655595, -application_x_zstd=655596 | 0x08000000, -application_x_zstd_dictionary=655597, -application_xml=655598, -application_zip=655599 | 0x10000000, -application_zlib=655600, -audio_basic=458993 | 0x80000000, -audio_it=458994, -audio_make=458995, -audio_mid=458996, -audio_midi=458997, -audio_mp4=458998, -audio_mpeg=458999, -audio_ogg=459000, -audio_s3m=459001, -audio_tsp_audio=459002, -audio_tsplayer=459003, -audio_vnd_qcelp=459004, -audio_voxware=459005, -audio_x_aiff=459006, -audio_x_flac=459007, -audio_x_gsm=459008, -audio_x_hx_aac_adts=459009, -audio_x_jam=459010, -audio_x_liveaudio=459011, -audio_x_m4a=459012, -audio_x_midi=459013, -audio_x_mod=459014, -audio_x_mp4a_latm=459015, -audio_x_mpeg_3=459016, -audio_x_mpequrl=459017, -audio_x_nspaudio=459018, -audio_x_pn_realaudio=459019, -audio_x_psid=459020, -audio_x_realaudio=459021, -audio_x_s3m=459022, -audio_x_twinvq=459023, -audio_x_twinvq_plugin=459024, -audio_x_voc=459025, -audio_x_wav=459026, -audio_x_xbox_executable=459027 | 0x80000000, -audio_x_xbox360_executable=459028 | 0x80000000, -audio_xm=459029, -font_otf=327958 | 0x20000000, -font_sfnt=327959 | 0x20000000, -font_woff=327960 | 0x20000000, -font_woff2=327961 | 0x20000000, -image_bmp=524570, -image_cmu_raster=524571, -image_fif=524572, -image_florian=524573, -image_g3fax=524574, -image_gif=524575, -image_heic=524576, -image_ief=524577, -image_jpeg=524578, -image_jutvision=524579, -image_naplps=524580, -image_pict=524581, -image_png=524582, -image_svg=524583 | 0x80000000, -image_svg_xml=524584 | 0x80000000, -image_tiff=524585, -image_vnd_adobe_photoshop=524586 | 0x80000000, -image_vnd_djvu=524587 | 0x80000000, -image_vnd_fpx=524588, -image_vnd_microsoft_icon=524589, -image_vnd_rn_realflash=524590, -image_vnd_rn_realpix=524591, -image_vnd_wap_wbmp=524592, -image_vnd_xiff=524593, -image_webp=524594, -image_wmf=524595, -image_x_3ds=524596, -image_x_adobe_dng=524597 | 0x00800000, -image_x_award_bioslogo=524598, -image_x_canon_cr2=524599 | 0x00800000, -image_x_canon_crw=524600 | 0x00800000, -image_x_cmu_raster=524601, -image_x_cur=524602, -image_x_dcraw=524603 | 0x00800000, -image_x_dwg=524604, -image_x_eps=524605, -image_x_epson_erf=524606 | 0x00800000, -image_x_exr=524607, -image_x_fuji_raf=524608 | 0x00800000, -image_x_gem=524609, -image_x_icns=524610, -image_x_icon=524611 | 0x80000000, -image_x_jg=524612, -image_x_jps=524613, -image_x_kodak_dcr=524614 | 0x00800000, -image_x_kodak_k25=524615 | 0x00800000, -image_x_kodak_kdc=524616 | 0x00800000, -image_x_minolta_mrw=524617 | 0x00800000, -image_x_ms_bmp=524618, -image_x_niff=524619, -image_x_nikon_nef=524620 | 0x00800000, -image_x_olympus_orf=524621 | 0x00800000, -image_x_panasonic_raw=524622 | 0x00800000, -image_x_pcx=524623, -image_x_pentax_pef=524624 | 0x00800000, -image_x_pict=524625, -image_x_portable_bitmap=524626, -image_x_portable_graymap=524627, -image_x_portable_pixmap=524628, -image_x_quicktime=524629, -image_x_rgb=524630, -image_x_sigma_x3f=524631 | 0x00800000, -image_x_sony_arw=524632 | 0x00800000, -image_x_sony_sr2=524633 | 0x00800000, -image_x_sony_srf=524634 | 0x00800000, -image_x_tga=524635, -image_x_tiff=524636, -image_x_win_bitmap=524637, -image_x_xcf=524638 | 0x80000000, -image_x_xpixmap=524639 | 0x80000000, -image_x_xwindowdump=524640, -message_news=196961, -message_rfc822=196962, -model_vnd_dwf=65891, -model_vnd_gdl=65892, -model_vnd_gs_gdl=65893, -model_vrml=65894, -model_x_pov=65895, +application_x_matlab_data=655530, +application_x_maxis_dbpf=655531, +application_x_meme=655532, +application_x_midi=655533, +application_x_mif=655534, +application_x_mix_transfer=655535, +application_x_mobipocket_ebook=655536 | 0x02000000, +application_x_ms_compress_szdd=655537, +application_x_ms_pdb=655538, +application_x_ms_reader=655539, +application_x_msaccess=655540, +application_x_n64_rom=655541, +application_x_navi_animation=655542, +application_x_navidoc=655543, +application_x_navimap=655544, +application_x_navistyle=655545, +application_x_nes_rom=655546, +application_x_netcdf=655547, +application_x_newton_compatible_pkg=655548, +application_x_nintendo_ds_rom=655549, +application_x_object=655550, +application_x_omc=655551, +application_x_omcdatamaker=655552, +application_x_omcregerator=655553, +application_x_pagemaker=655554, +application_x_pcl=655555, +application_x_pgp_keyring=655556, +application_x_pixclscript=655557, +application_x_pkcs7_certreqresp=655558, +application_x_pkcs7_signature=655559, +application_x_project=655560, +application_x_qpro=655561, +application_x_rar=655562 | 0x10000000, +application_x_rpm=655563, +application_x_sdp=655564, +application_x_sea=655565, +application_x_seelogo=655566, +application_x_setupscript=655567, +application_x_shar=655568, +application_x_sharedlib=655569, +application_x_shockwave_flash=655570, +application_x_snappy_framed=655571, +application_x_sprite=655572, +application_x_sqlite3=655573, +application_x_stargallery_thm=655574, +application_x_stuffit=655575, +application_x_sv4cpio=655576, +application_x_sv4crc=655577, +application_x_tar=655578 | 0x10000000, +application_x_tbook=655579, +application_x_terminfo=655580, +application_x_terminfo2=655581, +application_x_tex_tfm=655582, +application_x_texinfo=655583, +application_x_ustar=655584, +application_x_visio=655585, +application_x_vnd_audioexplosion_mzz=655586, +application_x_vnd_ls_xpix=655587, +application_x_vrml=655588, +application_x_wais_source=655589, +application_x_wine_extension_ini=655590, +application_x_wintalk=655591, +application_x_world=655592, +application_x_wri=655593, +application_x_x509_ca_cert=655594, +application_x_xz=655595 | 0x08000000, +application_x_zip=655596, +application_x_zstd=655597 | 0x08000000, +application_x_zstd_dictionary=655598, +application_xml=655599, +application_zip=655600 | 0x10000000, +application_zlib=655601, +audio_basic=458994 | 0x80000000, +audio_it=458995, +audio_make=458996, +audio_mid=458997, +audio_midi=458998, +audio_mp4=458999, +audio_mpeg=459000, +audio_ogg=459001, +audio_s3m=459002, +audio_tsp_audio=459003, +audio_tsplayer=459004, +audio_vnd_qcelp=459005, +audio_voxware=459006, +audio_x_aiff=459007, +audio_x_flac=459008, +audio_x_gsm=459009, +audio_x_hx_aac_adts=459010, +audio_x_jam=459011, +audio_x_liveaudio=459012, +audio_x_m4a=459013, +audio_x_midi=459014, +audio_x_mod=459015, +audio_x_mp4a_latm=459016, +audio_x_mpeg_3=459017, +audio_x_mpequrl=459018, +audio_x_nspaudio=459019, +audio_x_pn_realaudio=459020, +audio_x_psid=459021, +audio_x_realaudio=459022, +audio_x_s3m=459023, +audio_x_twinvq=459024, +audio_x_twinvq_plugin=459025, +audio_x_voc=459026, +audio_x_wav=459027, +audio_x_xbox_executable=459028 | 0x80000000, +audio_x_xbox360_executable=459029 | 0x80000000, +audio_xm=459030, +font_otf=327959 | 0x20000000, +font_sfnt=327960 | 0x20000000, +font_woff=327961 | 0x20000000, +font_woff2=327962 | 0x20000000, +image_bmp=524571, +image_cmu_raster=524572, +image_fif=524573, +image_florian=524574, +image_g3fax=524575, +image_gif=524576, +image_heic=524577, +image_ief=524578, +image_jpeg=524579, +image_jutvision=524580, +image_naplps=524581, +image_pict=524582, +image_png=524583, +image_svg=524584 | 0x80000000, +image_svg_xml=524585 | 0x80000000, +image_tiff=524586, +image_vnd_adobe_photoshop=524587 | 0x80000000, +image_vnd_djvu=524588 | 0x80000000, +image_vnd_fpx=524589, +image_vnd_microsoft_icon=524590, +image_vnd_rn_realflash=524591, +image_vnd_rn_realpix=524592, +image_vnd_wap_wbmp=524593, +image_vnd_xiff=524594, +image_webp=524595, +image_wmf=524596, +image_x_3ds=524597, +image_x_adobe_dng=524598 | 0x00800000, +image_x_award_bioslogo=524599, +image_x_canon_cr2=524600 | 0x00800000, +image_x_canon_crw=524601 | 0x00800000, +image_x_cmu_raster=524602, +image_x_cur=524603, +image_x_dcraw=524604 | 0x00800000, +image_x_dwg=524605, +image_x_eps=524606, +image_x_epson_erf=524607 | 0x00800000, +image_x_exr=524608, +image_x_fuji_raf=524609 | 0x00800000, +image_x_gem=524610, +image_x_icns=524611, +image_x_icon=524612 | 0x80000000, +image_x_jg=524613, +image_x_jps=524614, +image_x_kodak_dcr=524615 | 0x00800000, +image_x_kodak_k25=524616 | 0x00800000, +image_x_kodak_kdc=524617 | 0x00800000, +image_x_minolta_mrw=524618 | 0x00800000, +image_x_ms_bmp=524619, +image_x_niff=524620, +image_x_nikon_nef=524621 | 0x00800000, +image_x_olympus_orf=524622 | 0x00800000, +image_x_panasonic_raw=524623 | 0x00800000, +image_x_pcx=524624, +image_x_pentax_pef=524625 | 0x00800000, +image_x_pict=524626, +image_x_portable_bitmap=524627, +image_x_portable_graymap=524628, +image_x_portable_pixmap=524629, +image_x_quicktime=524630, +image_x_rgb=524631, +image_x_sigma_x3f=524632 | 0x00800000, +image_x_sony_arw=524633 | 0x00800000, +image_x_sony_sr2=524634 | 0x00800000, +image_x_sony_srf=524635 | 0x00800000, +image_x_tga=524636, +image_x_tiff=524637, +image_x_win_bitmap=524638, +image_x_xcf=524639 | 0x80000000, +image_x_xpixmap=524640 | 0x80000000, +image_x_xwindowdump=524641, +message_news=196962, +message_rfc822=196963, +model_vnd_dwf=65892, +model_vnd_gdl=65893, +model_vnd_gs_gdl=65894, +model_vrml=65895, +model_x_pov=65896, sist2_sidecar=2, -text_PGP=590184, -text_asp=590185, -text_css=590186, -text_html=590187 | 0x01000000, -text_javascript=590188, -text_mcf=590189, -text_pascal=590190, -text_plain=590191, -text_richtext=590192, -text_rtf=590193, -text_scriplet=590194, -text_tab_separated_values=590195, -text_troff=590196, -text_uri_list=590197, -text_vnd_abc=590198, -text_vnd_fmi_flexstor=590199, -text_vnd_wap_wml=590200, -text_vnd_wap_wmlscript=590201, -text_webviewhtml=590202, -text_x_Algol68=590203, -text_x_asm=590204, -text_x_audiosoft_intra=590205, -text_x_awk=590206, -text_x_bcpl=590207, -text_x_c=590208, -text_x_c__=590209, -text_x_component=590210, -text_x_diff=590211, -text_x_fortran=590212, -text_x_java=590213, -text_x_la_asf=590214, -text_x_lisp=590215, -text_x_m=590216, -text_x_m4=590217, -text_x_makefile=590218, -text_x_ms_regedit=590219, -text_x_msdos_batch=590220, -text_x_objective_c=590221, -text_x_pascal=590222, -text_x_perl=590223, -text_x_php=590224, -text_x_po=590225, -text_x_python=590226, -text_x_ruby=590227, -text_x_sass=590228, -text_x_scss=590229, -text_x_server_parsed_html=590230, -text_x_setext=590231, -text_x_sgml=590232 | 0x01000000, -text_x_shellscript=590233, -text_x_speech=590234, -text_x_tcl=590235, -text_x_tex=590236, -text_x_uil=590237, -text_x_uuencode=590238, -text_x_vcalendar=590239, -text_x_vcard=590240, -text_xml=590241 | 0x01000000, -video_MP2T=393634, -video_animaflex=393635, -video_avi=393636, -video_avs_video=393637, -video_mp4=393638, -video_mpeg=393639, -video_quicktime=393640, -video_vdo=393641, -video_vivo=393642, -video_vnd_rn_realvideo=393643, -video_vosaic=393644, -video_webm=393645, -video_x_amt_demorun=393646, -video_x_amt_showrun=393647, -video_x_atomic3d_feature=393648, -video_x_dl=393649, -video_x_dv=393650, -video_x_fli=393651, -video_x_flv=393652, -video_x_isvideo=393653, -video_x_jng=393654 | 0x80000000, -video_x_m4v=393655, -video_x_matroska=393656, -video_x_mng=393657, -video_x_motion_jpeg=393658, -video_x_ms_asf=393659, -video_x_msvideo=393660, -video_x_qtc=393661, -video_x_sgi_movie=393662, -x_epoc_x_sisx_app=721343, +text_PGP=590185, +text_asp=590186, +text_css=590187, +text_csv=590188, +text_html=590189 | 0x01000000, +text_javascript=590190, +text_mcf=590191, +text_pascal=590192, +text_plain=590193, +text_richtext=590194, +text_rtf=590195, +text_scriplet=590196, +text_tab_separated_values=590197, +text_troff=590198, +text_uri_list=590199, +text_vnd_abc=590200, +text_vnd_fmi_flexstor=590201, +text_vnd_wap_wml=590202, +text_vnd_wap_wmlscript=590203, +text_webviewhtml=590204, +text_x_Algol68=590205, +text_x_asm=590206, +text_x_audiosoft_intra=590207, +text_x_awk=590208, +text_x_bcpl=590209, +text_x_c=590210, +text_x_c__=590211, +text_x_component=590212, +text_x_diff=590213, +text_x_fortran=590214, +text_x_java=590215, +text_x_la_asf=590216, +text_x_lisp=590217, +text_x_m=590218, +text_x_m4=590219, +text_x_makefile=590220, +text_x_ms_regedit=590221, +text_x_msdos_batch=590222, +text_x_objective_c=590223, +text_x_pascal=590224, +text_x_perl=590225, +text_x_php=590226, +text_x_po=590227, +text_x_python=590228, +text_x_ruby=590229, +text_x_sass=590230, +text_x_script_python=590231, +text_x_scss=590232, +text_x_server_parsed_html=590233, +text_x_setext=590234, +text_x_sgml=590235 | 0x01000000, +text_x_shellscript=590236, +text_x_speech=590237, +text_x_tcl=590238, +text_x_tex=590239, +text_x_uil=590240, +text_x_uuencode=590241, +text_x_vcalendar=590242, +text_x_vcard=590243, +text_xml=590244 | 0x01000000, +video_MP2T=393637, +video_animaflex=393638, +video_avi=393639, +video_avs_video=393640, +video_mp4=393641, +video_mpeg=393642, +video_quicktime=393643, +video_vdo=393644, +video_vivo=393645, +video_vnd_rn_realvideo=393646, +video_vosaic=393647, +video_webm=393648, +video_x_amt_demorun=393649, +video_x_amt_showrun=393650, +video_x_atomic3d_feature=393651, +video_x_dl=393652, +video_x_dv=393653, +video_x_fli=393654, +video_x_flv=393655, +video_x_isvideo=393656, +video_x_jng=393657 | 0x80000000, +video_x_m4v=393658, +video_x_matroska=393659, +video_x_mng=393660, +video_x_motion_jpeg=393661, +video_x_ms_asf=393662, +video_x_msvideo=393663, +video_x_qtc=393664, +video_x_sgi_movie=393665, +x_epoc_x_sisx_app=721346, }; char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) { +case application_x_matlab_data: return "application/x-matlab-data"; case application_arj: return "application/arj"; case application_base64: return "application/base64"; case application_binhex: return "application/binhex"; @@ -802,6 +806,8 @@ case text_mcf: return "text/mcf"; case text_pascal: return "text/pascal"; case text_PGP: return "text/PGP"; case text_plain: return "text/plain"; +case text_x_script_python: return "text/x-script.python"; +case text_csv: return "text/csv"; case application_vnd_coffeescript: return "application/vnd.coffeescript"; case text_richtext: return "text/richtext"; case text_rtf: return "text/rtf"; @@ -906,6 +912,7 @@ case image_x_epson_erf: return "image/x-epson-erf"; case sist2_sidecar: return "sist2/sidecar"; default: return NULL;}} unsigned int mime_extension_lookup(unsigned long extension_crc32) {switch (extension_crc32) { +case 2495639202:return application_x_matlab_data; case 104524599:return application_arj; case 1388642652:return application_base64; case 3514823219:return application_binhex; @@ -1194,6 +1201,7 @@ case 398963028:return text_javascript; case 1431272808:return text_mcf; case 509266722:return text_pascal; case 1689700070:case 794565824:case 351504808:case 214229345:case 30677878:case 1835907068:case 1154021400:case 3992351814:case 2107886487:case 2202503947:case 999008199:case 473390917:case 3679822420:case 1465078094:case 1466496025:case 2277716423:case 157353380:case 2002237032:case 4216257084:case 590894066:case 987584319:case 2268432115:case 3551958239:case 1436306077:case 3060306774:case 808890964:case 2564639436:case 3322219037:case 3334425408:case 3818365258:case 1403162576:case 590812979:case 1800036834:case 144986711:case 621471808:case 449607278:case 2403297477:case 2529069283:case 3929123204:return text_plain; +case 194218739:return text_x_script_python; case 1401235891:return application_vnd_coffeescript; case 196656302:case 1203117491:case 3183026384:return text_richtext; case 2119613712:return text_scriplet; @@ -1288,6 +1296,7 @@ case 142938048:return image_x_epson_erf; case 287571459:return sist2_sidecar; default: return 0;}} unsigned int mime_name_lookup(unsigned long mime_crc32) {switch (mime_crc32) { +case 3272851765: return application_x_matlab_data; case 3812269631: return application_arj; case 2479484568: return application_base64; case 3891182180: return application_binhex; @@ -1635,6 +1644,8 @@ case 768274928: return text_mcf; case 3970938585: return text_pascal; case 1059844876: return text_PGP; case 1152832851: return text_plain; +case 3112468514: return text_x_script_python; +case 1881267919: return text_csv; case 2809123822: return application_vnd_coffeescript; case 4000659158: return text_richtext; case 1060344107: return text_rtf; diff --git a/src/parsing/parse.c b/src/parsing/parse.c index 4786407..e680a03 100644 --- a/src/parsing/parse.c +++ b/src/parsing/parse.c @@ -46,17 +46,13 @@ file_type_t get_file_type(unsigned int mime, size_t size, const char *filepath) return FILETYPE_MEDIA; } else if (IS_PDF(mime)) { return FILETYPE_EBOOK; - } else if (major_mime == MimeText && ScanCtx.text_ctx.content_size > 0) { - if (IS_MARKUP(mime)) { - return FILETYPE_MARKUP; - } else { - return FILETYPE_TEXT; - } - + } else if (IS_MARKUP(mime)) { + return FILETYPE_MARKUP; + } else if (major_mime == MimeText) { + return FILETYPE_TEXT; } else if (IS_FONT(mime)) { return FILETYPE_FONT; - } else if ( - ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && ( + } else if (ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && ( IS_ARC(mime) || (IS_ARC_FILTER(mime) && should_parse_filtered_file(filepath)) )) { @@ -98,10 +94,6 @@ int get_mime(parse_job_t *job) { } } - if (strlen(extension) == 0 && strlen(job->filepath + job->base) == 40) { - fprintf(stderr, "GIT? %s", job->filepath); - } - if (ScanCtx.fast) { return 0; } @@ -122,7 +114,6 @@ int get_mime(parse_job_t *job) { LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc)); } - return GET_MIME_ERROR_FATAL; } @@ -130,12 +121,13 @@ int get_mime(parse_job_t *job) { if (magic_mime_str != NULL) { mime = (int) mime_get_mime_by_string(magic_mime_str); - free(magic_mime_str); if (mime == 0) { LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str); + free(magic_mime_str); return 0; } + free(magic_mime_str); } if (job->vfile.reset != NULL) { @@ -163,14 +155,11 @@ void parse(parse_job_t *job) { doc->meta_head = NULL; doc->meta_tail = NULL; doc->size = job->vfile.st_size; - doc->mtime = (int) job->vfile.mtime; + doc->mtime = job->vfile.mtime; doc->mime = get_mime(job); generate_doc_id(doc->filepath + ScanCtx.index.desc.root_len, doc->doc_id); if (doc->mime == GET_MIME_ERROR_FATAL) { - pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); - ScanCtx.dbg_failed_files_count += 1; - pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); CLOSE_FILE(job->vfile) free(doc); @@ -178,9 +167,6 @@ void parse(parse_job_t *job) { } if (database_mark_document(ProcData.index_db, doc->doc_id, doc->mtime)) { - pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); - ScanCtx.dbg_skipped_files_count += 1; - pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); CLOSE_FILE(job->vfile) free(doc); @@ -246,7 +232,7 @@ void parse(parse_job_t *job) { meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN); meta_parent->key = MetaParent; strcpy(meta_parent->str_val, job->parent); - APPEND_META((doc), meta_parent) + APPEND_META((doc), meta_parent); } CLOSE_FILE(job->vfile) @@ -254,7 +240,7 @@ void parse(parse_job_t *job) { if (job->vfile.has_checksum) { char sha1_digest_str[SHA1_STR_LENGTH]; buf2hex((unsigned char *) job->vfile.sha1_digest, SHA1_DIGEST_LENGTH, (char *) sha1_digest_str); - APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str) + APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str); } write_document(doc); diff --git a/src/sist.h b/src/sist.h index 8e6eb5a..3598cb6 100644 --- a/src/sist.h +++ b/src/sist.h @@ -27,6 +27,8 @@ #define UNUSED(x) __attribute__((__unused__)) x +#define MAX_THREADS (256) + #include "util.h" #include "log.h" #include "types.h" diff --git a/src/tpool.c b/src/tpool.c index 8f665f8..6cdf31e 100644 --- a/src/tpool.c +++ b/src/tpool.c @@ -6,7 +6,7 @@ #include #include "parsing/parse.h" -#define BLANK_STR " " +#define BLANK_STR " " typedef struct { int thread_id; @@ -17,7 +17,6 @@ typedef struct { typedef struct tpool { pthread_t threads[256]; int num_threads; - int fork; int print_progress; @@ -32,6 +31,8 @@ typedef struct tpool { pthread_cond_t workers_initialized_cond; int busy_count; int initialized_count; + int thread_id_to_pid_mapping[MAX_THREADS]; + char ipc_database_filepath[128]; } *shm; } tpool_t; @@ -43,11 +44,6 @@ void job_destroy(job_t *job) { free(job); } -void tpool_dump_debug_info(tpool_t *pool) { - // TODO - LOG_DEBUGF("tpool.c", "pool->num_threads = %d", pool->num_threads); -} - /** * Push work object to thread pool */ @@ -130,108 +126,124 @@ static void worker_thread_loop(tpool_t *pool) { } static void worker_proc_init(tpool_t *pool, int thread_id) { - // TODO create PID -> thread_id mapping for signal handler + pthread_mutex_lock(&pool->shm->data_mutex); + pool->shm->thread_id_to_pid_mapping[thread_id] = getpid(); + pthread_mutex_unlock(&pool->shm->data_mutex); ProcData.thread_id = thread_id; if (ScanCtx.index.path[0] != '\0') { - // TODO This should be closed in proc cleanup function ProcData.index_db = database_create(ScanCtx.index.path, INDEX_DATABASE); ProcData.index_db->ipc_ctx = &pool->shm->ipc_ctx; database_open(ProcData.index_db); } - // TODO /dev/shm pthread_mutex_lock(&pool->shm->mutex); - ProcData.ipc_db = database_create("/dev/shm/ipc.sist2", IPC_CONSUMER_DATABASE); + ProcData.ipc_db = database_create(pool->shm->ipc_database_filepath, IPC_CONSUMER_DATABASE); ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx; database_open(ProcData.ipc_db); pthread_mutex_unlock(&pool->shm->mutex); } -void worker_proc_cleanup(tpool_t* pool) { +void worker_proc_cleanup(tpool_t *pool) { if (ProcData.index_db != NULL) { database_close(ProcData.index_db, FALSE); } database_close(ProcData.ipc_db, FALSE); } +#ifndef SIST_DEBUG +#define TPOOL_FORK +#endif + /** * Thread worker function */ static void *tpool_worker(void *arg) { tpool_t *pool = ((start_thread_arg_t *) arg)->pool; - if (pool->fork) { - while (TRUE) { - int pid = fork(); +#ifdef TPOOL_FORK + while (TRUE) { + int pid = fork(); - if (pid == 0) { - worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id); + if (pid == 0) { + worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id); - pthread_mutex_lock(&pool->shm->mutex); - pthread_cond_signal(&pool->shm->workers_initialized_cond); - pool->shm->initialized_count += 1; - pthread_mutex_unlock(&pool->shm->mutex); + pthread_mutex_lock(&pool->shm->mutex); + pthread_cond_signal(&pool->shm->workers_initialized_cond); + pool->shm->initialized_count += 1; + pthread_mutex_unlock(&pool->shm->mutex); - worker_thread_loop(pool); + worker_thread_loop(pool); - pthread_mutex_lock(&pool->shm->mutex); - pthread_cond_signal(&pool->shm->done_working_cond); - pthread_mutex_unlock(&pool->shm->mutex); + pthread_mutex_lock(&pool->shm->mutex); + pthread_cond_signal(&pool->shm->done_working_cond); + pthread_mutex_unlock(&pool->shm->mutex); - worker_proc_cleanup(pool); + worker_proc_cleanup(pool); - exit(0); + exit(0); - } else { - int status; - // TODO: On crash, print debug info and resume thread - waitpid(pid, &status, 0); + } else { + int status; + waitpid(pid, &status, 0); - LOG_DEBUGF("tpool.c", "Child process terminated with status code %d", WEXITSTATUS(status)); + LOG_DEBUGF("tpool.c", "Child process terminated with status code %d", WEXITSTATUS(status)); - pthread_mutex_lock(&(pool->shm->ipc_ctx.mutex)); - pool->shm->ipc_ctx.completed_job_count += 1; - pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex)); + pthread_mutex_lock(&(pool->shm->ipc_ctx.mutex)); + pool->shm->ipc_ctx.completed_job_count += 1; + pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex)); - pthread_mutex_lock(&(pool->shm->data_mutex)); - pool->shm->busy_count -= 1; - pthread_mutex_unlock(&(pool->shm->data_mutex)); + pthread_mutex_lock(&(pool->shm->data_mutex)); + pool->shm->busy_count -= 1; + pthread_mutex_unlock(&(pool->shm->data_mutex)); - if (WIFSIGNALED(status)) { - // TODO: Get current_job based on PID - const char *job_filepath = "TODO"; - - LOG_FATALF_NO_EXIT( - "tpool.c", - "Child process was terminated by signal (%s).\n" - BLANK_STR "The process was working on %s", - strsignal(WTERMSIG(status)), - job_filepath - ); + if (WIFSIGNALED(status)) { + int crashed_thread_id = -1; + for (int i = 0; i < MAX_THREADS; i++) { + if (pool->shm->thread_id_to_pid_mapping[i] == pid) { + crashed_thread_id = i; + break; + } } - break; + + const char *job_filepath; + if (crashed_thread_id != -1) { + job_filepath = pool->shm->ipc_ctx.current_job[crashed_thread_id]; + } else { + job_filepath = "unknown"; + } + + LOG_FATALF_NO_EXIT( + "tpool.c", + "Child process crashed (%s).\n" + BLANK_STR "The process was working on %s\n" + BLANK_STR "Please consider creating a bug report at https://github.com/simon987/sist2/issues !\n" + BLANK_STR "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs.\n", + strsignal(WTERMSIG(status)), + job_filepath + ); + continue; } + break; } - - } else { - worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id); - - pthread_mutex_lock(&pool->shm->mutex); - pthread_cond_signal(&pool->shm->workers_initialized_cond); - pool->shm->initialized_count += 1; - pthread_mutex_unlock(&pool->shm->mutex); - - worker_thread_loop(pool); - - pthread_mutex_lock(&pool->shm->mutex); - pthread_cond_signal(&pool->shm->done_working_cond); - pthread_mutex_unlock(&pool->shm->mutex); - - return NULL; } +#else + worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id); + + pthread_mutex_lock(&pool->shm->mutex); + pthread_cond_signal(&pool->shm->workers_initialized_cond); + pool->shm->initialized_count += 1; + pthread_mutex_unlock(&pool->shm->mutex); + + worker_thread_loop(pool); + + pthread_mutex_lock(&pool->shm->mutex); + pthread_cond_signal(&pool->shm->done_working_cond); + pthread_mutex_unlock(&pool->shm->mutex); +#endif + return NULL; } @@ -295,13 +307,10 @@ void tpool_destroy(tpool_t *pool) { */ tpool_t *tpool_create(int thread_cnt, int print_progress) { - int fork = FALSE; - tpool_t *pool = malloc(sizeof(tpool_t)); pool->shm = mmap(NULL, sizeof(*pool->shm), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); - pool->fork = fork; pool->num_threads = thread_cnt; pool->shm->ipc_ctx.job_count = 0; pool->shm->ipc_ctx.no_more_jobs = FALSE; @@ -310,6 +319,7 @@ tpool_t *tpool_create(int thread_cnt, int print_progress) { pool->shm->job_type = JOB_UNDEFINED; memset(pool->threads, 0, sizeof(pool->threads)); pool->print_progress = print_progress; + sprintf(pool->shm->ipc_database_filepath, "/dev/shm/sist2-ipc-%d.sqlite", getpid()); pthread_mutexattr_t mutexattr; pthread_mutexattr_init(&mutexattr); @@ -329,10 +339,7 @@ tpool_t *tpool_create(int thread_cnt, int print_progress) { pthread_cond_init(&(pool->shm->done_working_cond), &condattr); pthread_cond_init(&(pool->shm->workers_initialized_cond), &condattr); - remove("/dev/shm/ipc.sist2"); - remove("/dev/shm/ipc.sist2-wal"); - remove("/dev/shm/ipc.sist2-shm"); - ProcData.ipc_db = database_create("/dev/shm/ipc.sist2", IPC_PRODUCER_DATABASE); + ProcData.ipc_db = database_create(pool->shm->ipc_database_filepath, IPC_PRODUCER_DATABASE); ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx; database_initialize(ProcData.ipc_db); diff --git a/src/tpool.h b/src/tpool.h index 71742d8..b4350bc 100644 --- a/src/tpool.h +++ b/src/tpool.h @@ -19,8 +19,6 @@ int tpool_add_work(tpool_t *pool, job_t *job); void tpool_wait(tpool_t *pool); -void tpool_dump_debug_info(tpool_t *pool); - void job_destroy(job_t *job); #endif diff --git a/third-party/libscan/libscan/arc/arc.c b/third-party/libscan/libscan/arc/arc.c index d56f538..bb5ed32 100644 --- a/third-party/libscan/libscan/arc/arc.c +++ b/third-party/libscan/libscan/arc/arc.c @@ -147,7 +147,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre } if (ret != ARCHIVE_OK) { - CTX_LOG_ERRORF(f->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a)) + CTX_LOG_ERRORF(f->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a)); archive_read_free(a); return SCAN_ERR_READ; } @@ -169,7 +169,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur); meta_list->key = MetaContent; strcpy(meta_list->str_val, buf.buf); - APPEND_META(doc, meta_list) + APPEND_META(doc, meta_list); dyn_buffer_destroy(&buf); } else { @@ -212,13 +212,13 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre double decompressed_size_ratio = (double) sub_job->vfile.st_size / (double) f->st_size; if (decompressed_size_ratio > MAX_DECOMPRESSED_SIZE_RATIO) { CTX_LOG_DEBUGF("arc.c", "Skipped %s, possible zip bomb (decompressed_size_ratio=%f)", sub_job->filepath, - decompressed_size_ratio) + decompressed_size_ratio); break; } // Handle excludes if (exclude != NULL && EXCLUDED(sub_job->filepath)) { - CTX_LOG_DEBUGF("arc.c", "Excluded: %s", sub_job->filepath) + CTX_LOG_DEBUGF("arc.c", "Excluded: %s", sub_job->filepath); continue; } diff --git a/third-party/libscan/libscan/comic/comic.c b/third-party/libscan/libscan/comic/comic.c index aeb0baf..263ff70 100644 --- a/third-party/libscan/libscan/comic/comic.c +++ b/third-party/libscan/libscan/comic/comic.c @@ -18,7 +18,7 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) { int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE); if (ret != ARCHIVE_OK) { - CTX_LOG_ERRORF(f->filepath, "(cbr.c) [%d] %s", ret, archive_error_string(a)) + CTX_LOG_ERRORF(f->filepath, "(cbr.c) [%d] %s", ret, archive_error_string(a)); archive_read_free(a); return; } @@ -38,7 +38,7 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) { if (read != entry_size) { const char *err_str = archive_error_string(a); if (err_str) { - CTX_LOG_ERRORF("comic.c", "Error while reading entry: %s", err_str) + CTX_LOG_ERRORF("comic.c", "Error while reading entry: %s", err_str); } free(buf); break; diff --git a/third-party/libscan/libscan/ebook/ebook.c b/third-party/libscan/libscan/ebook/ebook.c index 95fb4a4..b53d937 100644 --- a/third-party/libscan/libscan/ebook/ebook.c +++ b/third-party/libscan/libscan/ebook/ebook.c @@ -54,7 +54,7 @@ load_pixmap(scan_ebook_ctx_t *ctx, int page, fz_context *fzctx, fz_document *fzd fz_catch(fzctx)err = 1; if (err != 0) { - CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message) + CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message); return NULL; } @@ -86,14 +86,14 @@ load_pixmap(scan_ebook_ctx_t *ctx, int page, fz_context *fzctx, fz_document *fzd } fz_catch(fzctx)err = fzctx->error.errcode; if (err != 0) { - CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message) + CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message); fz_drop_page(fzctx, *cover); fz_drop_pixmap(fzctx, pixmap); return NULL; } if (pixmap->n != 3) { - CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n) + CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n); fz_drop_page(fzctx, *cover); fz_drop_pixmap(fzctx, pixmap); return NULL; @@ -113,7 +113,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d if (pixmap_is_blank(pixmap)) { fz_drop_page(fzctx, cover); fz_drop_pixmap(fzctx, pixmap); - CTX_LOG_DEBUG(doc->filepath, "Cover page is blank, using page 1 instead") + CTX_LOG_DEBUG(doc->filepath, "Cover page is blank, using page 1 instead"); pixmap = load_pixmap(ctx, 1, fzctx, fzdoc, doc, &cover); if (pixmap == NULL) { return FALSE; @@ -161,7 +161,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d av_init_packet(&jpeg_packet); avcodec_receive_packet(jpeg_encoder, &jpeg_packet); - APPEND_LONG_META(doc, MetaThumbnail, 1) + APPEND_LONG_META(doc, MetaThumbnail, 1); ctx->store(doc->doc_id, 0, (char *) jpeg_packet.data, jpeg_packet.size); free(samples); @@ -180,14 +180,14 @@ void fz_err_callback(void *user, const char *message) { document_t *doc = (document_t *) user; const scan_ebook_ctx_t *ctx = &thread_ctx; - CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message) + CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message); } void fz_warn_callback(void *user, const char *message) { document_t *doc = (document_t *) user; const scan_ebook_ctx_t *ctx = &thread_ctx; - CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message) + CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message); } static void init_fzctx(fz_context *fzctx, document_t *doc) { @@ -243,7 +243,7 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev), if (img->w >= MIN_OCR_WIDTH && img->h >= MIN_OCR_HEIGHT && OCR_IS_VALID_BPP(img->n)) { fz_pixmap *pix = img->get_pixmap(fzctx, img, NULL, img->w, img->h, &l2factor); - ocr_extract_text(thread_ctx.tesseract_path, thread_ctx.tesseract_lang, pix->samples, pix->w, pix->h, pix->n, pix->stride, pix->xres, fill_image_ocr_cb); + ocr_extract_text(thread_ctx.tesseract_path, thread_ctx.tesseract_lang, pix->samples, pix->w, pix->h, pix->n, (int)pix->stride, pix->xres, fill_image_ocr_cb); fz_drop_pixmap(fzctx, pix); } } @@ -282,14 +282,14 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi fz_catch(fzctx)err = fzctx->error.errcode; if (err) { - CTX_LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, fzctx->error.message) + CTX_LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, fzctx->error.message); fz_drop_stream(fzctx, stream); fz_drop_document(fzctx, fzdoc); fz_drop_context(fzctx); return; } - APPEND_LONG_META(doc, MetaPages, page_count) + APPEND_LONG_META(doc, MetaPages, page_count); if (ctx->enable_tn) { if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) { @@ -312,7 +312,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi fz_catch(fzctx); if (strlen(title) > 0) { - APPEND_UTF8_META(doc, MetaTitle, title) + APPEND_UTF8_META(doc, MetaTitle, title); } char author[4096] = {'\0',}; @@ -320,7 +320,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi fz_catch(fzctx); if (strlen(author) > 0) { - APPEND_UTF8_META(doc, MetaAuthor, author) + APPEND_UTF8_META(doc, MetaAuthor, author); } @@ -334,7 +334,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi fz_try(fzctx)page = fz_load_page(fzctx, fzdoc, current_page); fz_catch(fzctx)err = fzctx->error.errcode; if (err != 0) { - CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message) + CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message); text_buffer_destroy(&thread_buffer); fz_drop_page(fzctx, page); fz_drop_stream(fzctx, stream); @@ -363,7 +363,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi } fz_catch(fzctx)err = fzctx->error.errcode; if (err != 0) { - CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message) + CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message); text_buffer_destroy(&thread_buffer); fz_drop_page(fzctx, page); fz_drop_stext_page(fzctx, stext); @@ -393,7 +393,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur); meta_content->key = MetaContent; memcpy(meta_content->str_val, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur); - APPEND_META(doc, meta_content) + APPEND_META(doc, meta_content); text_buffer_destroy(&thread_buffer); } @@ -418,7 +418,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) { int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE); if (ret != ARCHIVE_OK) { - CTX_LOG_ERRORF(f->filepath, "(ebook.c) [%d] %s", ret, archive_error_string(a)) + CTX_LOG_ERRORF(f->filepath, "(ebook.c) [%d] %s", ret, archive_error_string(a)); archive_read_free(a); return; } @@ -439,7 +439,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) { if (read != entry_size) { const char *err_str = archive_error_string(a); if (err_str) { - CTX_LOG_ERRORF("ebook.c", "Error while reading entry: %s", err_str) + CTX_LOG_ERRORF("ebook.c", "Error while reading entry: %s", err_str); } free(buf); break; @@ -460,7 +460,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) { meta_line_t *meta_content = malloc(sizeof(meta_line_t) + content_buffer.dyn_buffer.cur); meta_content->key = MetaContent; memcpy(meta_content->str_val, content_buffer.dyn_buffer.buf, content_buffer.dyn_buffer.cur); - APPEND_META(doc, meta_content) + APPEND_META(doc, meta_content); text_buffer_destroy(&content_buffer); @@ -477,7 +477,7 @@ void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, docume size_t buf_len; void *buf = read_all(f, &buf_len); if (buf == NULL) { - CTX_LOG_ERROR(f->filepath, "read_all() failed") + CTX_LOG_ERROR(f->filepath, "read_all() failed"); return; } diff --git a/third-party/libscan/libscan/font/font.c b/third-party/libscan/libscan/font/font.c index d734134..7c37665 100644 --- a/third-party/libscan/libscan/font/font.c +++ b/third-party/libscan/libscan/font/font.c @@ -146,7 +146,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) { size_t buf_len = 0; void *buf = read_all(f, &buf_len); if (buf == NULL) { - CTX_LOG_ERROR(f->filepath, "read_all() failed") + CTX_LOG_ERROR(f->filepath, "read_all() failed"); return; } @@ -154,7 +154,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) { FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, (int) buf_len, 0, &face); if (err != 0) { CTX_LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, - FT_Error_String(err)) + FT_Error_String(err)); free(buf); return; } @@ -174,7 +174,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) { meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name)); meta_name->key = MetaFontName; strcpy(meta_name->str_val, font_name); - APPEND_META(doc, meta_name) + APPEND_META(doc, meta_name); if (!ctx->enable_tn) { FT_Done_Face(face); @@ -188,7 +188,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) { err = FT_Set_Pixel_Sizes(face, 0, pixel); if (err != 0) { CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, - FT_Error_String(err)) + FT_Error_String(err)); FT_Done_Face(face); free(buf); return; @@ -210,7 +210,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) { err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER); if (err != 0) { CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, - FT_Error_String(err)) + FT_Error_String(err)); continue; } } @@ -231,7 +231,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) { dyn_buffer_t bmp_data = dyn_buffer_create(); bmp_format(&bmp_data, dimensions, bitmap); - APPEND_LONG_META(doc, MetaThumbnail, 1) + APPEND_LONG_META(doc, MetaThumbnail, 1); ctx->store(doc->doc_id, 0, bmp_data.buf, bmp_data.cur); dyn_buffer_destroy(&bmp_data); diff --git a/third-party/libscan/libscan/json/json.c b/third-party/libscan/libscan/json/json.c index ef93405..1bf590f 100644 --- a/third-party/libscan/libscan/json/json.c +++ b/third-party/libscan/libscan/json/json.c @@ -33,7 +33,7 @@ int json_extract_text(cJSON *json, text_buffer_t *tex) { scan_code_t parse_json(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc) { if (f->st_size > JSON_MAX_FILE_SIZE) { - CTX_LOG_WARNINGF("json.c", "File larger than maximum allowed [%s]", f->filepath) + CTX_LOG_WARNINGF("json.c", "File larger than maximum allowed [%s]", f->filepath); return SCAN_ERR_SKIP; } diff --git a/third-party/libscan/libscan/macros.h b/third-party/libscan/libscan/macros.h index 944225c..00084b2 100644 --- a/third-party/libscan/libscan/macros.h +++ b/third-party/libscan/libscan/macros.h @@ -25,20 +25,20 @@ #define SHA1_STR_LENGTH (SHA1_DIGEST_LENGTH * 2 + 1) #define MD5_STR_LENGTH (MD5_DIGEST_LENGTH * 2 + 1) -#define APPEND_STR_META(doc, keyname, value) \ +#define APPEND_STR_META(doc, keyname, value) do {\ {meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \ meta_str->key = keyname; \ strcpy(meta_str->str_val, value); \ - APPEND_META(doc, meta_str)} + APPEND_META(doc, meta_str);}} while(0) -#define APPEND_LONG_META(doc, keyname, value) \ +#define APPEND_LONG_META(doc, keyname, value) do{\ {meta_line_t *meta_long = malloc(sizeof(meta_line_t)); \ meta_long->key = keyname; \ meta_long->long_val = value; \ - APPEND_META(doc, meta_long)} + APPEND_META(doc, meta_long);}} while(0) -#define APPEND_META(doc, meta) \ +#define APPEND_META(doc, meta) do {\ meta->next = NULL;\ if (doc->meta_head == NULL) {\ doc->meta_head = meta;\ @@ -46,7 +46,7 @@ } else {\ doc->meta_tail->next = meta;\ doc->meta_tail = meta;\ - } + }}while(0) #define APPEND_UTF8_META(doc, keyname, str) \ text_buffer_t tex = text_buffer_create(-1); \ @@ -55,5 +55,5 @@ meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \ meta_tag->key = keyname; \ strcpy(meta_tag->str_val, tex.dyn_buffer.buf); \ - APPEND_META(doc, meta_tag) \ - text_buffer_destroy(&tex); + APPEND_META(doc, meta_tag); \ + text_buffer_destroy(&tex) diff --git a/third-party/libscan/libscan/media/media.c b/third-party/libscan/libscan/media/media.c index 38cb421..fa62724 100644 --- a/third-party/libscan/libscan/media/media.c +++ b/third-party/libscan/libscan/media/media.c @@ -163,7 +163,7 @@ static void read_subtitles(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, i text_buffer_terminate_string(&tex); - APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf) + APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf); text_buffer_destroy(&tex); avcodec_free_context(&decoder); } @@ -190,7 +190,7 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d CTX_LOG_WARNINGF(doc->filepath, "(media.c) avcodec_read_frame() returned error code [%d] %s", read_frame_ret, av_err2str(read_frame_ret) - ) + ); } frame_and_packet_free(result); return NULL; @@ -210,7 +210,7 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d CTX_LOG_ERRORF(doc->filepath, "(media.c) avcodec_send_packet() returned error code [%d] %s", decode_ret, av_err2str(decode_ret) - ) + ); frame_and_packet_free(result); return NULL; } @@ -230,7 +230,7 @@ void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDic while (meta != NULL) { if (meta->key == key) { CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'", - key, meta->str_val, key, tag->value) + key, meta->str_val, key, tag->value); return; } meta = meta->next; @@ -243,7 +243,7 @@ void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDic meta_tag->key = key; strcpy(meta_tag->str_val, tex.dyn_buffer.buf); - APPEND_META(doc, meta_tag) + APPEND_META(doc, meta_tag); text_buffer_destroy(&tex); } @@ -253,7 +253,7 @@ void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDic #define STRCPY_TOLOWER(dst, str) \ strncpy(dst, str, sizeof(dst)); \ char *ptr = dst; \ - for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr); + for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr) __always_inline static void append_audio_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) { @@ -261,18 +261,18 @@ static void append_audio_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx AVDictionaryEntry *tag = NULL; while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { char key[256]; - STRCPY_TOLOWER(key, tag->key) + STRCPY_TOLOWER(key, tag->key); if (strcmp(key, "artist") == 0) { - APPEND_TAG_META(MetaArtist) + APPEND_TAG_META(MetaArtist); } else if (strcmp(key, "genre") == 0) { - APPEND_TAG_META(MetaGenre) + APPEND_TAG_META(MetaGenre); } else if (strcmp(key, "title") == 0) { - APPEND_TAG_META(MetaTitle) + APPEND_TAG_META(MetaTitle); } else if (strcmp(key, "album_artist") == 0) { - APPEND_TAG_META(MetaAlbumArtist) + APPEND_TAG_META(MetaAlbumArtist); } else if (strcmp(key, "album") == 0) { - APPEND_TAG_META(MetaAlbum) + APPEND_TAG_META(MetaAlbum); } else if (strcmp(key, "comment") == 0) { append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent); } @@ -291,14 +291,14 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f if (meta_duration->long_val > INT32_MAX) { meta_duration->long_val = 0; } - APPEND_META(doc, meta_duration) + APPEND_META(doc, meta_duration); } if (pFormatCtx->bit_rate != 0) { meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t)); meta_bitrate->key = MetaMediaBitrate; meta_bitrate->long_val = pFormatCtx->bit_rate; - APPEND_META(doc, meta_bitrate) + APPEND_META(doc, meta_bitrate); } } @@ -306,7 +306,7 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f if (is_video) { while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { char key[256]; - STRCPY_TOLOWER(key, tag->key) + STRCPY_TOLOWER(key, tag->key); if (strcmp(key, "title") == 0) { append_tag_meta_if_not_exists(ctx, doc, tag, MetaTitle); @@ -320,38 +320,38 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f // EXIF metadata while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { char key[256]; - STRCPY_TOLOWER(key, tag->key) + STRCPY_TOLOWER(key, tag->key); if (strcmp(key, "artist") == 0) { append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist); } else if (strcmp(key, "imagedescription") == 0) { append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent); } else if (strcmp(key, "make") == 0) { - APPEND_TAG_META(MetaExifMake) + APPEND_TAG_META(MetaExifMake); } else if (strcmp(key, "model") == 0) { - APPEND_TAG_META(MetaExifModel) + APPEND_TAG_META(MetaExifModel); } else if (strcmp(key, "software") == 0) { - APPEND_TAG_META(MetaExifSoftware) + APPEND_TAG_META(MetaExifSoftware); } else if (strcmp(key, "fnumber") == 0) { - APPEND_TAG_META(MetaExifFNumber) + APPEND_TAG_META(MetaExifFNumber); } else if (strcmp(key, "focallength") == 0) { - APPEND_TAG_META(MetaExifFocalLength) + APPEND_TAG_META(MetaExifFocalLength); } else if (strcmp(key, "usercomment") == 0) { - APPEND_TAG_META(MetaExifUserComment) + APPEND_TAG_META(MetaExifUserComment); } else if (strcmp(key, "isospeedratings") == 0) { - APPEND_TAG_META(MetaExifIsoSpeedRatings) + APPEND_TAG_META(MetaExifIsoSpeedRatings); } else if (strcmp(key, "exposuretime") == 0) { - APPEND_TAG_META(MetaExifExposureTime) + APPEND_TAG_META(MetaExifExposureTime); } else if (strcmp(key, "datetime") == 0) { - APPEND_TAG_META(MetaExifDateTime) + APPEND_TAG_META(MetaExifDateTime); } else if (strcmp(key, "gpslatitude") == 0) { - APPEND_TAG_META(MetaExifGpsLatitudeDMS) + APPEND_TAG_META(MetaExifGpsLatitudeDMS); } else if (strcmp(key, "gpslatituderef") == 0) { - APPEND_TAG_META(MetaExifGpsLatitudeRef) + APPEND_TAG_META(MetaExifGpsLatitudeRef); } else if (strcmp(key, "gpslongitude") == 0) { - APPEND_TAG_META(MetaExifGpsLongitudeDMS) + APPEND_TAG_META(MetaExifGpsLongitudeDMS); } else if (strcmp(key, "gpslongituderef") == 0) { - APPEND_TAG_META(MetaExifGpsLongitudeRef) + APPEND_TAG_META(MetaExifGpsLongitudeRef); } } } @@ -432,11 +432,11 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor CTX_LOG_DEBUGF( doc->filepath, "(media.c) Could not seek media file: %s", av_err2str(seek_ret) - ) + ); } if (seek_ok == FALSE && thumbnail_index != 0) { - CTX_LOG_WARNING(doc->filepath, "(media.c) Could not seek media file. Can't generate additional thumbnails.") + CTX_LOG_WARNING(doc->filepath, "(media.c) Could not seek media file. Can't generate additional thumbnails."); return SAVE_THUMBNAIL_FAILED; } } @@ -522,7 +522,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id); if (desc != NULL) { - APPEND_STR_META(doc, MetaMediaAudioCodec, desc->name) + APPEND_STR_META(doc, MetaMediaAudioCodec, desc->name); } audio_stream = i; @@ -533,18 +533,18 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id); if (desc != NULL) { - APPEND_STR_META(doc, MetaMediaVideoCodec, desc->name) + APPEND_STR_META(doc, MetaMediaVideoCodec, desc->name); } meta_line_t *meta_w = malloc(sizeof(meta_line_t)); meta_w->key = MetaWidth; meta_w->long_val = stream->codecpar->width; - APPEND_META(doc, meta_w) + APPEND_META(doc, meta_w); meta_line_t *meta_h = malloc(sizeof(meta_line_t)); meta_h->key = MetaHeight; meta_h->long_val = stream->codecpar->height; - APPEND_META(doc, meta_h) + APPEND_META(doc, meta_h); video_stream = i; } @@ -611,7 +611,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, } if (number_of_thumbnails_generated > 0) { - APPEND_LONG_META(doc, MetaThumbnail, number_of_thumbnails_generated) + APPEND_LONG_META(doc, MetaThumbnail, number_of_thumbnails_generated); } avcodec_free_context(&decoder); @@ -625,12 +625,12 @@ void parse_media_filename(scan_media_ctx_t *ctx, const char *filepath, document_ AVFormatContext *pFormatCtx = avformat_alloc_context(); if (pFormatCtx == NULL) { - CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") + CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()"); return; } int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL); if (res < 0) { - CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)) + CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)); avformat_close_input(&pFormatCtx); avformat_free_context(pFormatCtx); return; @@ -724,7 +724,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc, AVFormatContext *pFormatCtx = avformat_alloc_context(); if (pFormatCtx == NULL) { - CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") + CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()"); return; } @@ -737,13 +737,13 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc, if (f->st_size <= ctx->max_media_buffer) { int ret = memfile_open(f, &memfile); if (ret == 0) { - CTX_LOG_DEBUGF(f->filepath, "Loading media file in memory (%ldB)", f->st_size) + CTX_LOG_DEBUGF(f->filepath, "Loading media file in memory (%ldB)", f->st_size); io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek); } } if (io_ctx == NULL) { - CTX_LOG_DEBUGF(f->filepath, "Reading media file without seek support", f->st_size) + CTX_LOG_DEBUGF(f->filepath, "Reading media file without seek support", f->st_size); io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL); } @@ -752,7 +752,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc, int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL); if (res < 0) { if (res != -5) { - CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)) + CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)); } av_free(io_ctx->buffer); memfile_close(&memfile); @@ -787,7 +787,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu AVFormatContext *pFormatCtx = avformat_alloc_context(); if (pFormatCtx == NULL) { - CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") + CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()"); return FALSE; } @@ -795,7 +795,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu int ret = memfile_open_buf(buf, buf_len, &memfile); if (ret == 0) { - CTX_LOG_DEBUGF(doc->filepath, "Loading media file in memory (%ldB)", buf_len) + CTX_LOG_DEBUGF(doc->filepath, "Loading media file in memory (%ldB)", buf_len); io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek); } else { avformat_close_input(&pFormatCtx); @@ -850,7 +850,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu } if (scaled_frame == STORE_AS_IS) { - APPEND_LONG_META(doc, MetaThumbnail, 1) + APPEND_LONG_META(doc, MetaThumbnail, 1); ctx->store(doc->doc_id, 0, frame_and_packet->packet->data, frame_and_packet->packet->size); } else { // Encode frame to jpeg @@ -863,7 +863,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu avcodec_receive_packet(jpeg_encoder, &jpeg_packet); // Save thumbnail - APPEND_LONG_META(doc, MetaThumbnail, 1) + APPEND_LONG_META(doc, MetaThumbnail, 1); ctx->store(doc->doc_id, 0, jpeg_packet.data, jpeg_packet.size); av_packet_unref(&jpeg_packet); diff --git a/third-party/libscan/libscan/mobi/scan_mobi.c b/third-party/libscan/libscan/mobi/scan_mobi.c index 1a42fab..9d2c5d9 100644 --- a/third-party/libscan/libscan/mobi/scan_mobi.c +++ b/third-party/libscan/libscan/mobi/scan_mobi.c @@ -8,7 +8,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) { MOBIData *m = mobi_init(); if (m == NULL) { - CTX_LOG_ERROR(f->filepath, "mobi_init() failed") + CTX_LOG_ERROR(f->filepath, "mobi_init() failed"); return; } @@ -16,7 +16,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) { char* buf = read_all(f, &buf_len); if (buf == NULL) { mobi_free(m); - CTX_LOG_ERROR(f->filepath, "read_all() failed") + CTX_LOG_ERROR(f->filepath, "read_all() failed"); return; } @@ -24,7 +24,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) { if (file == NULL) { mobi_free(m); free(buf); - CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno) + CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno); return; } @@ -33,25 +33,25 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) { if (mobi_ret != MOBI_SUCCESS) { mobi_free(m); free(buf); - CTX_LOG_ERRORF(f->filepath, "mobi_laod_file() returned error code [%d]", mobi_ret) + CTX_LOG_ERRORF(f->filepath, "mobi_laod_file() returned error code [%d]", mobi_ret); return; } char *author = mobi_meta_get_author(m); if (author != NULL) { - APPEND_STR_META(doc, MetaAuthor, author) + APPEND_STR_META(doc, MetaAuthor, author); free(author); } char *title = mobi_meta_get_title(m); if (title != NULL) { - APPEND_STR_META(doc, MetaTitle, title) + APPEND_STR_META(doc, MetaTitle, title); free(title); } const size_t maxlen = mobi_get_text_maxsize(m); if (maxlen == MOBI_NOTSET) { free(buf); - CTX_LOG_DEBUGF("%s", "Invalid text maxsize: %zu", maxlen) + CTX_LOG_DEBUGF("%s", "Invalid text maxsize: %zu", maxlen); return; } @@ -62,7 +62,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) { mobi_free(m); free(content_str); free(buf); - CTX_LOG_ERRORF(f->filepath, "mobi_get_rawml() returned error code [%d]", mobi_ret) + CTX_LOG_ERRORF(f->filepath, "mobi_get_rawml() returned error code [%d]", mobi_ret); return; } @@ -70,7 +70,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) { text_buffer_append_markup(&tex, content_str); text_buffer_terminate_string(&tex); - APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf) + APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf); free(content_str); free(buf); diff --git a/third-party/libscan/libscan/msdoc/msdoc.c b/third-party/libscan/libscan/msdoc/msdoc.c index a628ea6..526b1c1 100644 --- a/third-party/libscan/libscan/msdoc/msdoc.c +++ b/third-party/libscan/libscan/msdoc/msdoc.c @@ -39,12 +39,12 @@ void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, voi iInitDocument(file_in, (int) buf_len); const char *author = szGetAuthor(); if (author != NULL) { - APPEND_UTF8_META(doc, MetaAuthor, author) + APPEND_UTF8_META(doc, MetaAuthor, author); } const char *title = szGetTitle(); if (title != NULL) { - APPEND_UTF8_META(doc, MetaTitle, title) + APPEND_UTF8_META(doc, MetaTitle, title); } vFreeDocument(); @@ -60,7 +60,7 @@ void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, voi meta_line_t *meta_content = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); meta_content->key = MetaContent; memcpy(meta_content->str_val, tex.dyn_buffer.buf, tex.dyn_buffer.cur); - APPEND_META(doc, meta_content) + APPEND_META(doc, meta_content); text_buffer_destroy(&tex); } @@ -74,14 +74,14 @@ void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc) { size_t buf_len; char *buf = read_all(f, &buf_len); if (buf == NULL) { - CTX_LOG_ERROR(f->filepath, "read_all() failed") + CTX_LOG_ERROR(f->filepath, "read_all() failed"); return; } FILE *file = fmemopen(buf, buf_len, "rb"); if (file == NULL) { free(buf); - CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno) + CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno); return; } diff --git a/third-party/libscan/libscan/ooxml/ooxml.c b/third-party/libscan/libscan/ooxml/ooxml.c index 49010e1..da23683 100644 --- a/third-party/libscan/libscan/ooxml/ooxml.c +++ b/third-party/libscan/libscan/ooxml/ooxml.c @@ -39,7 +39,7 @@ int extract_text(scan_ooxml_ctx_t *ctx, xmlDoc *xml, xmlNode *node, text_buffer_ xmlErrorPtr err = xmlGetLastError(); if (err != NULL) { if (err->level == XML_ERR_FATAL) { - CTX_LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message) + CTX_LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message); return -1; } } @@ -85,13 +85,13 @@ static int read_part(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *bu XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET); if (xml == NULL) { - CTX_LOG_ERROR(doc->filepath, "Could not parse XML") + CTX_LOG_ERROR(doc->filepath, "Could not parse XML"); return READ_PART_ERR; } xmlNode *root = xmlDocGetRootElement(xml); if (root == NULL) { - CTX_LOG_ERROR(doc->filepath, "Empty document") + CTX_LOG_ERROR(doc->filepath, "Empty document"); xmlFreeDoc(xml); return READ_PART_ERR; } @@ -108,13 +108,13 @@ static int read_doc_props_app(scan_ooxml_ctx_t *ctx, struct archive *a, document XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET); if (xml == NULL) { - CTX_LOG_ERROR(doc->filepath, "Could not parse XML") + CTX_LOG_ERROR(doc->filepath, "Could not parse XML"); return -1; } xmlNode *root = xmlDocGetRootElement(xml); if (root == NULL) { - CTX_LOG_ERROR(doc->filepath, "Empty document") + CTX_LOG_ERROR(doc->filepath, "Empty document"); xmlFreeDoc(xml); return -1; } @@ -127,7 +127,7 @@ static int read_doc_props_app(scan_ooxml_ctx_t *ctx, struct archive *a, document } if (xmlStrEqual(child->name, _X("Pages"))) { - APPEND_LONG_META(doc, MetaPages, strtol((char *) text, NULL, 10)) + APPEND_LONG_META(doc, MetaPages, strtol((char *) text, NULL, 10)); } xmlFree(text); @@ -144,13 +144,13 @@ static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, document_t * XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET); if (xml == NULL) { - CTX_LOG_ERROR(doc->filepath, "Could not parse XML") + CTX_LOG_ERROR(doc->filepath, "Could not parse XML"); return -1; } xmlNode *root = xmlDocGetRootElement(xml); if (root == NULL) { - CTX_LOG_ERROR(doc->filepath, "Empty document") + CTX_LOG_ERROR(doc->filepath, "Empty document"); xmlFreeDoc(xml); return -1; } @@ -163,11 +163,11 @@ static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, document_t * } if (xmlStrEqual(child->name, _X("title"))) { - APPEND_STR_META(doc, MetaTitle, (char *) text) + APPEND_STR_META(doc, MetaTitle, (char *) text); } else if (xmlStrEqual(child->name, _X("creator"))) { - APPEND_STR_META(doc, MetaAuthor, (char *) text) + APPEND_STR_META(doc, MetaAuthor, (char *) text); } else if (xmlStrEqual(child->name, _X("lastModifiedBy"))) { - APPEND_STR_META(doc, MetaModifiedBy, (char *) text) + APPEND_STR_META(doc, MetaModifiedBy, (char *) text); } xmlFree(text); @@ -190,7 +190,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s char *buf = malloc(entry_size); archive_read_data(a, buf, entry_size); - APPEND_LONG_META(doc, MetaThumbnail, 1) + APPEND_LONG_META(doc, MetaThumbnail, 1); ctx->store(doc->doc_id, 1, buf, entry_size); free(buf); } @@ -200,7 +200,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) { size_t buf_len; void *buf = read_all(f, &buf_len); if (buf == NULL) { - CTX_LOG_ERROR(f->filepath, "read_all() failed") + CTX_LOG_ERROR(f->filepath, "read_all() failed"); return; } @@ -209,7 +209,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) { int ret = archive_read_open_memory(a, buf, buf_len); if (ret != ARCHIVE_OK) { - CTX_LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a)) + CTX_LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a)); archive_read_free(a); free(buf); return; @@ -250,7 +250,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) { meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); meta->key = MetaContent; strcpy(meta->str_val, tex.dyn_buffer.buf); - APPEND_META(doc, meta) + APPEND_META(doc, meta); } archive_read_close(a); diff --git a/third-party/libscan/libscan/raw/raw.c b/third-party/libscan/libscan/raw/raw.c index c4b07da..7991d7c 100644 --- a/third-party/libscan/libscan/raw/raw.c +++ b/third-party/libscan/libscan/raw/raw.c @@ -83,7 +83,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do av_init_packet(&jpeg_packet); avcodec_receive_packet(jpeg_encoder, &jpeg_packet); - APPEND_LONG_META(doc, MetaThumbnail, 1) + APPEND_LONG_META(doc, MetaThumbnail, 1); ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size); av_packet_unref(&jpeg_packet); @@ -100,76 +100,76 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) { libraw_data_t *libraw_lib = libraw_init(0); if (!libraw_lib) { - CTX_LOG_ERROR("raw.c", "Cannot create libraw handle") + CTX_LOG_ERROR("raw.c", "Cannot create libraw handle"); return; } size_t buf_len = 0; void *buf = read_all(f, &buf_len); if (buf == NULL) { - CTX_LOG_ERROR(f->filepath, "read_all() failed") + CTX_LOG_ERROR(f->filepath, "read_all() failed"); return; } int ret = libraw_open_buffer(libraw_lib, buf, buf_len); if (ret != 0) { - CTX_LOG_ERROR(f->filepath, "Could not open raw file") + CTX_LOG_ERROR(f->filepath, "Could not open raw file"); free(buf); libraw_close(libraw_lib); return; } if (*libraw_lib->idata.model != '\0') { - APPEND_STR_META(doc, MetaExifModel, libraw_lib->idata.model) + APPEND_STR_META(doc, MetaExifModel, libraw_lib->idata.model); } if (*libraw_lib->idata.make != '\0') { - APPEND_STR_META(doc, MetaExifMake, libraw_lib->idata.make) + APPEND_STR_META(doc, MetaExifMake, libraw_lib->idata.make); } if (*libraw_lib->idata.software != '\0') { - APPEND_STR_META(doc, MetaExifSoftware, libraw_lib->idata.software) + APPEND_STR_META(doc, MetaExifSoftware, libraw_lib->idata.software); } - APPEND_LONG_META(doc, MetaWidth, libraw_lib->sizes.width) - APPEND_LONG_META(doc, MetaHeight, libraw_lib->sizes.height) + APPEND_LONG_META(doc, MetaWidth, libraw_lib->sizes.width); + APPEND_LONG_META(doc, MetaHeight, libraw_lib->sizes.height); char tmp[1024]; snprintf(tmp, sizeof(tmp), "%g", libraw_lib->other.iso_speed); - APPEND_STR_META(doc, MetaExifIsoSpeedRatings, tmp) + APPEND_STR_META(doc, MetaExifIsoSpeedRatings, tmp); if (*libraw_lib->other.desc != '\0') { - APPEND_STR_META(doc, MetaContent, libraw_lib->other.desc) + APPEND_STR_META(doc, MetaContent, libraw_lib->other.desc); } if (*libraw_lib->other.artist != '\0') { - APPEND_STR_META(doc, MetaArtist, libraw_lib->other.artist) + APPEND_STR_META(doc, MetaArtist, libraw_lib->other.artist); } struct tm *time = localtime(&libraw_lib->other.timestamp); strftime(tmp, sizeof(tmp), "%Y:%m:%d %H:%M:%S", time); - APPEND_STR_META(doc, MetaExifDateTime, tmp) + APPEND_STR_META(doc, MetaExifDateTime, tmp); snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.focal_len); - APPEND_STR_META(doc, MetaExifFocalLength, tmp) + APPEND_STR_META(doc, MetaExifFocalLength, tmp); snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.aperture); - APPEND_STR_META(doc, MetaExifFNumber, tmp) + APPEND_STR_META(doc, MetaExifFNumber, tmp); int denominator = (int) roundf(1 / libraw_lib->other.shutter); snprintf(tmp, sizeof(tmp), "1/%d", denominator); - APPEND_STR_META(doc, MetaExifExposureTime, tmp) + APPEND_STR_META(doc, MetaExifExposureTime, tmp); libraw_gps_info_t gps = libraw_lib->other.parsed_gps; double gps_longitude_dec = (gps.longitude[0] + gps.longitude[1] / 60 + gps.longitude[2] / 3600) * DMS_REF(gps.longref); snprintf(tmp, sizeof(tmp), "%.15f", gps_longitude_dec); if (gps_longitude_dec != 0.0) { - APPEND_STR_META(doc, MetaExifGpsLongitudeDec, tmp) + APPEND_STR_META(doc, MetaExifGpsLongitudeDec, tmp); } double gps_latitude_dec = (gps.latitude[0] + gps.latitude[1] / 60 + gps.latitude[2] / 3600) * DMS_REF(gps.latref); snprintf(tmp, sizeof(tmp), "%.15f", gps_latitude_dec); if (gps_latitude_dec != 0.0) { - APPEND_STR_META(doc, MetaExifGpsLatitudeDec, tmp) + APPEND_STR_META(doc, MetaExifGpsLatitudeDec, tmp); } - APPEND_STR_META(doc, MetaMediaVideoCodec, "raw") + APPEND_STR_META(doc, MetaMediaVideoCodec, "raw"); if (!ctx->enable_tn) { free(buf); @@ -179,7 +179,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) { int unpack_ret = libraw_unpack_thumb(libraw_lib); if (unpack_ret != 0) { - CTX_LOG_ERRORF(f->filepath, "libraw_unpack_thumb returned error code %d", unpack_ret) + CTX_LOG_ERRORF(f->filepath, "libraw_unpack_thumb returned error code %d", unpack_ret); free(buf); libraw_close(libraw_lib); return; @@ -212,7 +212,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) { ret = libraw_unpack(libraw_lib); if (ret != 0) { - CTX_LOG_ERROR(f->filepath, "Could not unpack raw file") + CTX_LOG_ERROR(f->filepath, "Could not unpack raw file"); free(buf); libraw_close(libraw_lib); return; diff --git a/third-party/libscan/libscan/scan.h b/third-party/libscan/libscan/scan.h index 9d09016..60cf21f 100644 --- a/third-party/libscan/libscan/scan.h +++ b/third-party/libscan/libscan/scan.h @@ -34,20 +34,20 @@ typedef int scan_code_t; #define LEVEL_ERROR 3 #define LEVEL_FATAL 4 -#define CTX_LOG_DEBUGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_DEBUG, fmt, __VA_ARGS__); -#define CTX_LOG_DEBUG(filepath, str) ctx->log(filepath, LEVEL_DEBUG, str); +#define CTX_LOG_DEBUGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_DEBUG, fmt, __VA_ARGS__) +#define CTX_LOG_DEBUG(filepath, str) ctx->log(filepath, LEVEL_DEBUG, str) -#define CTX_LOG_INFOF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_INFO, fmt, __VA_ARGS__); -#define CTX_LOG_INFO(filepath, str) ctx->log(filepath, LEVEL_INFO, str); +#define CTX_LOG_INFOF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_INFO, fmt, __VA_ARGS__) +#define CTX_LOG_INFO(filepath, str) ctx->log(filepath, LEVEL_INFO, str) -#define CTX_LOG_WARNINGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_WARNING, fmt, __VA_ARGS__); -#define CTX_LOG_WARNING(filepath, str) ctx->log(filepath, LEVEL_WARNING, str); +#define CTX_LOG_WARNINGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_WARNING, fmt, __VA_ARGS__) +#define CTX_LOG_WARNING(filepath, str) ctx->log(filepath, LEVEL_WARNING, str) -#define CTX_LOG_ERRORF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_ERROR, fmt, __VA_ARGS__); -#define CTX_LOG_ERROR(filepath, str) ctx->log(filepath, LEVEL_ERROR, str); +#define CTX_LOG_ERRORF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_ERROR, fmt, __VA_ARGS__) +#define CTX_LOG_ERROR(filepath, str) ctx->log(filepath, LEVEL_ERROR, str) -#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1); -#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1); +#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1) +#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1) #define SIST_DOC_ID_LEN MD5_STR_LENGTH #define SIST_INDEX_ID_LEN MD5_STR_LENGTH diff --git a/third-party/libscan/libscan/text/text.c b/third-party/libscan/libscan/text/text.c index b4ffe33..663253d 100644 --- a/third-party/libscan/libscan/text/text.c +++ b/third-party/libscan/libscan/text/text.c @@ -2,6 +2,10 @@ scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) { + if (ctx->content_size <= 0) { + return SCAN_OK; + } + int to_read = MIN(ctx->content_size, f->st_size); if (to_read <= 2) { @@ -11,7 +15,7 @@ scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) { char *buf = malloc(to_read); int ret = f->read(f, buf, to_read); if (ret < 0) { - CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret) + CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret); free(buf); return SCAN_ERR_READ; } @@ -39,12 +43,16 @@ scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) { scan_code_t parse_markup(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) { + if (ctx->content_size <= 0) { + return SCAN_OK; + } + int to_read = MIN(MAX_MARKUP_SIZE, f->st_size); char *buf = malloc(to_read + 1); int ret = f->read(f, buf, to_read); if (ret < 0) { - CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret) + CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret); free(buf); return SCAN_ERR_READ; } diff --git a/third-party/libscan/libscan/wpd/wpd.c b/third-party/libscan/libscan/wpd/wpd.c index d1c13b6..e71f0d1 100644 --- a/third-party/libscan/libscan/wpd/wpd.c +++ b/third-party/libscan/libscan/wpd/wpd.c @@ -10,14 +10,14 @@ scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc) { wpd_confidence_t conf = wpd_is_file_format_supported(stream); if (conf == C_WPD_CONFIDENCE_SUPPORTED_ENCRYPTION || conf == C_WPD_CONFIDENCE_UNSUPPORTED_ENCRYPTION) { - CTX_LOG_DEBUGF("wpd.c", "File is encrypted! Password-protected WPD files are not supported yet (conf=%d)", conf) + CTX_LOG_DEBUGF("wpd.c", "File is encrypted! Password-protected WPD files are not supported yet (conf=%d)", conf); wpd_memory_stream_destroy(stream); free(buf); return SCAN_ERR_READ; } if (conf != C_WPD_CONFIDENCE_EXCELLENT) { - CTX_LOG_ERRORF("wpd.c", "Unsupported file format! [%s] (conf=%d)", doc->filepath, conf) + CTX_LOG_ERRORF("wpd.c", "Unsupported file format! [%s] (conf=%d)", doc->filepath, conf); wpd_memory_stream_destroy(stream); free(buf); return SCAN_ERR_READ; @@ -28,11 +28,11 @@ scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc) { if (res != C_WPD_OK) { CTX_LOG_ERRORF("wpd.c", "Error while parsing WPD file [%s] (%d)", - doc->filepath, res) + doc->filepath, res); } if (tex.dyn_buffer.cur != 0) { - APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf) + APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf); } text_buffer_destroy(&tex);