diff --git a/src/cli.c b/src/cli.c index 2aa3c54..2acbd2d 100644 --- a/src/cli.c +++ b/src/cli.c @@ -232,6 +232,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path) LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex) LOG_DEBUGF("cli.c", "arg fast=%d", args->fast) + LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub) LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold) LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer) diff --git a/src/cli.h b/src/cli.h index 085807b..f7dbd2c 100644 --- a/src/cli.h +++ b/src/cli.h @@ -27,6 +27,7 @@ typedef struct scan_args { double treemap_threshold; int max_memory_buffer; int read_subtitles; + int fast_epub; } scan_args_t; scan_args_t *scan_args_create(); diff --git a/src/ctx.h b/src/ctx.h index d6c14b1..d8a8ec9 100644 --- a/src/ctx.h +++ b/src/ctx.h @@ -43,6 +43,11 @@ typedef struct { GHashTable *dbg_current_files; pthread_mutex_t dbg_current_files_mu; + int dbg_failed_files_count; + int dbg_skipped_files_count; + int dbg_excluded_files_count; + pthread_mutex_t dbg_file_counts_mu; + scan_arc_ctx_t arc_ctx; scan_comic_ctx_t comic_ctx; scan_ebook_ctx_t ebook_ctx; diff --git a/src/io/walk.c b/src/io/walk.c index 0d7ad5e..4f5f5f4 100644 --- a/src/io/walk.c +++ b/src/io/walk.c @@ -41,6 +41,10 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) { LOG_DEBUGF("walk.c", "Excluded: %s", filepath) + + pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); + ScanCtx.dbg_excluded_files_count += 1; + pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); return 0; } @@ -51,6 +55,8 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st return 0; } +#define MAX_FILE_DESCRIPTORS 64 + int walk_directory_tree(const char *dirpath) { - return nftw(dirpath, handle_entry, 15, FTW_PHYS); + return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_DEPTH); } diff --git a/src/main.c b/src/main.c index 1991d8f..63f5ea2 100644 --- a/src/main.c +++ b/src/main.c @@ -21,7 +21,7 @@ #define EPILOG "Made by simon987 . Released under GPL-3.0" -static const char *const Version = "2.10.2"; +static const char *const Version = "2.10.3"; static const char *const usage[] = { "sist2 scan [OPTION]... PATH", "sist2 index [OPTION]... INDEX", @@ -171,6 +171,8 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL); pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL); + pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL); + // Comic ScanCtx.comic_ctx.log = _log; ScanCtx.comic_ctx.logf = _logf; @@ -189,6 +191,7 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.ebook_ctx.log = _log; ScanCtx.ebook_ctx.logf = _logf; ScanCtx.ebook_ctx.store = _store; + ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub; // Font ScanCtx.font_ctx.enable_tn = args->size > 0; @@ -249,6 +252,37 @@ void initialize_scan_context(scan_args_t *args) { } +void load_incremental_index(const scan_args_t *args) { + ScanCtx.original_table = incremental_get_table(); + ScanCtx.copy_table = incremental_get_table(); + + DIR *dir = opendir(args->incremental); + if (dir == NULL) { + LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno)) + } + + char descriptor_path[PATH_MAX]; + snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental); + index_descriptor_t original_desc = read_index_descriptor(descriptor_path); + + if (strcmp(original_desc.version, Version) != 0) { + LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version, + Version, INDEX_VERSION_EXTERNAL) + } + + struct dirent *de; + while ((de = readdir(dir)) != NULL) { + if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) { + char file_path[PATH_MAX]; + snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name); + incremental_read(ScanCtx.original_table, file_path); + } + } + closedir(dir); + + LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table)) +} + void sist2_scan(scan_args_t *args) { ScanCtx.mime_table = mime_get_mime_table(); @@ -270,42 +304,22 @@ void sist2_scan(scan_args_t *args) { scan_print_header(); if (args->incremental != NULL) { - ScanCtx.original_table = incremental_get_table(); - ScanCtx.copy_table = incremental_get_table(); - - DIR *dir = opendir(args->incremental); - if (dir == NULL) { - LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno)) - } - - char descriptor_path[PATH_MAX]; - snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental); - index_descriptor_t original_desc = read_index_descriptor(descriptor_path); - - if (strcmp(original_desc.version, Version) != 0) { - LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version, - Version, INDEX_VERSION_EXTERNAL) - } - - struct dirent *de; - while ((de = readdir(dir)) != NULL) { - if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) { - char file_path[PATH_MAX]; - snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name); - incremental_read(ScanCtx.original_table, file_path); - } - } - closedir(dir); - - LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table)) + load_incremental_index(args); } ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE); tpool_start(ScanCtx.pool); - walk_directory_tree(ScanCtx.index.desc.root); + int walk_ret = walk_directory_tree(ScanCtx.index.desc.root); + if (walk_ret == -1) { + LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno) + } tpool_wait(ScanCtx.pool); tpool_destroy(ScanCtx.pool); + LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count) + LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count) + LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count) + if (args->incremental != NULL) { char dst_path[PATH_MAX]; snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental); @@ -528,6 +542,7 @@ int main(int argc, const char *argv[]) { "Maximum memory buffer size per thread in MB for files inside archives " "(see USAGE.md). DEFAULT: 2000"), OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."), + OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub, "Faster but less accurate EPUB parsing (no thumbnails, metadata)"), OPT_GROUP("Index options"), OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), diff --git a/src/parsing/parse.c b/src/parsing/parse.c index ab0b36c..78a5375 100644 --- a/src/parsing/parse.c +++ b/src/parsing/parse.c @@ -71,6 +71,11 @@ void parse(void *arg) { int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5); if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) { incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5); + + pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); + ScanCtx.dbg_skipped_files_count += 1; + pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); + return; } @@ -112,6 +117,10 @@ void parse(void *arg) { } CLOSE_FILE(job->vfile) + + pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); + ScanCtx.dbg_failed_files_count += 1; + pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); return; } diff --git a/src/static/search.html b/src/static/search.html index 52e1a06..c7224cc 100644 --- a/src/static/search.html +++ b/src/static/search.html @@ -12,7 +12,7 @@