mirror of
				https://github.com/simon987/sist2.git
				synced 2025-10-31 16:06:53 +00:00 
			
		
		
		
	add fast-epub arg, tweak thread pool, error handling for nftw
This commit is contained in:
		
							parent
							
								
									ba31531d3a
								
							
						
					
					
						commit
						a2209e91ca
					
				| @ -232,6 +232,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { | ||||
|     LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path) | ||||
|     LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex) | ||||
|     LOG_DEBUGF("cli.c", "arg fast=%d", args->fast) | ||||
|     LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub) | ||||
|     LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold) | ||||
|     LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer) | ||||
| 
 | ||||
|  | ||||
| @ -27,6 +27,7 @@ typedef struct scan_args { | ||||
|     double treemap_threshold; | ||||
|     int max_memory_buffer; | ||||
|     int read_subtitles; | ||||
|     int fast_epub; | ||||
| } scan_args_t; | ||||
| 
 | ||||
| scan_args_t *scan_args_create(); | ||||
|  | ||||
| @ -43,6 +43,11 @@ typedef struct { | ||||
|     GHashTable *dbg_current_files; | ||||
|     pthread_mutex_t dbg_current_files_mu; | ||||
| 
 | ||||
|     int dbg_failed_files_count; | ||||
|     int dbg_skipped_files_count; | ||||
|     int dbg_excluded_files_count; | ||||
|     pthread_mutex_t dbg_file_counts_mu; | ||||
| 
 | ||||
|     scan_arc_ctx_t arc_ctx; | ||||
|     scan_comic_ctx_t comic_ctx; | ||||
|     scan_ebook_ctx_t ebook_ctx; | ||||
|  | ||||
| @ -41,6 +41,10 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st | ||||
| 
 | ||||
|         if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) { | ||||
|             LOG_DEBUGF("walk.c", "Excluded: %s", filepath) | ||||
| 
 | ||||
|             pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); | ||||
|             ScanCtx.dbg_excluded_files_count += 1; | ||||
|             pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); | ||||
|             return 0; | ||||
|         } | ||||
| 
 | ||||
| @ -51,6 +55,8 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| #define MAX_FILE_DESCRIPTORS 64 | ||||
| 
 | ||||
| int walk_directory_tree(const char *dirpath) { | ||||
|     return nftw(dirpath, handle_entry, 15, FTW_PHYS); | ||||
|     return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_DEPTH); | ||||
| } | ||||
|  | ||||
							
								
								
									
										75
									
								
								src/main.c
									
									
									
									
									
								
							
							
						
						
									
										75
									
								
								src/main.c
									
									
									
									
									
								
							| @ -21,7 +21,7 @@ | ||||
| #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0" | ||||
| 
 | ||||
| 
 | ||||
| static const char *const Version = "2.10.2"; | ||||
| static const char *const Version = "2.10.3"; | ||||
| static const char *const usage[] = { | ||||
|         "sist2 scan [OPTION]... PATH", | ||||
|         "sist2 index [OPTION]... INDEX", | ||||
| @ -171,6 +171,8 @@ void initialize_scan_context(scan_args_t *args) { | ||||
|     ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL); | ||||
|     pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL); | ||||
| 
 | ||||
|     pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL); | ||||
| 
 | ||||
|     // Comic
 | ||||
|     ScanCtx.comic_ctx.log = _log; | ||||
|     ScanCtx.comic_ctx.logf = _logf; | ||||
| @ -189,6 +191,7 @@ void initialize_scan_context(scan_args_t *args) { | ||||
|     ScanCtx.ebook_ctx.log = _log; | ||||
|     ScanCtx.ebook_ctx.logf = _logf; | ||||
|     ScanCtx.ebook_ctx.store = _store; | ||||
|     ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub; | ||||
| 
 | ||||
|     // Font
 | ||||
|     ScanCtx.font_ctx.enable_tn = args->size > 0; | ||||
| @ -249,6 +252,37 @@ void initialize_scan_context(scan_args_t *args) { | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| void load_incremental_index(const scan_args_t *args) { | ||||
|     ScanCtx.original_table = incremental_get_table(); | ||||
|     ScanCtx.copy_table = incremental_get_table(); | ||||
| 
 | ||||
|     DIR *dir = opendir(args->incremental); | ||||
|     if (dir == NULL) { | ||||
|         LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno)) | ||||
|     } | ||||
| 
 | ||||
|     char descriptor_path[PATH_MAX]; | ||||
|     snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental); | ||||
|     index_descriptor_t original_desc = read_index_descriptor(descriptor_path); | ||||
| 
 | ||||
|     if (strcmp(original_desc.version, Version) != 0) { | ||||
|         LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version, | ||||
|                    Version, INDEX_VERSION_EXTERNAL) | ||||
|     } | ||||
| 
 | ||||
|     struct dirent *de; | ||||
|     while ((de = readdir(dir)) != NULL) { | ||||
|         if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) { | ||||
|             char file_path[PATH_MAX]; | ||||
|             snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name); | ||||
|             incremental_read(ScanCtx.original_table, file_path); | ||||
|         } | ||||
|     } | ||||
|     closedir(dir); | ||||
| 
 | ||||
|     LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table)) | ||||
| } | ||||
| 
 | ||||
| void sist2_scan(scan_args_t *args) { | ||||
| 
 | ||||
|     ScanCtx.mime_table = mime_get_mime_table(); | ||||
| @ -270,42 +304,22 @@ void sist2_scan(scan_args_t *args) { | ||||
|     scan_print_header(); | ||||
| 
 | ||||
|     if (args->incremental != NULL) { | ||||
|         ScanCtx.original_table = incremental_get_table(); | ||||
|         ScanCtx.copy_table = incremental_get_table(); | ||||
| 
 | ||||
|         DIR *dir = opendir(args->incremental); | ||||
|         if (dir == NULL) { | ||||
|             LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno)) | ||||
|         } | ||||
| 
 | ||||
|         char descriptor_path[PATH_MAX]; | ||||
|         snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental); | ||||
|         index_descriptor_t original_desc = read_index_descriptor(descriptor_path); | ||||
| 
 | ||||
|         if (strcmp(original_desc.version, Version) != 0) { | ||||
|             LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version, | ||||
|                        Version, INDEX_VERSION_EXTERNAL) | ||||
|         } | ||||
| 
 | ||||
|         struct dirent *de; | ||||
|         while ((de = readdir(dir)) != NULL) { | ||||
|             if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) { | ||||
|                 char file_path[PATH_MAX]; | ||||
|                 snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name); | ||||
|                 incremental_read(ScanCtx.original_table, file_path); | ||||
|             } | ||||
|         } | ||||
|         closedir(dir); | ||||
| 
 | ||||
|         LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table)) | ||||
|         load_incremental_index(args); | ||||
|     } | ||||
| 
 | ||||
|     ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE); | ||||
|     tpool_start(ScanCtx.pool); | ||||
|     walk_directory_tree(ScanCtx.index.desc.root); | ||||
|     int walk_ret = walk_directory_tree(ScanCtx.index.desc.root); | ||||
|     if (walk_ret == -1) { | ||||
|         LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno) | ||||
|     } | ||||
|     tpool_wait(ScanCtx.pool); | ||||
|     tpool_destroy(ScanCtx.pool); | ||||
| 
 | ||||
|     LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count) | ||||
|     LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count) | ||||
|     LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count) | ||||
| 
 | ||||
|     if (args->incremental != NULL) { | ||||
|         char dst_path[PATH_MAX]; | ||||
|         snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental); | ||||
| @ -528,6 +542,7 @@ int main(int argc, const char *argv[]) { | ||||
|                         "Maximum memory buffer size per thread in MB for files inside archives " | ||||
|                         "(see USAGE.md). DEFAULT: 2000"), | ||||
|             OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."), | ||||
|             OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub, "Faster but less accurate EPUB parsing (no thumbnails, metadata)"), | ||||
| 
 | ||||
|             OPT_GROUP("Index options"), | ||||
|             OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), | ||||
|  | ||||
| @ -71,6 +71,11 @@ void parse(void *arg) { | ||||
|     int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5); | ||||
|     if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) { | ||||
|         incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5); | ||||
| 
 | ||||
|         pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); | ||||
|         ScanCtx.dbg_skipped_files_count += 1; | ||||
|         pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); | ||||
| 
 | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
| @ -112,6 +117,10 @@ void parse(void *arg) { | ||||
|             } | ||||
| 
 | ||||
|             CLOSE_FILE(job->vfile) | ||||
| 
 | ||||
|             pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); | ||||
|             ScanCtx.dbg_failed_files_count += 1; | ||||
|             pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|  | ||||
| @ -12,7 +12,7 @@ | ||||
| 
 | ||||
| <nav class="navbar navbar-expand-lg"> | ||||
|     <a class="navbar-brand" href="/">sist2</a> | ||||
|     <span class="badge badge-pill version">2.10.2</span> | ||||
|     <span class="badge badge-pill version">2.10.3</span> | ||||
|     <span class="tagline">Lightning-fast file system indexer and search tool </span> | ||||
|     <a class="btn ml-auto" href="stats">Stats</a> | ||||
|     <button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings | ||||
|  | ||||
| @ -10,7 +10,7 @@ | ||||
| 
 | ||||
| <nav class="navbar navbar-expand-lg"> | ||||
|     <a class="navbar-brand" href="/">sist2</a> | ||||
|     <span class="badge badge-pill version">2.10.2</span> | ||||
|     <span class="badge badge-pill version">2.10.3</span> | ||||
|     <span class="tagline">Lightning-fast file system indexer and search tool </span> | ||||
|     <a style="margin-left: auto" class="btn" href="/">Back</a> | ||||
|     <button class="btn" type="button" data-toggle="modal" data-target="#settings" | ||||
|  | ||||
							
								
								
									
										26
									
								
								src/tpool.c
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								src/tpool.c
									
									
									
									
									
								
							| @ -27,6 +27,7 @@ typedef struct tpool { | ||||
|     int thread_cnt; | ||||
|     int work_cnt; | ||||
|     int done_cnt; | ||||
|     int busy_cnt; | ||||
| 
 | ||||
|     int free_arg; | ||||
|     int stop; | ||||
| @ -56,6 +57,7 @@ void tpool_dump_debug_info(tpool_t *pool) { | ||||
|     LOG_DEBUGF("tpool.c", "pool->thread_cnt = %d", pool->thread_cnt) | ||||
|     LOG_DEBUGF("tpool.c", "pool->work_cnt = %d", pool->work_cnt) | ||||
|     LOG_DEBUGF("tpool.c", "pool->done_cnt = %d", pool->done_cnt) | ||||
|     LOG_DEBUGF("tpool.c", "pool->busy_cnt = %d", pool->busy_cnt) | ||||
|     LOG_DEBUGF("tpool.c", "pool->stop = %d", pool->stop) | ||||
| } | ||||
| 
 | ||||
| @ -127,6 +129,10 @@ static void *tpool_worker(void *arg) { | ||||
|         } | ||||
| 
 | ||||
|         tpool_work_t *work = tpool_work_get(pool); | ||||
|         if (work != NULL) { | ||||
|             pool->busy_cnt += 1; | ||||
|         } | ||||
| 
 | ||||
|         pthread_mutex_unlock(&(pool->work_mutex)); | ||||
| 
 | ||||
|         if (work != NULL) { | ||||
| @ -143,6 +149,7 @@ static void *tpool_worker(void *arg) { | ||||
| 
 | ||||
|         pthread_mutex_lock(&(pool->work_mutex)); | ||||
|         if (work != NULL) { | ||||
|             pool->busy_cnt -= 1; | ||||
|             pool->done_cnt++; | ||||
|         } | ||||
| 
 | ||||
| @ -168,14 +175,14 @@ static void *tpool_worker(void *arg) { | ||||
| void tpool_wait(tpool_t *pool) { | ||||
|     LOG_INFO("tpool.c", "Waiting for worker threads to finish") | ||||
|     pthread_mutex_lock(&(pool->work_mutex)); | ||||
|     while (1) { | ||||
|     while (TRUE) { | ||||
|         if (pool->done_cnt < pool->work_cnt) { | ||||
|             pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex)); | ||||
|         } else { | ||||
|             usleep(500000); | ||||
|             if (pool->done_cnt == pool->work_cnt) { | ||||
|                 pool->stop = 1; | ||||
|                 usleep(1000000); | ||||
|             LOG_INFOF("tpool.c", "Received head=NULL signal, busy_cnt=%d", pool->busy_cnt); | ||||
| 
 | ||||
|             if (pool->done_cnt == pool->work_cnt && pool->busy_cnt == 0) { | ||||
|                 pool->stop = TRUE; | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
| @ -195,12 +202,16 @@ void tpool_destroy(tpool_t *pool) { | ||||
| 
 | ||||
|     pthread_mutex_lock(&(pool->work_mutex)); | ||||
|     tpool_work_t *work = pool->work_head; | ||||
|     int count = 0; | ||||
|     while (work != NULL) { | ||||
|         tpool_work_t *tmp = work->next; | ||||
|         free(work); | ||||
|         work = tmp; | ||||
|         count += 1; | ||||
|     } | ||||
| 
 | ||||
|     LOG_DEBUGF("tpool.c", "Destroyed %d jobs", count); | ||||
| 
 | ||||
|     pthread_cond_broadcast(&(pool->has_work_cond)); | ||||
|     pthread_mutex_unlock(&(pool->work_mutex)); | ||||
| 
 | ||||
| @ -226,13 +237,14 @@ void tpool_destroy(tpool_t *pool) { | ||||
|  * Create a thread pool | ||||
|  * @param thread_cnt Worker threads count | ||||
|  */ | ||||
| tpool_t *tpool_create(size_t thread_cnt, void cleanup_func(), int free_arg) { | ||||
| tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg) { | ||||
| 
 | ||||
|     tpool_t *pool = malloc(sizeof(tpool_t)); | ||||
|     pool->thread_cnt = thread_cnt; | ||||
|     pool->work_cnt = 0; | ||||
|     pool->done_cnt = 0; | ||||
|     pool->stop = 0; | ||||
|     pool->busy_cnt = 0; | ||||
|     pool->stop = FALSE; | ||||
|     pool->free_arg = free_arg; | ||||
|     pool->cleanup_func = cleanup_func; | ||||
|     pool->threads = calloc(sizeof(pthread_t), thread_cnt); | ||||
|  | ||||
| @ -8,7 +8,7 @@ typedef struct tpool tpool_t; | ||||
| 
 | ||||
| typedef void (*thread_func_t)(void *arg); | ||||
| 
 | ||||
| tpool_t *tpool_create(size_t num, void (*cleanup_func)(), int free_arg); | ||||
| tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg); | ||||
| void tpool_start(tpool_t *pool); | ||||
| void tpool_destroy(tpool_t *pool); | ||||
| 
 | ||||
|  | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user