Compare commits

..

9 Commits

14 changed files with 108 additions and 50 deletions

View File

@@ -51,7 +51,7 @@ sist2 (Simple incremental search tool)
1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not recommended!)*
1. *(or)* `docker pull simon987/sist2:2.10.1-x64-linux`
1. *(or)* `docker pull simon987/sist2:2.10.3-x64-linux`
1. See [Usage guide](docs/USAGE.md)

View File

@@ -232,6 +232,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)

View File

@@ -27,6 +27,7 @@ typedef struct scan_args {
double treemap_threshold;
int max_memory_buffer;
int read_subtitles;
int fast_epub;
} scan_args_t;
scan_args_t *scan_args_create();

View File

@@ -41,6 +41,12 @@ typedef struct {
int fast;
GHashTable *dbg_current_files;
pthread_mutex_t dbg_current_files_mu;
int dbg_failed_files_count;
int dbg_skipped_files_count;
int dbg_excluded_files_count;
pthread_mutex_t dbg_file_counts_mu;
scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx;

View File

@@ -41,6 +41,10 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_excluded_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
return 0;
}
@@ -51,6 +55,8 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
return 0;
}
#define MAX_FILE_DESCRIPTORS 64
int walk_directory_tree(const char *dirpath) {
return nftw(dirpath, handle_entry, 15, FTW_PHYS);
return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_DEPTH);
}

View File

@@ -21,7 +21,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "2.10.1";
static const char *const Version = "2.10.3";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@@ -168,7 +168,10 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.arc_ctx.passphrase[0] = 0;
}
ScanCtx.dbg_current_files = g_hash_table_new(g_int64_hash, g_int64_equal);
ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL);
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
// Comic
ScanCtx.comic_ctx.log = _log;
@@ -188,6 +191,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.ebook_ctx.log = _log;
ScanCtx.ebook_ctx.logf = _logf;
ScanCtx.ebook_ctx.store = _store;
ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
// Font
ScanCtx.font_ctx.enable_tn = args->size > 0;
@@ -248,6 +252,37 @@ void initialize_scan_context(scan_args_t *args) {
}
void load_incremental_index(const scan_args_t *args) {
ScanCtx.original_table = incremental_get_table();
ScanCtx.copy_table = incremental_get_table();
DIR *dir = opendir(args->incremental);
if (dir == NULL) {
LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
}
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
if (strcmp(original_desc.version, Version) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version,
Version, INDEX_VERSION_EXTERNAL)
}
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
incremental_read(ScanCtx.original_table, file_path);
}
}
closedir(dir);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
void sist2_scan(scan_args_t *args) {
ScanCtx.mime_table = mime_get_mime_table();
@@ -269,42 +304,22 @@ void sist2_scan(scan_args_t *args) {
scan_print_header();
if (args->incremental != NULL) {
ScanCtx.original_table = incremental_get_table();
ScanCtx.copy_table = incremental_get_table();
DIR *dir = opendir(args->incremental);
if (dir == NULL) {
LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
}
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
if (strcmp(original_desc.version, Version) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version,
Version, INDEX_VERSION_EXTERNAL)
}
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
incremental_read(ScanCtx.original_table, file_path);
}
}
closedir(dir);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
load_incremental_index(args);
}
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE);
tpool_start(ScanCtx.pool);
walk_directory_tree(ScanCtx.index.desc.root);
int walk_ret = walk_directory_tree(ScanCtx.index.desc.root);
if (walk_ret == -1) {
LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno)
}
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
if (args->incremental != NULL) {
char dst_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
@@ -426,6 +441,7 @@ void sist2_exec_script(exec_args_t *args) {
index_descriptor_t desc = read_index_descriptor(descriptor_path);
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
@@ -526,6 +542,7 @@ int main(int argc, const char *argv[]) {
"Maximum memory buffer size per thread in MB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"),
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub, "Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),

View File

@@ -43,7 +43,9 @@ void fs_reset(struct vfile *f) {
void set_dbg_current_file(parse_job_t *job) {
unsigned long long pid = (unsigned long long) pthread_self();
pthread_mutex_lock(&ScanCtx.dbg_current_files_mu);
g_hash_table_replace(ScanCtx.dbg_current_files, GINT_TO_POINTER(pid), job);
pthread_mutex_unlock(&ScanCtx.dbg_current_files_mu);
}
void parse(void *arg) {
@@ -69,6 +71,11 @@ void parse(void *arg) {
int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5);
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_skipped_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
return;
}
@@ -110,6 +117,10 @@ void parse(void *arg) {
}
CLOSE_FILE(job->vfile)
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_failed_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
return;
}

View File

@@ -12,7 +12,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.10.1</span>
<span class="badge badge-pill version">2.10.3</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a class="btn ml-auto" href="stats">Stats</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings

View File

@@ -10,7 +10,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.10.1</span>
<span class="badge badge-pill version">2.10.3</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" class="btn" href="/">Back</a>
<button class="btn" type="button" data-toggle="modal" data-target="#settings"

View File

@@ -27,6 +27,7 @@ typedef struct tpool {
int thread_cnt;
int work_cnt;
int done_cnt;
int busy_cnt;
int free_arg;
int stop;
@@ -56,6 +57,7 @@ void tpool_dump_debug_info(tpool_t *pool) {
LOG_DEBUGF("tpool.c", "pool->thread_cnt = %d", pool->thread_cnt)
LOG_DEBUGF("tpool.c", "pool->work_cnt = %d", pool->work_cnt)
LOG_DEBUGF("tpool.c", "pool->done_cnt = %d", pool->done_cnt)
LOG_DEBUGF("tpool.c", "pool->busy_cnt = %d", pool->busy_cnt)
LOG_DEBUGF("tpool.c", "pool->stop = %d", pool->stop)
}
@@ -127,6 +129,10 @@ static void *tpool_worker(void *arg) {
}
tpool_work_t *work = tpool_work_get(pool);
if (work != NULL) {
pool->busy_cnt += 1;
}
pthread_mutex_unlock(&(pool->work_mutex));
if (work != NULL) {
@@ -143,6 +149,7 @@ static void *tpool_worker(void *arg) {
pthread_mutex_lock(&(pool->work_mutex));
if (work != NULL) {
pool->busy_cnt -= 1;
pool->done_cnt++;
}
@@ -168,14 +175,14 @@ static void *tpool_worker(void *arg) {
void tpool_wait(tpool_t *pool) {
LOG_INFO("tpool.c", "Waiting for worker threads to finish")
pthread_mutex_lock(&(pool->work_mutex));
while (1) {
while (TRUE) {
if (pool->done_cnt < pool->work_cnt) {
pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
} else {
usleep(500000);
if (pool->done_cnt == pool->work_cnt) {
pool->stop = 1;
usleep(1000000);
LOG_INFOF("tpool.c", "Received head=NULL signal, busy_cnt=%d", pool->busy_cnt);
if (pool->done_cnt == pool->work_cnt && pool->busy_cnt == 0) {
pool->stop = TRUE;
break;
}
}
@@ -195,12 +202,16 @@ void tpool_destroy(tpool_t *pool) {
pthread_mutex_lock(&(pool->work_mutex));
tpool_work_t *work = pool->work_head;
int count = 0;
while (work != NULL) {
tpool_work_t *tmp = work->next;
free(work);
work = tmp;
count += 1;
}
LOG_DEBUGF("tpool.c", "Destroyed %d jobs", count);
pthread_cond_broadcast(&(pool->has_work_cond));
pthread_mutex_unlock(&(pool->work_mutex));
@@ -226,13 +237,14 @@ void tpool_destroy(tpool_t *pool) {
* Create a thread pool
* @param thread_cnt Worker threads count
*/
tpool_t *tpool_create(size_t thread_cnt, void cleanup_func(), int free_arg) {
tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg) {
tpool_t *pool = malloc(sizeof(tpool_t));
pool->thread_cnt = thread_cnt;
pool->work_cnt = 0;
pool->done_cnt = 0;
pool->stop = 0;
pool->busy_cnt = 0;
pool->stop = FALSE;
pool->free_arg = free_arg;
pool->cleanup_func = cleanup_func;
pool->threads = calloc(sizeof(pthread_t), thread_cnt);

View File

@@ -8,7 +8,7 @@ typedef struct tpool tpool_t;
typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(size_t num, void (*cleanup_func)(), int free_arg);
tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg);
void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *pool);

View File

@@ -178,6 +178,9 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
send_response_line(nc, 200, data_len, "Content-Type: image/jpeg");
mg_send(nc, data, data_len);
free(data);
} else {
mg_http_reply(nc, 404, "Content-Type: text/plain;charset=utf-8\r\n", "Not found");
return;
}
}
@@ -220,7 +223,10 @@ void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
dyn_buffer_t encoded = url_escape(url);
dyn_buffer_write_char(&encoded, '\0');
mg_http_reply(nc, 308, "Location: %s", encoded.buf);
char location_header[8192];
snprintf(location_header, sizeof(location_header), "Location: %s\r\n", encoded.buf);
mg_http_reply(nc, 308, location_header, "");
dyn_buffer_destroy(&encoded);
}
@@ -531,7 +537,7 @@ int validate_auth(struct mg_connection *nc, struct mg_http_message *hm) {
mg_http_creds(hm, user, sizeof(user), pass, sizeof(pass));
if (strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
mg_http_reply(nc, 401, "WWW-Authenticate: Basic realm=\"sist2\"", "");
mg_http_reply(nc, 401, "WWW-Authenticate: Basic realm=\"sist2\"\r\n", "");
return FALSE;
}
return TRUE;
@@ -544,7 +550,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
if (WebCtx.auth_enabled == TRUE) {
if (!validate_auth(nc, hm)) {
nc->is_closing = 1;
return;
}
}
@@ -575,7 +580,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
stats_files(nc, hm);
} else if (mg_http_match_uri(hm, "/tag/*")) {
if (WebCtx.tag_auth_enabled == TRUE && !validate_auth(nc, hm)) {
nc->is_closing = 1;
return;
}
tag(nc, hm);

File diff suppressed because one or more lines are too long