mirror of
https://github.com/simon987/sist2.git
synced 2025-12-11 14:38:54 +00:00
Fixes and cleanup
This commit is contained in:
@@ -142,7 +142,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
|
||||
char *abs_output = abspath(args->output);
|
||||
if (args->incremental && abs_output == NULL) {
|
||||
LOG_WARNINGF("main.c", "Could not open original index for incremental scan: %s. Will not perform incremental scan.", abs_output);
|
||||
LOG_WARNINGF("main.c", "Could not open original index for incremental scan: %s. Will not perform incremental scan.", args->output);
|
||||
args->incremental = FALSE;
|
||||
} else if (!args->incremental && abs_output != NULL) {
|
||||
LOG_FATALF("main.c", "Index already exists: %s. If you wish to perform incremental scan, you must specify --incremental", abs_output);
|
||||
|
||||
@@ -14,6 +14,7 @@ typedef struct scan_args {
|
||||
int content_size;
|
||||
int threads;
|
||||
int incremental;
|
||||
int optimize_database;
|
||||
char *output;
|
||||
char *rewrite_url;
|
||||
char *name;
|
||||
|
||||
@@ -38,13 +38,6 @@ typedef struct {
|
||||
pcre_extra *exclude_extra;
|
||||
int fast;
|
||||
|
||||
pthread_mutex_t dbg_current_files_mu;
|
||||
|
||||
int dbg_failed_files_count;
|
||||
int dbg_skipped_files_count;
|
||||
int dbg_excluded_files_count;
|
||||
pthread_mutex_t dbg_file_counts_mu;
|
||||
|
||||
scan_arc_ctx_t arc_ctx;
|
||||
scan_comic_ctx_t comic_ctx;
|
||||
scan_ebook_ctx_t ebook_ctx;
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
#include <time.h>
|
||||
|
||||
|
||||
|
||||
database_t *database_create(const char *filename, database_type_t type) {
|
||||
database_t *db = malloc(sizeof(database_t));
|
||||
|
||||
@@ -81,7 +80,7 @@ void database_initialize(database_t *db) {
|
||||
}
|
||||
|
||||
void database_open(database_t *db) {
|
||||
LOG_DEBUGF("tpool.c", "Opening database %s (%d)", db->filename, db->type);
|
||||
LOG_DEBUGF("database.c", "Opening database %s (%d)", db->filename, db->type);
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db));
|
||||
|
||||
@@ -113,7 +112,8 @@ void database_open(database_t *db) {
|
||||
&db->write_document_stmt, NULL));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db,
|
||||
"INSERT INTO thumbnail (id, num, data) VALUES (?,?,?) ON CONFLICT DO UPDATE SET data=excluded.data;", -1,
|
||||
"INSERT INTO thumbnail (id, num, data) VALUES (?,?,?) ON CONFLICT DO UPDATE SET data=excluded.data;",
|
||||
-1,
|
||||
&db->write_thumbnail_stmt, NULL));
|
||||
|
||||
// Create functions
|
||||
@@ -186,12 +186,16 @@ void database_close(database_t *db, int optimize) {
|
||||
|
||||
if (optimize) {
|
||||
LOG_DEBUG("database.c", "Optimizing database");
|
||||
// TODO: This should be an optional argument
|
||||
// CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "VACUUM;", NULL, NULL, NULL));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "VACUUM;", NULL, NULL, NULL));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA optimize;", NULL, NULL, NULL));
|
||||
}
|
||||
|
||||
sqlite3_close(db->db);
|
||||
|
||||
if (db->type == IPC_PRODUCER_DATABASE) {
|
||||
remove(db->filename);
|
||||
}
|
||||
|
||||
free(db);
|
||||
db = NULL;
|
||||
}
|
||||
@@ -202,11 +206,14 @@ void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *r
|
||||
|
||||
int ret = sqlite3_step(db->select_thumbnail_stmt);
|
||||
|
||||
// TODO: if row not found, return null
|
||||
if (ret != SQLITE_ROW) {
|
||||
LOG_FATALF("database.c", "FIXME: tn step returned %d", ret);
|
||||
if (ret == SQLITE_DONE) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->select_thumbnail_stmt));
|
||||
*return_value_len = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
CRASH_IF_STMT_FAIL(ret);
|
||||
|
||||
const void *blob = sqlite3_column_blob(db->select_thumbnail_stmt, 0);
|
||||
const int blob_size = sqlite3_column_bytes(db->select_thumbnail_stmt, 0);
|
||||
|
||||
@@ -275,11 +282,47 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) {
|
||||
return desc;
|
||||
}
|
||||
|
||||
/**
 * Create an iterator over every id stored in the delete_list table.
 *
 * Only the statement is prepared here; rows are fetched one at a time by
 * database_delete_list_iter(), which finalizes the statement once the result
 * set is exhausted.
 *
 * @param db database handle; its db->db connection is used for the query
 * @return iterator allocated with malloc(). This function does not free it.
 */
database_iterator_t *database_create_delete_list_iterator(database_t *db) {

    sqlite3_stmt *stmt = NULL;

    // Abort on a failed prepare, like the other prepared statements in this file,
    // instead of deferring the failure to the first sqlite3_step() on a NULL statement
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db, "SELECT id FROM delete_list;", -1, &stmt, NULL));

    database_iterator_t *iter = malloc(sizeof(*iter));
    if (iter == NULL) {
        sqlite3_finalize(stmt);
        LOG_FATALF("database.c", "Could not allocate %zu bytes for delete_list iterator", sizeof(*iter));
        // Only reached if LOG_FATALF does not terminate the process
        return NULL;
    }

    iter->stmt = stmt;
    iter->db = db;

    return iter;
}
|
||||
|
||||
/**
 * Fetch the next id from a delete_list iterator.
 *
 * @param iter iterator returned by database_create_delete_list_iterator()
 * @return a malloc'd, NUL-terminated copy of the next id (the caller frees it),
 *         or NULL once every row has been consumed. On exhaustion the statement
 *         is finalized and iter->stmt is set to NULL; iter itself is not freed
 *         here.
 */
char *database_delete_list_iter(database_iterator_t *iter) {

    // Already exhausted: the statement was finalized by a previous call
    if (iter->stmt == NULL) {
        return NULL;
    }

    int ret = sqlite3_step(iter->stmt);

    if (ret == SQLITE_ROW) {
        const char *id = (const char *) sqlite3_column_text(iter->stmt, 0);

        // sqlite3_column_text() returns NULL for SQL NULL values and on
        // allocation failure; strlen(NULL) would be undefined behavior
        if (id == NULL) {
            LOG_FATALF("database.c", "delete iter could not read id: %s", sqlite3_errmsg(iter->db->db));
            return NULL;
        }

        size_t id_size = strlen(id) + 1;
        char *id_heap = malloc(id_size);
        if (id_heap == NULL) {
            LOG_FATALF("database.c", "delete iter could not allocate %zu bytes", id_size);
            return NULL;
        }

        // The copy is needed because the column text belongs to the statement
        memcpy(id_heap, id, id_size);
        return id_heap;
    }

    if (ret != SQLITE_DONE) {
        LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db));
    }

    if (sqlite3_finalize(iter->stmt) != SQLITE_OK) {
        LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db));
    }

    iter->stmt = NULL;

    return NULL;
}
|
||||
|
||||
database_iterator_t *database_create_document_iterator(database_t *db) {
|
||||
|
||||
sqlite3_stmt *stmt;
|
||||
|
||||
// TODO: remove mtime, size, _id from json_data
|
||||
// TODO optimization: remove mtime, size, _id from json_data
|
||||
|
||||
sqlite3_prepare_v2(db->db, "WITH doc (j) AS (SELECT CASE"
|
||||
" WHEN sc.json_data IS NULL THEN"
|
||||
@@ -494,10 +537,10 @@ job_t *database_get_work(database_t *db, job_type_t job_type) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt));
|
||||
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
|
||||
return NULL;
|
||||
} else {
|
||||
CRASH_IF_STMT_FAIL(ret);
|
||||
}
|
||||
|
||||
CRASH_IF_STMT_FAIL(ret);
|
||||
|
||||
job = malloc(sizeof(*job));
|
||||
|
||||
const char *line = (const char *) sqlite3_column_text(db->pop_index_job_stmt, 2);
|
||||
@@ -511,9 +554,6 @@ job_t *database_get_work(database_t *db, job_type_t job_type) {
|
||||
job->bulk_line->type = sqlite3_column_int(db->pop_index_job_stmt, 1);
|
||||
job->bulk_line->next = NULL;
|
||||
|
||||
// TODO CRASH IF NOT OK
|
||||
sqlite3_step(db->pop_parse_job_stmt);
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt));
|
||||
}
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ typedef struct {
|
||||
pthread_mutex_t db_mutex;
|
||||
pthread_mutex_t index_db_mutex;
|
||||
pthread_cond_t has_work_cond;
|
||||
char current_job[256][PATH_MAX * 2];
|
||||
char current_job[MAX_THREADS][PATH_MAX * 2];
|
||||
} database_ipc_ctx_t;
|
||||
|
||||
typedef struct database {
|
||||
@@ -106,6 +106,14 @@ cJSON *database_document_iter(database_iterator_t *);
|
||||
#define database_document_iter_foreach(element, iter) \
|
||||
for (cJSON *element = database_document_iter(iter); element != NULL; element = database_document_iter(iter))
|
||||
|
||||
database_iterator_t *database_create_delete_list_iterator(database_t *db);
|
||||
|
||||
char * database_delete_list_iter(database_iterator_t *iter);
|
||||
|
||||
#define database_delete_list_iter_foreach(element, iter) \
|
||||
for (char *element = database_delete_list_iter(iter); element != NULL; element = database_delete_list_iter(iter))
|
||||
|
||||
|
||||
cJSON *database_incremental_scan_begin(database_t *db);
|
||||
|
||||
cJSON *database_incremental_scan_end(database_t *db);
|
||||
|
||||
@@ -24,9 +24,6 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", filepath);
|
||||
|
||||
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_excluded_files_count += 1;
|
||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||
} else if (typeflag == FTW_D) {
|
||||
return FTW_SKIP_SUBTREE;
|
||||
}
|
||||
@@ -83,13 +80,6 @@ int iterate_file_list(void *input_file) {
|
||||
|
||||
if (ScanCtx.exclude != NULL && EXCLUDED(absolute_path)) {
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path);
|
||||
|
||||
if (S_ISREG(info.st_mode)) {
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_excluded_files_count += 1;
|
||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
212
src/main.c
212
src/main.c
@@ -18,10 +18,6 @@
|
||||
|
||||
#include "src/database/database.h"
|
||||
|
||||
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
|
||||
|
||||
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||
|
||||
|
||||
static const char *const usage[] = {
|
||||
"sist2 scan [OPTION]... PATH",
|
||||
@@ -32,77 +28,6 @@ static const char *const usage[] = {
|
||||
};
|
||||
|
||||
|
||||
static __sighandler_t sigsegv_handler = NULL;
|
||||
static __sighandler_t sigabrt_handler = NULL;
|
||||
|
||||
void sig_handler(int signum) {
|
||||
|
||||
LogCtx.verbose = TRUE;
|
||||
LogCtx.very_verbose = TRUE;
|
||||
|
||||
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
|
||||
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
|
||||
|
||||
// TODO: Print debug info
|
||||
// if (ScanCtx.dbg_current_files != NULL) {
|
||||
// GHashTableIter iter;
|
||||
// g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
|
||||
//
|
||||
// void *key;
|
||||
// void *value;
|
||||
// while (g_hash_table_iter_next(&iter, &key, &value)) {
|
||||
// parse_job_t *job = value;
|
||||
//
|
||||
// if (isatty(STDERR_FILENO)) {
|
||||
// LOG_DEBUGF(
|
||||
// "*SIGNAL HANDLER*",
|
||||
// "Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
|
||||
// 31 + ((unsigned int) key) % 7, key, job->filepath
|
||||
// );
|
||||
// } else {
|
||||
// LOG_DEBUGF(
|
||||
// "*SIGNAL HANDLER*",
|
||||
// "THREAD [%04llX] was working on job %s",
|
||||
// key, job->filepath
|
||||
// );
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
if (ScanCtx.pool != NULL) {
|
||||
tpool_dump_debug_info(ScanCtx.pool);
|
||||
}
|
||||
|
||||
if (IndexCtx.pool != NULL) {
|
||||
tpool_dump_debug_info(IndexCtx.pool);
|
||||
}
|
||||
|
||||
LOG_INFO(
|
||||
"*SIGNAL HANDLER*",
|
||||
"Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
|
||||
);
|
||||
LOG_INFO(
|
||||
"*SIGNAL HANDLER*",
|
||||
"sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
|
||||
);
|
||||
|
||||
#ifndef SIST_DEBUG
|
||||
LOG_WARNING(
|
||||
"*SIGNAL HANDLER*",
|
||||
"You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
|
||||
"releases page to provide additionnal information when submitting a bug report."
|
||||
);
|
||||
#endif
|
||||
|
||||
if (signum == SIGSEGV && sigsegv_handler != NULL) {
|
||||
sigsegv_handler(signum);
|
||||
} else if (signum == SIGABRT && sigabrt_handler != NULL) {
|
||||
sigabrt_handler(signum);
|
||||
}
|
||||
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
void database_scan_begin(scan_args_t *args) {
|
||||
index_descriptor_t *desc = &ScanCtx.index.desc;
|
||||
|
||||
@@ -158,7 +83,7 @@ void write_thumbnail_callback(char *key, int num, void *buf, size_t buf_len) {
|
||||
database_write_thumbnail(ProcData.index_db, key, num, buf, buf_len);
|
||||
}
|
||||
|
||||
void _log(const char *filepath, int level, char *str) {
|
||||
void log_callback(const char *filepath, int level, char *str) {
|
||||
if (level == LEVEL_FATAL) {
|
||||
sist_log(filepath, level, str);
|
||||
exit(-1);
|
||||
@@ -175,7 +100,7 @@ void _log(const char *filepath, int level, char *str) {
|
||||
}
|
||||
}
|
||||
|
||||
void _logf(const char *filepath, int level, char *format, ...) {
|
||||
void logf_callback(const char *filepath, int level, char *format, ...) {
|
||||
|
||||
va_list args;
|
||||
|
||||
@@ -198,15 +123,13 @@ void _logf(const char *filepath, int level, char *format, ...) {
|
||||
}
|
||||
|
||||
void initialize_scan_context(scan_args_t *args) {
|
||||
// TODO: shared
|
||||
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
|
||||
|
||||
ScanCtx.calculate_checksums = args->calculate_checksums;
|
||||
|
||||
// Archive
|
||||
ScanCtx.arc_ctx.mode = args->archive_mode;
|
||||
ScanCtx.arc_ctx.log = _log;
|
||||
ScanCtx.arc_ctx.logf = _logf;
|
||||
ScanCtx.arc_ctx.log = log_callback;
|
||||
ScanCtx.arc_ctx.logf = logf_callback;
|
||||
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
|
||||
if (args->archive_passphrase != NULL) {
|
||||
strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
|
||||
@@ -215,8 +138,8 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
}
|
||||
|
||||
// Comic
|
||||
ScanCtx.comic_ctx.log = _log;
|
||||
ScanCtx.comic_ctx.logf = _logf;
|
||||
ScanCtx.comic_ctx.log = log_callback;
|
||||
ScanCtx.comic_ctx.logf = logf_callback;
|
||||
ScanCtx.comic_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.comic_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.comic_ctx.tn_size = args->tn_size;
|
||||
@@ -232,24 +155,24 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
|
||||
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
|
||||
}
|
||||
ScanCtx.ebook_ctx.log = _log;
|
||||
ScanCtx.ebook_ctx.logf = _logf;
|
||||
ScanCtx.ebook_ctx.log = log_callback;
|
||||
ScanCtx.ebook_ctx.logf = logf_callback;
|
||||
ScanCtx.ebook_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
|
||||
ScanCtx.ebook_ctx.tn_qscale = args->tn_quality;
|
||||
|
||||
// Font
|
||||
ScanCtx.font_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.font_ctx.log = _log;
|
||||
ScanCtx.font_ctx.logf = _logf;
|
||||
ScanCtx.font_ctx.log = log_callback;
|
||||
ScanCtx.font_ctx.logf = logf_callback;
|
||||
ScanCtx.font_ctx.store = write_thumbnail_callback;
|
||||
|
||||
// Media
|
||||
ScanCtx.media_ctx.tn_qscale = args->tn_quality;
|
||||
ScanCtx.media_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.media_ctx.tn_count = args->tn_count;
|
||||
ScanCtx.media_ctx.log = _log;
|
||||
ScanCtx.media_ctx.logf = _logf;
|
||||
ScanCtx.media_ctx.log = log_callback;
|
||||
ScanCtx.media_ctx.logf = logf_callback;
|
||||
ScanCtx.media_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024;
|
||||
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
|
||||
@@ -264,24 +187,24 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
// OOXML
|
||||
ScanCtx.ooxml_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.ooxml_ctx.content_size = args->content_size;
|
||||
ScanCtx.ooxml_ctx.log = _log;
|
||||
ScanCtx.ooxml_ctx.logf = _logf;
|
||||
ScanCtx.ooxml_ctx.log = log_callback;
|
||||
ScanCtx.ooxml_ctx.logf = logf_callback;
|
||||
ScanCtx.ooxml_ctx.store = write_thumbnail_callback;
|
||||
|
||||
// MOBI
|
||||
ScanCtx.mobi_ctx.content_size = args->content_size;
|
||||
ScanCtx.mobi_ctx.log = _log;
|
||||
ScanCtx.mobi_ctx.logf = _logf;
|
||||
ScanCtx.mobi_ctx.log = log_callback;
|
||||
ScanCtx.mobi_ctx.logf = logf_callback;
|
||||
|
||||
// TEXT
|
||||
ScanCtx.text_ctx.content_size = args->content_size;
|
||||
ScanCtx.text_ctx.log = _log;
|
||||
ScanCtx.text_ctx.logf = _logf;
|
||||
ScanCtx.text_ctx.log = log_callback;
|
||||
ScanCtx.text_ctx.logf = logf_callback;
|
||||
|
||||
// MSDOC
|
||||
ScanCtx.msdoc_ctx.content_size = args->content_size;
|
||||
ScanCtx.msdoc_ctx.log = _log;
|
||||
ScanCtx.msdoc_ctx.logf = _logf;
|
||||
ScanCtx.msdoc_ctx.log = log_callback;
|
||||
ScanCtx.msdoc_ctx.logf = logf_callback;
|
||||
ScanCtx.msdoc_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string("application/msword");
|
||||
|
||||
@@ -299,20 +222,20 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.raw_ctx.tn_qscale = args->tn_quality;
|
||||
ScanCtx.raw_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.raw_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.raw_ctx.log = _log;
|
||||
ScanCtx.raw_ctx.logf = _logf;
|
||||
ScanCtx.raw_ctx.log = log_callback;
|
||||
ScanCtx.raw_ctx.logf = logf_callback;
|
||||
ScanCtx.raw_ctx.store = write_thumbnail_callback;
|
||||
|
||||
// Wpd
|
||||
ScanCtx.wpd_ctx.content_size = args->content_size;
|
||||
ScanCtx.wpd_ctx.log = _log;
|
||||
ScanCtx.wpd_ctx.logf = _logf;
|
||||
ScanCtx.wpd_ctx.log = log_callback;
|
||||
ScanCtx.wpd_ctx.logf = logf_callback;
|
||||
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string("application/wordperfect");
|
||||
|
||||
// Json
|
||||
ScanCtx.json_ctx.content_size = args->content_size;
|
||||
ScanCtx.json_ctx.log = _log;
|
||||
ScanCtx.json_ctx.logf = _logf;
|
||||
ScanCtx.json_ctx.log = log_callback;
|
||||
ScanCtx.json_ctx.logf = logf_callback;
|
||||
ScanCtx.json_ctx.json_mime = mime_get_mime_by_string("application/json");
|
||||
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string("application/ndjson");
|
||||
}
|
||||
@@ -344,9 +267,6 @@ void sist2_scan(scan_args_t *args) {
|
||||
tpool_wait(ScanCtx.pool);
|
||||
tpool_destroy(ScanCtx.pool);
|
||||
|
||||
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count);
|
||||
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count);
|
||||
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count);
|
||||
LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size);
|
||||
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size);
|
||||
|
||||
@@ -358,7 +278,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
}
|
||||
|
||||
database_generate_stats(db, args->treemap_threshold);
|
||||
database_close(db, TRUE);
|
||||
database_close(db, args->optimize_database);
|
||||
}
|
||||
|
||||
void sist2_index(index_args_t *args) {
|
||||
@@ -397,16 +317,19 @@ void sist2_index(index_args_t *args) {
|
||||
print_json(json, doc_id);
|
||||
} else {
|
||||
index_json(json, doc_id);
|
||||
cnt +=1;
|
||||
cnt += 1;
|
||||
}
|
||||
}
|
||||
|
||||
free(iterator);
|
||||
database_close(db, FALSE);
|
||||
|
||||
// Only read the _delete index if we're sending data to ES
|
||||
if (!args->print) {
|
||||
// TODO: (delete_list iterator)
|
||||
database_iterator_t *del_iter = database_create_delete_list_iterator(db);
|
||||
database_delete_list_iter_foreach(id, del_iter) {
|
||||
delete_document(id);
|
||||
free(id);
|
||||
}
|
||||
}
|
||||
|
||||
tpool_wait(IndexCtx.pool);
|
||||
@@ -496,12 +419,7 @@ int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct
|
||||
}
|
||||
}
|
||||
|
||||
#include <zlib.h>
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
// sigsegv_handler = signal(SIGSEGV, sig_handler);
|
||||
// sigabrt_handler = signal(SIGABRT, sig_handler);
|
||||
|
||||
setlocale(LC_ALL, "");
|
||||
|
||||
scan_args_t *scan_args = scan_args_create();
|
||||
@@ -521,36 +439,37 @@ int main(int argc, const char *argv[]) {
|
||||
struct argparse_option options[] = {
|
||||
OPT_HELP(),
|
||||
|
||||
OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
|
||||
OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging"),
|
||||
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
|
||||
OPT_BOOLEAN('v', "version", &arg_version, "Print version and exit."),
|
||||
OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging."),
|
||||
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages."),
|
||||
OPT_BOOLEAN(0, "json-logs", &LogCtx.json_logs, "Output logs in JSON format."),
|
||||
|
||||
OPT_GROUP("Scan options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"),
|
||||
OPT_INTEGER('q', "thumbnail-quality", &scan_args->tn_quality,
|
||||
"Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2",
|
||||
"Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT: 2",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
|
||||
OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size,
|
||||
"Thumbnail size, in pixels. DEFAULT=500",
|
||||
"Thumbnail size, in pixels. DEFAULT: 552",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size),
|
||||
OPT_INTEGER(0, "thumbnail-count", &scan_args->tn_count,
|
||||
"Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1",
|
||||
"Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT: 1",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_count),
|
||||
OPT_INTEGER(0, "content-size", &scan_args->content_size,
|
||||
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768",
|
||||
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT: 32768",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size),
|
||||
OPT_STRING('o', "output", &scan_args->output, "Output index file path. DEFAULT: index.sist2"),
|
||||
OPT_BOOLEAN(0, "incremental", &scan_args->incremental,
|
||||
// TODO: Update help string
|
||||
"Reuse an existing index and only scan modified files."),
|
||||
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
|
||||
"If the output file path exists, only scan new or modified files."),
|
||||
OPT_BOOLEAN(0, "optimize-index", &scan_args->optimize_database,
|
||||
"Defragment index file after scan to reduce its file size."),
|
||||
OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
|
||||
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
|
||||
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: index"),
|
||||
OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
|
||||
"Use 0 to only scan files in PATH. DEFAULT: -1"),
|
||||
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
|
||||
"skip: Don't parse, list: only get file names as text, "
|
||||
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
|
||||
"skip: don't scan, list: only save file names as text, "
|
||||
"shallow: don't scan archives inside archives. DEFAULT: recurse"),
|
||||
OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
|
||||
"Passphrase for encrypted archive files"),
|
||||
|
||||
@@ -559,8 +478,8 @@ int main(int argc, const char *argv[]) {
|
||||
"which are installed on your machine)"),
|
||||
OPT_BOOLEAN(0, "ocr-images", &scan_args->ocr_images, "Enable OCR'ing of image files."),
|
||||
OPT_BOOLEAN(0, "ocr-ebooks", &scan_args->ocr_ebooks, "Enable OCR'ing of ebook files."),
|
||||
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
|
||||
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
|
||||
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned."),
|
||||
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type."),
|
||||
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
|
||||
"(see USAGE.md). DEFAULT: 0.0005"),
|
||||
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer_mib,
|
||||
@@ -568,19 +487,20 @@ int main(int argc, const char *argv[]) {
|
||||
"(see USAGE.md). DEFAULT: 2000"),
|
||||
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
|
||||
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
|
||||
"Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
|
||||
"Faster but less accurate EPUB parsing (no thumbnails, metadata)."),
|
||||
OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."),
|
||||
OPT_STRING(0, "list-file", &scan_args->list_path, "Specify a list of newline-delimited paths to be scanned"
|
||||
" instead of normal directory traversal. Use '-' to read"
|
||||
" from stdin."),
|
||||
|
||||
OPT_GROUP("Index options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT: http://localhost:9200"),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
|
||||
"Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
|
||||
OPT_BOOLEAN('p', "print", &index_args->print,
|
||||
"Print JSON documents to stdout instead of indexing to elasticsearch."),
|
||||
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
|
||||
"Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch."),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
@@ -588,15 +508,15 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
|
||||
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 70"),
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
|
||||
"(You must use this option the first time you use the index command)"),
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings."),
|
||||
|
||||
OPT_GROUP("Web options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
|
||||
"Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
|
||||
OPT_STRING(0, "bind", &web_args->listen_address,
|
||||
"Listen for connections on this address. DEFAULT: localhost:4090"),
|
||||
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
|
||||
OPT_STRING(0, "auth0-audience", &web_args->auth0_audience, "API audience/identifier"),
|
||||
OPT_STRING(0, "auth0-domain", &web_args->auth0_domain, "Application domain"),
|
||||
@@ -609,10 +529,10 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_STRING(0, "lang", &web_args->lang, "Default UI language. Can be changed by the user"),
|
||||
|
||||
OPT_GROUP("Exec-script options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
|
||||
"Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||
|
||||
@@ -621,7 +541,11 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
struct argparse argparse;
|
||||
argparse_init(&argparse, options, usage, 0);
|
||||
argparse_describe(&argparse, DESCRIPTION, EPILOG);
|
||||
argparse_describe(
|
||||
&argparse,
|
||||
"\nLightning-fast file system indexer and search tool.",
|
||||
"\nMade by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||
);
|
||||
argc = argparse_parse(&argparse, argc, argv);
|
||||
|
||||
if (arg_version) {
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
|
||||
|
||||
static int fs_read(struct vfile *f, void *buf, size_t size) {
|
||||
|
||||
if (f->fd == -1) {
|
||||
SHA1_Init(&f->sha1_ctx);
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ char *magic_buffer_embedded(void *buffer, size_t buffer_size) {
|
||||
const char *magic_buffers[1] = {magic_database_buffer,};
|
||||
size_t sizes[1] = {sizeof(magic_database_buffer),};
|
||||
|
||||
// TODO: check if we can reuse the magic instance
|
||||
// TODO optimisation: check if we can reuse the magic instance
|
||||
int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1);
|
||||
|
||||
if (load_ret != 0) {
|
||||
|
||||
567
src/parsing/mime_generated.c
vendored
567
src/parsing/mime_generated.c
vendored
@@ -174,287 +174,291 @@ application_x_mach_binary=655526,
|
||||
application_x_mach_executable=655527,
|
||||
application_x_magic_cap_package_1_0=655528,
|
||||
application_x_mathcad=655529,
|
||||
application_x_maxis_dbpf=655530,
|
||||
application_x_meme=655531,
|
||||
application_x_midi=655532,
|
||||
application_x_mif=655533,
|
||||
application_x_mix_transfer=655534,
|
||||
application_x_mobipocket_ebook=655535 | 0x02000000,
|
||||
application_x_ms_compress_szdd=655536,
|
||||
application_x_ms_pdb=655537,
|
||||
application_x_ms_reader=655538,
|
||||
application_x_msaccess=655539,
|
||||
application_x_n64_rom=655540,
|
||||
application_x_navi_animation=655541,
|
||||
application_x_navidoc=655542,
|
||||
application_x_navimap=655543,
|
||||
application_x_navistyle=655544,
|
||||
application_x_nes_rom=655545,
|
||||
application_x_netcdf=655546,
|
||||
application_x_newton_compatible_pkg=655547,
|
||||
application_x_nintendo_ds_rom=655548,
|
||||
application_x_object=655549,
|
||||
application_x_omc=655550,
|
||||
application_x_omcdatamaker=655551,
|
||||
application_x_omcregerator=655552,
|
||||
application_x_pagemaker=655553,
|
||||
application_x_pcl=655554,
|
||||
application_x_pgp_keyring=655555,
|
||||
application_x_pixclscript=655556,
|
||||
application_x_pkcs7_certreqresp=655557,
|
||||
application_x_pkcs7_signature=655558,
|
||||
application_x_project=655559,
|
||||
application_x_qpro=655560,
|
||||
application_x_rar=655561 | 0x10000000,
|
||||
application_x_rpm=655562,
|
||||
application_x_sdp=655563,
|
||||
application_x_sea=655564,
|
||||
application_x_seelogo=655565,
|
||||
application_x_setupscript=655566,
|
||||
application_x_shar=655567,
|
||||
application_x_sharedlib=655568,
|
||||
application_x_shockwave_flash=655569,
|
||||
application_x_snappy_framed=655570,
|
||||
application_x_sprite=655571,
|
||||
application_x_sqlite3=655572,
|
||||
application_x_stargallery_thm=655573,
|
||||
application_x_stuffit=655574,
|
||||
application_x_sv4cpio=655575,
|
||||
application_x_sv4crc=655576,
|
||||
application_x_tar=655577 | 0x10000000,
|
||||
application_x_tbook=655578,
|
||||
application_x_terminfo=655579,
|
||||
application_x_terminfo2=655580,
|
||||
application_x_tex_tfm=655581,
|
||||
application_x_texinfo=655582,
|
||||
application_x_ustar=655583,
|
||||
application_x_visio=655584,
|
||||
application_x_vnd_audioexplosion_mzz=655585,
|
||||
application_x_vnd_ls_xpix=655586,
|
||||
application_x_vrml=655587,
|
||||
application_x_wais_source=655588,
|
||||
application_x_wine_extension_ini=655589,
|
||||
application_x_wintalk=655590,
|
||||
application_x_world=655591,
|
||||
application_x_wri=655592,
|
||||
application_x_x509_ca_cert=655593,
|
||||
application_x_xz=655594 | 0x08000000,
|
||||
application_x_zip=655595,
|
||||
application_x_zstd=655596 | 0x08000000,
|
||||
application_x_zstd_dictionary=655597,
|
||||
application_xml=655598,
|
||||
application_zip=655599 | 0x10000000,
|
||||
application_zlib=655600,
|
||||
audio_basic=458993 | 0x80000000,
|
||||
audio_it=458994,
|
||||
audio_make=458995,
|
||||
audio_mid=458996,
|
||||
audio_midi=458997,
|
||||
audio_mp4=458998,
|
||||
audio_mpeg=458999,
|
||||
audio_ogg=459000,
|
||||
audio_s3m=459001,
|
||||
audio_tsp_audio=459002,
|
||||
audio_tsplayer=459003,
|
||||
audio_vnd_qcelp=459004,
|
||||
audio_voxware=459005,
|
||||
audio_x_aiff=459006,
|
||||
audio_x_flac=459007,
|
||||
audio_x_gsm=459008,
|
||||
audio_x_hx_aac_adts=459009,
|
||||
audio_x_jam=459010,
|
||||
audio_x_liveaudio=459011,
|
||||
audio_x_m4a=459012,
|
||||
audio_x_midi=459013,
|
||||
audio_x_mod=459014,
|
||||
audio_x_mp4a_latm=459015,
|
||||
audio_x_mpeg_3=459016,
|
||||
audio_x_mpequrl=459017,
|
||||
audio_x_nspaudio=459018,
|
||||
audio_x_pn_realaudio=459019,
|
||||
audio_x_psid=459020,
|
||||
audio_x_realaudio=459021,
|
||||
audio_x_s3m=459022,
|
||||
audio_x_twinvq=459023,
|
||||
audio_x_twinvq_plugin=459024,
|
||||
audio_x_voc=459025,
|
||||
audio_x_wav=459026,
|
||||
audio_x_xbox_executable=459027 | 0x80000000,
|
||||
audio_x_xbox360_executable=459028 | 0x80000000,
|
||||
audio_xm=459029,
|
||||
font_otf=327958 | 0x20000000,
|
||||
font_sfnt=327959 | 0x20000000,
|
||||
font_woff=327960 | 0x20000000,
|
||||
font_woff2=327961 | 0x20000000,
|
||||
image_bmp=524570,
|
||||
image_cmu_raster=524571,
|
||||
image_fif=524572,
|
||||
image_florian=524573,
|
||||
image_g3fax=524574,
|
||||
image_gif=524575,
|
||||
image_heic=524576,
|
||||
image_ief=524577,
|
||||
image_jpeg=524578,
|
||||
image_jutvision=524579,
|
||||
image_naplps=524580,
|
||||
image_pict=524581,
|
||||
image_png=524582,
|
||||
image_svg=524583 | 0x80000000,
|
||||
image_svg_xml=524584 | 0x80000000,
|
||||
image_tiff=524585,
|
||||
image_vnd_adobe_photoshop=524586 | 0x80000000,
|
||||
image_vnd_djvu=524587 | 0x80000000,
|
||||
image_vnd_fpx=524588,
|
||||
image_vnd_microsoft_icon=524589,
|
||||
image_vnd_rn_realflash=524590,
|
||||
image_vnd_rn_realpix=524591,
|
||||
image_vnd_wap_wbmp=524592,
|
||||
image_vnd_xiff=524593,
|
||||
image_webp=524594,
|
||||
image_wmf=524595,
|
||||
image_x_3ds=524596,
|
||||
image_x_adobe_dng=524597 | 0x00800000,
|
||||
image_x_award_bioslogo=524598,
|
||||
image_x_canon_cr2=524599 | 0x00800000,
|
||||
image_x_canon_crw=524600 | 0x00800000,
|
||||
image_x_cmu_raster=524601,
|
||||
image_x_cur=524602,
|
||||
image_x_dcraw=524603 | 0x00800000,
|
||||
image_x_dwg=524604,
|
||||
image_x_eps=524605,
|
||||
image_x_epson_erf=524606 | 0x00800000,
|
||||
image_x_exr=524607,
|
||||
image_x_fuji_raf=524608 | 0x00800000,
|
||||
image_x_gem=524609,
|
||||
image_x_icns=524610,
|
||||
image_x_icon=524611 | 0x80000000,
|
||||
image_x_jg=524612,
|
||||
image_x_jps=524613,
|
||||
image_x_kodak_dcr=524614 | 0x00800000,
|
||||
image_x_kodak_k25=524615 | 0x00800000,
|
||||
image_x_kodak_kdc=524616 | 0x00800000,
|
||||
image_x_minolta_mrw=524617 | 0x00800000,
|
||||
image_x_ms_bmp=524618,
|
||||
image_x_niff=524619,
|
||||
image_x_nikon_nef=524620 | 0x00800000,
|
||||
image_x_olympus_orf=524621 | 0x00800000,
|
||||
image_x_panasonic_raw=524622 | 0x00800000,
|
||||
image_x_pcx=524623,
|
||||
image_x_pentax_pef=524624 | 0x00800000,
|
||||
image_x_pict=524625,
|
||||
image_x_portable_bitmap=524626,
|
||||
image_x_portable_graymap=524627,
|
||||
image_x_portable_pixmap=524628,
|
||||
image_x_quicktime=524629,
|
||||
image_x_rgb=524630,
|
||||
image_x_sigma_x3f=524631 | 0x00800000,
|
||||
image_x_sony_arw=524632 | 0x00800000,
|
||||
image_x_sony_sr2=524633 | 0x00800000,
|
||||
image_x_sony_srf=524634 | 0x00800000,
|
||||
image_x_tga=524635,
|
||||
image_x_tiff=524636,
|
||||
image_x_win_bitmap=524637,
|
||||
image_x_xcf=524638 | 0x80000000,
|
||||
image_x_xpixmap=524639 | 0x80000000,
|
||||
image_x_xwindowdump=524640,
|
||||
message_news=196961,
|
||||
message_rfc822=196962,
|
||||
model_vnd_dwf=65891,
|
||||
model_vnd_gdl=65892,
|
||||
model_vnd_gs_gdl=65893,
|
||||
model_vrml=65894,
|
||||
model_x_pov=65895,
|
||||
application_x_matlab_data=655530,
|
||||
application_x_maxis_dbpf=655531,
|
||||
application_x_meme=655532,
|
||||
application_x_midi=655533,
|
||||
application_x_mif=655534,
|
||||
application_x_mix_transfer=655535,
|
||||
application_x_mobipocket_ebook=655536 | 0x02000000,
|
||||
application_x_ms_compress_szdd=655537,
|
||||
application_x_ms_pdb=655538,
|
||||
application_x_ms_reader=655539,
|
||||
application_x_msaccess=655540,
|
||||
application_x_n64_rom=655541,
|
||||
application_x_navi_animation=655542,
|
||||
application_x_navidoc=655543,
|
||||
application_x_navimap=655544,
|
||||
application_x_navistyle=655545,
|
||||
application_x_nes_rom=655546,
|
||||
application_x_netcdf=655547,
|
||||
application_x_newton_compatible_pkg=655548,
|
||||
application_x_nintendo_ds_rom=655549,
|
||||
application_x_object=655550,
|
||||
application_x_omc=655551,
|
||||
application_x_omcdatamaker=655552,
|
||||
application_x_omcregerator=655553,
|
||||
application_x_pagemaker=655554,
|
||||
application_x_pcl=655555,
|
||||
application_x_pgp_keyring=655556,
|
||||
application_x_pixclscript=655557,
|
||||
application_x_pkcs7_certreqresp=655558,
|
||||
application_x_pkcs7_signature=655559,
|
||||
application_x_project=655560,
|
||||
application_x_qpro=655561,
|
||||
application_x_rar=655562 | 0x10000000,
|
||||
application_x_rpm=655563,
|
||||
application_x_sdp=655564,
|
||||
application_x_sea=655565,
|
||||
application_x_seelogo=655566,
|
||||
application_x_setupscript=655567,
|
||||
application_x_shar=655568,
|
||||
application_x_sharedlib=655569,
|
||||
application_x_shockwave_flash=655570,
|
||||
application_x_snappy_framed=655571,
|
||||
application_x_sprite=655572,
|
||||
application_x_sqlite3=655573,
|
||||
application_x_stargallery_thm=655574,
|
||||
application_x_stuffit=655575,
|
||||
application_x_sv4cpio=655576,
|
||||
application_x_sv4crc=655577,
|
||||
application_x_tar=655578 | 0x10000000,
|
||||
application_x_tbook=655579,
|
||||
application_x_terminfo=655580,
|
||||
application_x_terminfo2=655581,
|
||||
application_x_tex_tfm=655582,
|
||||
application_x_texinfo=655583,
|
||||
application_x_ustar=655584,
|
||||
application_x_visio=655585,
|
||||
application_x_vnd_audioexplosion_mzz=655586,
|
||||
application_x_vnd_ls_xpix=655587,
|
||||
application_x_vrml=655588,
|
||||
application_x_wais_source=655589,
|
||||
application_x_wine_extension_ini=655590,
|
||||
application_x_wintalk=655591,
|
||||
application_x_world=655592,
|
||||
application_x_wri=655593,
|
||||
application_x_x509_ca_cert=655594,
|
||||
application_x_xz=655595 | 0x08000000,
|
||||
application_x_zip=655596,
|
||||
application_x_zstd=655597 | 0x08000000,
|
||||
application_x_zstd_dictionary=655598,
|
||||
application_xml=655599,
|
||||
application_zip=655600 | 0x10000000,
|
||||
application_zlib=655601,
|
||||
audio_basic=458994 | 0x80000000,
|
||||
audio_it=458995,
|
||||
audio_make=458996,
|
||||
audio_mid=458997,
|
||||
audio_midi=458998,
|
||||
audio_mp4=458999,
|
||||
audio_mpeg=459000,
|
||||
audio_ogg=459001,
|
||||
audio_s3m=459002,
|
||||
audio_tsp_audio=459003,
|
||||
audio_tsplayer=459004,
|
||||
audio_vnd_qcelp=459005,
|
||||
audio_voxware=459006,
|
||||
audio_x_aiff=459007,
|
||||
audio_x_flac=459008,
|
||||
audio_x_gsm=459009,
|
||||
audio_x_hx_aac_adts=459010,
|
||||
audio_x_jam=459011,
|
||||
audio_x_liveaudio=459012,
|
||||
audio_x_m4a=459013,
|
||||
audio_x_midi=459014,
|
||||
audio_x_mod=459015,
|
||||
audio_x_mp4a_latm=459016,
|
||||
audio_x_mpeg_3=459017,
|
||||
audio_x_mpequrl=459018,
|
||||
audio_x_nspaudio=459019,
|
||||
audio_x_pn_realaudio=459020,
|
||||
audio_x_psid=459021,
|
||||
audio_x_realaudio=459022,
|
||||
audio_x_s3m=459023,
|
||||
audio_x_twinvq=459024,
|
||||
audio_x_twinvq_plugin=459025,
|
||||
audio_x_voc=459026,
|
||||
audio_x_wav=459027,
|
||||
audio_x_xbox_executable=459028 | 0x80000000,
|
||||
audio_x_xbox360_executable=459029 | 0x80000000,
|
||||
audio_xm=459030,
|
||||
font_otf=327959 | 0x20000000,
|
||||
font_sfnt=327960 | 0x20000000,
|
||||
font_woff=327961 | 0x20000000,
|
||||
font_woff2=327962 | 0x20000000,
|
||||
image_bmp=524571,
|
||||
image_cmu_raster=524572,
|
||||
image_fif=524573,
|
||||
image_florian=524574,
|
||||
image_g3fax=524575,
|
||||
image_gif=524576,
|
||||
image_heic=524577,
|
||||
image_ief=524578,
|
||||
image_jpeg=524579,
|
||||
image_jutvision=524580,
|
||||
image_naplps=524581,
|
||||
image_pict=524582,
|
||||
image_png=524583,
|
||||
image_svg=524584 | 0x80000000,
|
||||
image_svg_xml=524585 | 0x80000000,
|
||||
image_tiff=524586,
|
||||
image_vnd_adobe_photoshop=524587 | 0x80000000,
|
||||
image_vnd_djvu=524588 | 0x80000000,
|
||||
image_vnd_fpx=524589,
|
||||
image_vnd_microsoft_icon=524590,
|
||||
image_vnd_rn_realflash=524591,
|
||||
image_vnd_rn_realpix=524592,
|
||||
image_vnd_wap_wbmp=524593,
|
||||
image_vnd_xiff=524594,
|
||||
image_webp=524595,
|
||||
image_wmf=524596,
|
||||
image_x_3ds=524597,
|
||||
image_x_adobe_dng=524598 | 0x00800000,
|
||||
image_x_award_bioslogo=524599,
|
||||
image_x_canon_cr2=524600 | 0x00800000,
|
||||
image_x_canon_crw=524601 | 0x00800000,
|
||||
image_x_cmu_raster=524602,
|
||||
image_x_cur=524603,
|
||||
image_x_dcraw=524604 | 0x00800000,
|
||||
image_x_dwg=524605,
|
||||
image_x_eps=524606,
|
||||
image_x_epson_erf=524607 | 0x00800000,
|
||||
image_x_exr=524608,
|
||||
image_x_fuji_raf=524609 | 0x00800000,
|
||||
image_x_gem=524610,
|
||||
image_x_icns=524611,
|
||||
image_x_icon=524612 | 0x80000000,
|
||||
image_x_jg=524613,
|
||||
image_x_jps=524614,
|
||||
image_x_kodak_dcr=524615 | 0x00800000,
|
||||
image_x_kodak_k25=524616 | 0x00800000,
|
||||
image_x_kodak_kdc=524617 | 0x00800000,
|
||||
image_x_minolta_mrw=524618 | 0x00800000,
|
||||
image_x_ms_bmp=524619,
|
||||
image_x_niff=524620,
|
||||
image_x_nikon_nef=524621 | 0x00800000,
|
||||
image_x_olympus_orf=524622 | 0x00800000,
|
||||
image_x_panasonic_raw=524623 | 0x00800000,
|
||||
image_x_pcx=524624,
|
||||
image_x_pentax_pef=524625 | 0x00800000,
|
||||
image_x_pict=524626,
|
||||
image_x_portable_bitmap=524627,
|
||||
image_x_portable_graymap=524628,
|
||||
image_x_portable_pixmap=524629,
|
||||
image_x_quicktime=524630,
|
||||
image_x_rgb=524631,
|
||||
image_x_sigma_x3f=524632 | 0x00800000,
|
||||
image_x_sony_arw=524633 | 0x00800000,
|
||||
image_x_sony_sr2=524634 | 0x00800000,
|
||||
image_x_sony_srf=524635 | 0x00800000,
|
||||
image_x_tga=524636,
|
||||
image_x_tiff=524637,
|
||||
image_x_win_bitmap=524638,
|
||||
image_x_xcf=524639 | 0x80000000,
|
||||
image_x_xpixmap=524640 | 0x80000000,
|
||||
image_x_xwindowdump=524641,
|
||||
message_news=196962,
|
||||
message_rfc822=196963,
|
||||
model_vnd_dwf=65892,
|
||||
model_vnd_gdl=65893,
|
||||
model_vnd_gs_gdl=65894,
|
||||
model_vrml=65895,
|
||||
model_x_pov=65896,
|
||||
sist2_sidecar=2,
|
||||
text_PGP=590184,
|
||||
text_asp=590185,
|
||||
text_css=590186,
|
||||
text_html=590187 | 0x01000000,
|
||||
text_javascript=590188,
|
||||
text_mcf=590189,
|
||||
text_pascal=590190,
|
||||
text_plain=590191,
|
||||
text_richtext=590192,
|
||||
text_rtf=590193,
|
||||
text_scriplet=590194,
|
||||
text_tab_separated_values=590195,
|
||||
text_troff=590196,
|
||||
text_uri_list=590197,
|
||||
text_vnd_abc=590198,
|
||||
text_vnd_fmi_flexstor=590199,
|
||||
text_vnd_wap_wml=590200,
|
||||
text_vnd_wap_wmlscript=590201,
|
||||
text_webviewhtml=590202,
|
||||
text_x_Algol68=590203,
|
||||
text_x_asm=590204,
|
||||
text_x_audiosoft_intra=590205,
|
||||
text_x_awk=590206,
|
||||
text_x_bcpl=590207,
|
||||
text_x_c=590208,
|
||||
text_x_c__=590209,
|
||||
text_x_component=590210,
|
||||
text_x_diff=590211,
|
||||
text_x_fortran=590212,
|
||||
text_x_java=590213,
|
||||
text_x_la_asf=590214,
|
||||
text_x_lisp=590215,
|
||||
text_x_m=590216,
|
||||
text_x_m4=590217,
|
||||
text_x_makefile=590218,
|
||||
text_x_ms_regedit=590219,
|
||||
text_x_msdos_batch=590220,
|
||||
text_x_objective_c=590221,
|
||||
text_x_pascal=590222,
|
||||
text_x_perl=590223,
|
||||
text_x_php=590224,
|
||||
text_x_po=590225,
|
||||
text_x_python=590226,
|
||||
text_x_ruby=590227,
|
||||
text_x_sass=590228,
|
||||
text_x_scss=590229,
|
||||
text_x_server_parsed_html=590230,
|
||||
text_x_setext=590231,
|
||||
text_x_sgml=590232 | 0x01000000,
|
||||
text_x_shellscript=590233,
|
||||
text_x_speech=590234,
|
||||
text_x_tcl=590235,
|
||||
text_x_tex=590236,
|
||||
text_x_uil=590237,
|
||||
text_x_uuencode=590238,
|
||||
text_x_vcalendar=590239,
|
||||
text_x_vcard=590240,
|
||||
text_xml=590241 | 0x01000000,
|
||||
video_MP2T=393634,
|
||||
video_animaflex=393635,
|
||||
video_avi=393636,
|
||||
video_avs_video=393637,
|
||||
video_mp4=393638,
|
||||
video_mpeg=393639,
|
||||
video_quicktime=393640,
|
||||
video_vdo=393641,
|
||||
video_vivo=393642,
|
||||
video_vnd_rn_realvideo=393643,
|
||||
video_vosaic=393644,
|
||||
video_webm=393645,
|
||||
video_x_amt_demorun=393646,
|
||||
video_x_amt_showrun=393647,
|
||||
video_x_atomic3d_feature=393648,
|
||||
video_x_dl=393649,
|
||||
video_x_dv=393650,
|
||||
video_x_fli=393651,
|
||||
video_x_flv=393652,
|
||||
video_x_isvideo=393653,
|
||||
video_x_jng=393654 | 0x80000000,
|
||||
video_x_m4v=393655,
|
||||
video_x_matroska=393656,
|
||||
video_x_mng=393657,
|
||||
video_x_motion_jpeg=393658,
|
||||
video_x_ms_asf=393659,
|
||||
video_x_msvideo=393660,
|
||||
video_x_qtc=393661,
|
||||
video_x_sgi_movie=393662,
|
||||
x_epoc_x_sisx_app=721343,
|
||||
text_PGP=590185,
|
||||
text_asp=590186,
|
||||
text_css=590187,
|
||||
text_csv=590188,
|
||||
text_html=590189 | 0x01000000,
|
||||
text_javascript=590190,
|
||||
text_mcf=590191,
|
||||
text_pascal=590192,
|
||||
text_plain=590193,
|
||||
text_richtext=590194,
|
||||
text_rtf=590195,
|
||||
text_scriplet=590196,
|
||||
text_tab_separated_values=590197,
|
||||
text_troff=590198,
|
||||
text_uri_list=590199,
|
||||
text_vnd_abc=590200,
|
||||
text_vnd_fmi_flexstor=590201,
|
||||
text_vnd_wap_wml=590202,
|
||||
text_vnd_wap_wmlscript=590203,
|
||||
text_webviewhtml=590204,
|
||||
text_x_Algol68=590205,
|
||||
text_x_asm=590206,
|
||||
text_x_audiosoft_intra=590207,
|
||||
text_x_awk=590208,
|
||||
text_x_bcpl=590209,
|
||||
text_x_c=590210,
|
||||
text_x_c__=590211,
|
||||
text_x_component=590212,
|
||||
text_x_diff=590213,
|
||||
text_x_fortran=590214,
|
||||
text_x_java=590215,
|
||||
text_x_la_asf=590216,
|
||||
text_x_lisp=590217,
|
||||
text_x_m=590218,
|
||||
text_x_m4=590219,
|
||||
text_x_makefile=590220,
|
||||
text_x_ms_regedit=590221,
|
||||
text_x_msdos_batch=590222,
|
||||
text_x_objective_c=590223,
|
||||
text_x_pascal=590224,
|
||||
text_x_perl=590225,
|
||||
text_x_php=590226,
|
||||
text_x_po=590227,
|
||||
text_x_python=590228,
|
||||
text_x_ruby=590229,
|
||||
text_x_sass=590230,
|
||||
text_x_script_python=590231,
|
||||
text_x_scss=590232,
|
||||
text_x_server_parsed_html=590233,
|
||||
text_x_setext=590234,
|
||||
text_x_sgml=590235 | 0x01000000,
|
||||
text_x_shellscript=590236,
|
||||
text_x_speech=590237,
|
||||
text_x_tcl=590238,
|
||||
text_x_tex=590239,
|
||||
text_x_uil=590240,
|
||||
text_x_uuencode=590241,
|
||||
text_x_vcalendar=590242,
|
||||
text_x_vcard=590243,
|
||||
text_xml=590244 | 0x01000000,
|
||||
video_MP2T=393637,
|
||||
video_animaflex=393638,
|
||||
video_avi=393639,
|
||||
video_avs_video=393640,
|
||||
video_mp4=393641,
|
||||
video_mpeg=393642,
|
||||
video_quicktime=393643,
|
||||
video_vdo=393644,
|
||||
video_vivo=393645,
|
||||
video_vnd_rn_realvideo=393646,
|
||||
video_vosaic=393647,
|
||||
video_webm=393648,
|
||||
video_x_amt_demorun=393649,
|
||||
video_x_amt_showrun=393650,
|
||||
video_x_atomic3d_feature=393651,
|
||||
video_x_dl=393652,
|
||||
video_x_dv=393653,
|
||||
video_x_fli=393654,
|
||||
video_x_flv=393655,
|
||||
video_x_isvideo=393656,
|
||||
video_x_jng=393657 | 0x80000000,
|
||||
video_x_m4v=393658,
|
||||
video_x_matroska=393659,
|
||||
video_x_mng=393660,
|
||||
video_x_motion_jpeg=393661,
|
||||
video_x_ms_asf=393662,
|
||||
video_x_msvideo=393663,
|
||||
video_x_qtc=393664,
|
||||
video_x_sgi_movie=393665,
|
||||
x_epoc_x_sisx_app=721346,
|
||||
};
|
||||
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
|
||||
case application_x_matlab_data: return "application/x-matlab-data";
|
||||
case application_arj: return "application/arj";
|
||||
case application_base64: return "application/base64";
|
||||
case application_binhex: return "application/binhex";
|
||||
@@ -802,6 +806,8 @@ case text_mcf: return "text/mcf";
|
||||
case text_pascal: return "text/pascal";
|
||||
case text_PGP: return "text/PGP";
|
||||
case text_plain: return "text/plain";
|
||||
case text_x_script_python: return "text/x-script.python";
|
||||
case text_csv: return "text/csv";
|
||||
case application_vnd_coffeescript: return "application/vnd.coffeescript";
|
||||
case text_richtext: return "text/richtext";
|
||||
case text_rtf: return "text/rtf";
|
||||
@@ -906,6 +912,7 @@ case image_x_epson_erf: return "image/x-epson-erf";
|
||||
case sist2_sidecar: return "sist2/sidecar";
|
||||
default: return NULL;}}
|
||||
unsigned int mime_extension_lookup(unsigned long extension_crc32) {switch (extension_crc32) {
|
||||
case 2495639202:return application_x_matlab_data;
|
||||
case 104524599:return application_arj;
|
||||
case 1388642652:return application_base64;
|
||||
case 3514823219:return application_binhex;
|
||||
@@ -1194,6 +1201,7 @@ case 398963028:return text_javascript;
|
||||
case 1431272808:return text_mcf;
|
||||
case 509266722:return text_pascal;
|
||||
case 1689700070:case 794565824:case 351504808:case 214229345:case 30677878:case 1835907068:case 1154021400:case 3992351814:case 2107886487:case 2202503947:case 999008199:case 473390917:case 3679822420:case 1465078094:case 1466496025:case 2277716423:case 157353380:case 2002237032:case 4216257084:case 590894066:case 987584319:case 2268432115:case 3551958239:case 1436306077:case 3060306774:case 808890964:case 2564639436:case 3322219037:case 3334425408:case 3818365258:case 1403162576:case 590812979:case 1800036834:case 144986711:case 621471808:case 449607278:case 2403297477:case 2529069283:case 3929123204:return text_plain;
|
||||
case 194218739:return text_x_script_python;
|
||||
case 1401235891:return application_vnd_coffeescript;
|
||||
case 196656302:case 1203117491:case 3183026384:return text_richtext;
|
||||
case 2119613712:return text_scriplet;
|
||||
@@ -1288,6 +1296,7 @@ case 142938048:return image_x_epson_erf;
|
||||
case 287571459:return sist2_sidecar;
|
||||
default: return 0;}}
|
||||
unsigned int mime_name_lookup(unsigned long mime_crc32) {switch (mime_crc32) {
|
||||
case 3272851765: return application_x_matlab_data;
|
||||
case 3812269631: return application_arj;
|
||||
case 2479484568: return application_base64;
|
||||
case 3891182180: return application_binhex;
|
||||
@@ -1635,6 +1644,8 @@ case 768274928: return text_mcf;
|
||||
case 3970938585: return text_pascal;
|
||||
case 1059844876: return text_PGP;
|
||||
case 1152832851: return text_plain;
|
||||
case 3112468514: return text_x_script_python;
|
||||
case 1881267919: return text_csv;
|
||||
case 2809123822: return application_vnd_coffeescript;
|
||||
case 4000659158: return text_richtext;
|
||||
case 1060344107: return text_rtf;
|
||||
|
||||
@@ -46,17 +46,13 @@ file_type_t get_file_type(unsigned int mime, size_t size, const char *filepath)
|
||||
return FILETYPE_MEDIA;
|
||||
} else if (IS_PDF(mime)) {
|
||||
return FILETYPE_EBOOK;
|
||||
} else if (major_mime == MimeText && ScanCtx.text_ctx.content_size > 0) {
|
||||
if (IS_MARKUP(mime)) {
|
||||
return FILETYPE_MARKUP;
|
||||
} else {
|
||||
return FILETYPE_TEXT;
|
||||
}
|
||||
|
||||
} else if (IS_MARKUP(mime)) {
|
||||
return FILETYPE_MARKUP;
|
||||
} else if (major_mime == MimeText) {
|
||||
return FILETYPE_TEXT;
|
||||
} else if (IS_FONT(mime)) {
|
||||
return FILETYPE_FONT;
|
||||
} else if (
|
||||
ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
|
||||
} else if (ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
|
||||
IS_ARC(mime) ||
|
||||
(IS_ARC_FILTER(mime) && should_parse_filtered_file(filepath))
|
||||
)) {
|
||||
@@ -98,10 +94,6 @@ int get_mime(parse_job_t *job) {
|
||||
}
|
||||
}
|
||||
|
||||
if (strlen(extension) == 0 && strlen(job->filepath + job->base) == 40) {
|
||||
fprintf(stderr, "GIT? %s", job->filepath);
|
||||
}
|
||||
|
||||
if (ScanCtx.fast) {
|
||||
return 0;
|
||||
}
|
||||
@@ -122,7 +114,6 @@ int get_mime(parse_job_t *job) {
|
||||
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc));
|
||||
}
|
||||
|
||||
|
||||
return GET_MIME_ERROR_FATAL;
|
||||
}
|
||||
|
||||
@@ -130,12 +121,13 @@ int get_mime(parse_job_t *job) {
|
||||
|
||||
if (magic_mime_str != NULL) {
|
||||
mime = (int) mime_get_mime_by_string(magic_mime_str);
|
||||
free(magic_mime_str);
|
||||
|
||||
if (mime == 0) {
|
||||
LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
|
||||
free(magic_mime_str);
|
||||
return 0;
|
||||
}
|
||||
free(magic_mime_str);
|
||||
}
|
||||
|
||||
if (job->vfile.reset != NULL) {
|
||||
@@ -163,14 +155,11 @@ void parse(parse_job_t *job) {
|
||||
doc->meta_head = NULL;
|
||||
doc->meta_tail = NULL;
|
||||
doc->size = job->vfile.st_size;
|
||||
doc->mtime = (int) job->vfile.mtime;
|
||||
doc->mtime = job->vfile.mtime;
|
||||
doc->mime = get_mime(job);
|
||||
generate_doc_id(doc->filepath + ScanCtx.index.desc.root_len, doc->doc_id);
|
||||
|
||||
if (doc->mime == GET_MIME_ERROR_FATAL) {
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_failed_files_count += 1;
|
||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
free(doc);
|
||||
@@ -178,9 +167,6 @@ void parse(parse_job_t *job) {
|
||||
}
|
||||
|
||||
if (database_mark_document(ProcData.index_db, doc->doc_id, doc->mtime)) {
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_skipped_files_count += 1;
|
||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
free(doc);
|
||||
@@ -246,7 +232,7 @@ void parse(parse_job_t *job) {
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
|
||||
meta_parent->key = MetaParent;
|
||||
strcpy(meta_parent->str_val, job->parent);
|
||||
APPEND_META((doc), meta_parent)
|
||||
APPEND_META((doc), meta_parent);
|
||||
}
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
@@ -254,7 +240,7 @@ void parse(parse_job_t *job) {
|
||||
if (job->vfile.has_checksum) {
|
||||
char sha1_digest_str[SHA1_STR_LENGTH];
|
||||
buf2hex((unsigned char *) job->vfile.sha1_digest, SHA1_DIGEST_LENGTH, (char *) sha1_digest_str);
|
||||
APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str)
|
||||
APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str);
|
||||
}
|
||||
|
||||
write_document(doc);
|
||||
|
||||
@@ -27,6 +27,8 @@
|
||||
|
||||
#define UNUSED(x) __attribute__((__unused__)) x
|
||||
|
||||
#define MAX_THREADS (256)
|
||||
|
||||
#include "util.h"
|
||||
#include "log.h"
|
||||
#include "types.h"
|
||||
|
||||
153
src/tpool.c
153
src/tpool.c
@@ -6,7 +6,7 @@
|
||||
#include <sys/wait.h>
|
||||
#include "parsing/parse.h"
|
||||
|
||||
#define BLANK_STR " "
|
||||
#define BLANK_STR " "
|
||||
|
||||
typedef struct {
|
||||
int thread_id;
|
||||
@@ -17,7 +17,6 @@ typedef struct {
|
||||
typedef struct tpool {
|
||||
pthread_t threads[256];
|
||||
int num_threads;
|
||||
int fork;
|
||||
|
||||
int print_progress;
|
||||
|
||||
@@ -32,6 +31,8 @@ typedef struct tpool {
|
||||
pthread_cond_t workers_initialized_cond;
|
||||
int busy_count;
|
||||
int initialized_count;
|
||||
int thread_id_to_pid_mapping[MAX_THREADS];
|
||||
char ipc_database_filepath[128];
|
||||
} *shm;
|
||||
} tpool_t;
|
||||
|
||||
@@ -43,11 +44,6 @@ void job_destroy(job_t *job) {
|
||||
free(job);
|
||||
}
|
||||
|
||||
void tpool_dump_debug_info(tpool_t *pool) {
|
||||
// TODO
|
||||
LOG_DEBUGF("tpool.c", "pool->num_threads = %d", pool->num_threads);
|
||||
}
|
||||
|
||||
/**
|
||||
* Push work object to thread pool
|
||||
*/
|
||||
@@ -130,108 +126,124 @@ static void worker_thread_loop(tpool_t *pool) {
|
||||
}
|
||||
|
||||
static void worker_proc_init(tpool_t *pool, int thread_id) {
|
||||
// TODO create PID -> thread_id mapping for signal handler
|
||||
pthread_mutex_lock(&pool->shm->data_mutex);
|
||||
pool->shm->thread_id_to_pid_mapping[thread_id] = getpid();
|
||||
pthread_mutex_unlock(&pool->shm->data_mutex);
|
||||
|
||||
ProcData.thread_id = thread_id;
|
||||
|
||||
if (ScanCtx.index.path[0] != '\0') {
|
||||
// TODO This should be closed in proc cleanup function
|
||||
ProcData.index_db = database_create(ScanCtx.index.path, INDEX_DATABASE);
|
||||
ProcData.index_db->ipc_ctx = &pool->shm->ipc_ctx;
|
||||
database_open(ProcData.index_db);
|
||||
}
|
||||
|
||||
// TODO /dev/shm
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
ProcData.ipc_db = database_create("/dev/shm/ipc.sist2", IPC_CONSUMER_DATABASE);
|
||||
ProcData.ipc_db = database_create(pool->shm->ipc_database_filepath, IPC_CONSUMER_DATABASE);
|
||||
ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx;
|
||||
database_open(ProcData.ipc_db);
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
}
|
||||
|
||||
void worker_proc_cleanup(tpool_t* pool) {
|
||||
void worker_proc_cleanup(tpool_t *pool) {
|
||||
if (ProcData.index_db != NULL) {
|
||||
database_close(ProcData.index_db, FALSE);
|
||||
}
|
||||
database_close(ProcData.ipc_db, FALSE);
|
||||
}
|
||||
|
||||
#ifndef SIST_DEBUG
|
||||
#define TPOOL_FORK
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Thread worker function
|
||||
*/
|
||||
static void *tpool_worker(void *arg) {
|
||||
tpool_t *pool = ((start_thread_arg_t *) arg)->pool;
|
||||
|
||||
if (pool->fork) {
|
||||
while (TRUE) {
|
||||
int pid = fork();
|
||||
#ifdef TPOOL_FORK
|
||||
while (TRUE) {
|
||||
int pid = fork();
|
||||
|
||||
if (pid == 0) {
|
||||
worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id);
|
||||
if (pid == 0) {
|
||||
worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id);
|
||||
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->workers_initialized_cond);
|
||||
pool->shm->initialized_count += 1;
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->workers_initialized_cond);
|
||||
pool->shm->initialized_count += 1;
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
|
||||
worker_thread_loop(pool);
|
||||
worker_thread_loop(pool);
|
||||
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->done_working_cond);
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->done_working_cond);
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
|
||||
worker_proc_cleanup(pool);
|
||||
worker_proc_cleanup(pool);
|
||||
|
||||
exit(0);
|
||||
exit(0);
|
||||
|
||||
} else {
|
||||
int status;
|
||||
// TODO: On crash, print debug info and resume thread
|
||||
waitpid(pid, &status, 0);
|
||||
} else {
|
||||
int status;
|
||||
waitpid(pid, &status, 0);
|
||||
|
||||
LOG_DEBUGF("tpool.c", "Child process terminated with status code %d", WEXITSTATUS(status));
|
||||
LOG_DEBUGF("tpool.c", "Child process terminated with status code %d", WEXITSTATUS(status));
|
||||
|
||||
pthread_mutex_lock(&(pool->shm->ipc_ctx.mutex));
|
||||
pool->shm->ipc_ctx.completed_job_count += 1;
|
||||
pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex));
|
||||
pthread_mutex_lock(&(pool->shm->ipc_ctx.mutex));
|
||||
pool->shm->ipc_ctx.completed_job_count += 1;
|
||||
pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex));
|
||||
|
||||
pthread_mutex_lock(&(pool->shm->data_mutex));
|
||||
pool->shm->busy_count -= 1;
|
||||
pthread_mutex_unlock(&(pool->shm->data_mutex));
|
||||
pthread_mutex_lock(&(pool->shm->data_mutex));
|
||||
pool->shm->busy_count -= 1;
|
||||
pthread_mutex_unlock(&(pool->shm->data_mutex));
|
||||
|
||||
if (WIFSIGNALED(status)) {
|
||||
// TODO: Get current_job based on PID
|
||||
const char *job_filepath = "TODO";
|
||||
|
||||
LOG_FATALF_NO_EXIT(
|
||||
"tpool.c",
|
||||
"Child process was terminated by signal (%s).\n"
|
||||
BLANK_STR "The process was working on %s",
|
||||
strsignal(WTERMSIG(status)),
|
||||
job_filepath
|
||||
);
|
||||
if (WIFSIGNALED(status)) {
|
||||
int crashed_thread_id = -1;
|
||||
for (int i = 0; i < MAX_THREADS; i++) {
|
||||
if (pool->shm->thread_id_to_pid_mapping[i] == pid) {
|
||||
crashed_thread_id = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
const char *job_filepath;
|
||||
if (crashed_thread_id != -1) {
|
||||
job_filepath = pool->shm->ipc_ctx.current_job[crashed_thread_id];
|
||||
} else {
|
||||
job_filepath = "unknown";
|
||||
}
|
||||
|
||||
LOG_FATALF_NO_EXIT(
|
||||
"tpool.c",
|
||||
"Child process crashed (%s).\n"
|
||||
BLANK_STR "The process was working on %s\n"
|
||||
BLANK_STR "Please consider creating a bug report at https://github.com/simon987/sist2/issues !\n"
|
||||
BLANK_STR "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs.\n",
|
||||
strsignal(WTERMSIG(status)),
|
||||
job_filepath
|
||||
);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
} else {
|
||||
worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id);
|
||||
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->workers_initialized_cond);
|
||||
pool->shm->initialized_count += 1;
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
|
||||
worker_thread_loop(pool);
|
||||
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->done_working_cond);
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#else
|
||||
worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id);
|
||||
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->workers_initialized_cond);
|
||||
pool->shm->initialized_count += 1;
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
|
||||
worker_thread_loop(pool);
|
||||
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->done_working_cond);
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
#endif
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -295,13 +307,10 @@ void tpool_destroy(tpool_t *pool) {
|
||||
*/
|
||||
tpool_t *tpool_create(int thread_cnt, int print_progress) {
|
||||
|
||||
int fork = FALSE;
|
||||
|
||||
tpool_t *pool = malloc(sizeof(tpool_t));
|
||||
|
||||
pool->shm = mmap(NULL, sizeof(*pool->shm), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
|
||||
pool->fork = fork;
|
||||
pool->num_threads = thread_cnt;
|
||||
pool->shm->ipc_ctx.job_count = 0;
|
||||
pool->shm->ipc_ctx.no_more_jobs = FALSE;
|
||||
@@ -310,6 +319,7 @@ tpool_t *tpool_create(int thread_cnt, int print_progress) {
|
||||
pool->shm->job_type = JOB_UNDEFINED;
|
||||
memset(pool->threads, 0, sizeof(pool->threads));
|
||||
pool->print_progress = print_progress;
|
||||
sprintf(pool->shm->ipc_database_filepath, "/dev/shm/sist2-ipc-%d.sqlite", getpid());
|
||||
|
||||
pthread_mutexattr_t mutexattr;
|
||||
pthread_mutexattr_init(&mutexattr);
|
||||
@@ -329,10 +339,7 @@ tpool_t *tpool_create(int thread_cnt, int print_progress) {
|
||||
pthread_cond_init(&(pool->shm->done_working_cond), &condattr);
|
||||
pthread_cond_init(&(pool->shm->workers_initialized_cond), &condattr);
|
||||
|
||||
remove("/dev/shm/ipc.sist2");
|
||||
remove("/dev/shm/ipc.sist2-wal");
|
||||
remove("/dev/shm/ipc.sist2-shm");
|
||||
ProcData.ipc_db = database_create("/dev/shm/ipc.sist2", IPC_PRODUCER_DATABASE);
|
||||
ProcData.ipc_db = database_create(pool->shm->ipc_database_filepath, IPC_PRODUCER_DATABASE);
|
||||
ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx;
|
||||
database_initialize(ProcData.ipc_db);
|
||||
|
||||
|
||||
@@ -19,8 +19,6 @@ int tpool_add_work(tpool_t *pool, job_t *job);
|
||||
|
||||
void tpool_wait(tpool_t *pool);
|
||||
|
||||
void tpool_dump_debug_info(tpool_t *pool);
|
||||
|
||||
void job_destroy(job_t *job);
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user