Fixes and cleanup

This commit is contained in:
2023-04-10 11:04:16 -04:00
parent fc36f33d52
commit 300c70883d
29 changed files with 678 additions and 708 deletions

View File

@@ -142,7 +142,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char *abs_output = abspath(args->output);
if (args->incremental && abs_output == NULL) {
LOG_WARNINGF("main.c", "Could not open original index for incremental scan: %s. Will not perform incremental scan.", abs_output);
LOG_WARNINGF("main.c", "Could not open original index for incremental scan: %s. Will not perform incremental scan.", args->output);
args->incremental = FALSE;
} else if (!args->incremental && abs_output != NULL) {
LOG_FATALF("main.c", "Index already exists: %s. If you wish to perform incremental scan, you must specify --incremental", abs_output);

View File

@@ -14,6 +14,7 @@ typedef struct scan_args {
int content_size;
int threads;
int incremental;
int optimize_database;
char *output;
char *rewrite_url;
char *name;

View File

@@ -38,13 +38,6 @@ typedef struct {
pcre_extra *exclude_extra;
int fast;
pthread_mutex_t dbg_current_files_mu;
int dbg_failed_files_count;
int dbg_skipped_files_count;
int dbg_excluded_files_count;
pthread_mutex_t dbg_file_counts_mu;
scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx;
scan_ebook_ctx_t ebook_ctx;

View File

@@ -8,7 +8,6 @@
#include <time.h>
database_t *database_create(const char *filename, database_type_t type) {
database_t *db = malloc(sizeof(database_t));
@@ -81,7 +80,7 @@ void database_initialize(database_t *db) {
}
void database_open(database_t *db) {
LOG_DEBUGF("tpool.c", "Opening database %s (%d)", db->filename, db->type);
LOG_DEBUGF("database.c", "Opening database %s (%d)", db->filename, db->type);
CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db));
@@ -113,7 +112,8 @@ void database_open(database_t *db) {
&db->write_document_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"INSERT INTO thumbnail (id, num, data) VALUES (?,?,?) ON CONFLICT DO UPDATE SET data=excluded.data;", -1,
"INSERT INTO thumbnail (id, num, data) VALUES (?,?,?) ON CONFLICT DO UPDATE SET data=excluded.data;",
-1,
&db->write_thumbnail_stmt, NULL));
// Create functions
@@ -186,12 +186,16 @@ void database_close(database_t *db, int optimize) {
if (optimize) {
LOG_DEBUG("database.c", "Optimizing database");
// TODO: This should be an optional argument
// CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "VACUUM;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "VACUUM;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA optimize;", NULL, NULL, NULL));
}
sqlite3_close(db->db);
if (db->type == IPC_PRODUCER_DATABASE) {
remove(db->filename);
}
free(db);
db = NULL;
}
@@ -202,11 +206,14 @@ void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *r
int ret = sqlite3_step(db->select_thumbnail_stmt);
// TODO: if row not found, return null
if (ret != SQLITE_ROW) {
LOG_FATALF("database.c", "FIXME: tn step returned %d", ret);
if (ret == SQLITE_DONE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->select_thumbnail_stmt));
*return_value_len = 0;
return NULL;
}
CRASH_IF_STMT_FAIL(ret);
const void *blob = sqlite3_column_blob(db->select_thumbnail_stmt, 0);
const int blob_size = sqlite3_column_bytes(db->select_thumbnail_stmt, 0);
@@ -275,11 +282,47 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) {
return desc;
}
/**
 * Create an iterator over the ids stored in the delete_list table.
 *
 * The iterator is meant to be drained with database_delete_list_iter(), which
 * finalizes the prepared statement once it runs out of rows. That function
 * never frees the iterator struct itself, so the caller remains responsible
 * for releasing it.
 *
 * @param db Database handle; db->db is the SQLite connection that is queried.
 * @return Newly allocated iterator bound to db. Allocation and statement
 *         preparation failures are reported through LOG_FATALF /
 *         CRASH_IF_NOT_SQLITE_OK instead of being silently ignored.
 */
database_iterator_t *database_create_delete_list_iterator(database_t *db) {
    // Allocate first so that a failed allocation cannot leak a prepared statement
    database_iterator_t *iter = malloc(sizeof(*iter));
    if (iter == NULL) {
        LOG_FATALF("database.c", "Could not allocate delete list iterator for %s", db->filename);
        // Only reached if LOG_FATALF does not terminate the process
        return NULL;
    }

    iter->db = db;
    iter->stmt = NULL;

    // Same error-handling convention as the other prepared statements in this file
    CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(db->db, "SELECT id FROM delete_list;", -1, &iter->stmt, NULL));

    return iter;
}
/**
 * Fetch the next id from a delete_list iterator.
 *
 * @param iter Iterator whose stmt selects the id column of delete_list.
 * @return Heap-allocated, NUL-terminated copy of the next id (the caller must
 *         free() it), or NULL once every row has been consumed. When the end
 *         is reached the statement is finalized and iter->stmt is set to
 *         NULL; the iterator struct itself is not freed here.
 */
char *database_delete_list_iter(database_iterator_t *iter) {
    int ret = sqlite3_step(iter->stmt);

    if (ret == SQLITE_ROW) {
        const char *id = (const char *) sqlite3_column_text(iter->stmt, 0);

        // sqlite3_column_text() returns NULL for SQL NULL values and on
        // out-of-memory; passing that to strlen() would be undefined behaviour
        if (id == NULL) {
            LOG_FATALF("database.c", "Could not read id from delete_list: %s", sqlite3_errmsg(iter->db->db));
            // Only reached if LOG_FATALF does not terminate the process
            return NULL;
        }

        // Copy the text: the pointer handed out by SQLite is owned by the statement
        char *id_heap = malloc(strlen(id) + 1);
        if (id_heap == NULL) {
            LOG_FATALF("database.c", "Could not allocate memory for delete_list id %s", id);
            // Only reached if LOG_FATALF does not terminate the process
            return NULL;
        }
        strcpy(id_heap, id);

        return id_heap;
    }

    // Anything other than "no more rows" is an error
    if (ret != SQLITE_DONE) {
        LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db));
    }

    // End of iteration: release the statement
    if (sqlite3_finalize(iter->stmt) != SQLITE_OK) {
        LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db));
    }

    iter->stmt = NULL;

    return NULL;
}
database_iterator_t *database_create_document_iterator(database_t *db) {
sqlite3_stmt *stmt;
// TODO: remove mtime, size, _id from json_data
// TODO optimization: remove mtime, size, _id from json_data
sqlite3_prepare_v2(db->db, "WITH doc (j) AS (SELECT CASE"
" WHEN sc.json_data IS NULL THEN"
@@ -494,10 +537,10 @@ job_t *database_get_work(database_t *db, job_type_t job_type) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt));
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
return NULL;
} else {
CRASH_IF_STMT_FAIL(ret);
}
CRASH_IF_STMT_FAIL(ret);
job = malloc(sizeof(*job));
const char *line = (const char *) sqlite3_column_text(db->pop_index_job_stmt, 2);
@@ -511,9 +554,6 @@ job_t *database_get_work(database_t *db, job_type_t job_type) {
job->bulk_line->type = sqlite3_column_int(db->pop_index_job_stmt, 1);
job->bulk_line->next = NULL;
// TODO CRASH IF NOT OK
sqlite3_step(db->pop_parse_job_stmt);
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt));
}

View File

@@ -41,7 +41,7 @@ typedef struct {
pthread_mutex_t db_mutex;
pthread_mutex_t index_db_mutex;
pthread_cond_t has_work_cond;
char current_job[256][PATH_MAX * 2];
char current_job[MAX_THREADS][PATH_MAX * 2];
} database_ipc_ctx_t;
typedef struct database {
@@ -106,6 +106,14 @@ cJSON *database_document_iter(database_iterator_t *);
#define database_document_iter_foreach(element, iter) \
for (cJSON *element = database_document_iter(iter); element != NULL; element = database_document_iter(iter))
// Creates an iterator over the ids stored in the delete_list table.
database_iterator_t *database_create_delete_list_iterator(database_t *db);
// Returns a heap-allocated copy of the next id (to be freed by the caller),
// or NULL when there are no more rows.
char * database_delete_list_iter(database_iterator_t *iter);
// Loops over every id of the delete list; `element` is declared by the macro
// as a char * and holds a fresh heap copy on each pass, which the loop body is
// expected to free(). The underlying statement is only finalized when
// database_delete_list_iter() reports the end of the list, so leaving the loop
// early (break/return) skips that cleanup.
#define database_delete_list_iter_foreach(element, iter) \
for (char *element = database_delete_list_iter(iter); element != NULL; element = database_delete_list_iter(iter))
cJSON *database_incremental_scan_begin(database_t *db);
cJSON *database_incremental_scan_end(database_t *db);

View File

@@ -24,9 +24,6 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
LOG_DEBUGF("walk.c", "Excluded: %s", filepath);
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_excluded_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
} else if (typeflag == FTW_D) {
return FTW_SKIP_SUBTREE;
}
@@ -83,13 +80,6 @@ int iterate_file_list(void *input_file) {
if (ScanCtx.exclude != NULL && EXCLUDED(absolute_path)) {
LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path);
if (S_ISREG(info.st_mode)) {
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_excluded_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
}
continue;
}

View File

@@ -18,10 +18,6 @@
#include "src/database/database.h"
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
@@ -32,77 +28,6 @@ static const char *const usage[] = {
};
static __sighandler_t sigsegv_handler = NULL;
static __sighandler_t sigabrt_handler = NULL;
/**
 * Handler for fatal signals: prints diagnostic information, asks the user to
 * file a bug report, chains to the handler stored in sigsegv_handler /
 * sigabrt_handler (when one is set for the received signal) and exits with -1.
 *
 * NOTE(review): logging and exit() are not async-signal-safe; presumably
 * accepted here because the process is about to die anyway - confirm.
 *
 * @param signum Number of the signal that was caught.
 */
void sig_handler(int signum) {
    // Force full verbosity so that every message below is emitted
    LogCtx.very_verbose = TRUE;
    LogCtx.verbose = TRUE;

    LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
    LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));

    // TODO: Print debug info (the former dump of ScanCtx.dbg_current_files,
    //  which listed the job each thread was working on, is disabled)

    // Dump the state of whichever thread pools exist
    if (ScanCtx.pool != NULL) {
        tpool_dump_debug_info(ScanCtx.pool);
    }
    if (IndexCtx.pool != NULL) {
        tpool_dump_debug_info(IndexCtx.pool);
    }

    LOG_INFO("*SIGNAL HANDLER*",
             "Please consider creating a bug report at https://github.com/simon987/sist2/issues !");
    LOG_INFO("*SIGNAL HANDLER*",
             "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs");

#ifndef SIST_DEBUG
    LOG_WARNING("*SIGNAL HANDLER*",
                "You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
                "releases page to provide additionnal information when submitting a bug report.");
#endif

    // Hand over to the handler recorded for this signal, if there is one
    switch (signum) {
        case SIGSEGV:
            if (sigsegv_handler != NULL) {
                sigsegv_handler(signum);
            }
            break;
        case SIGABRT:
            if (sigabrt_handler != NULL) {
                sigabrt_handler(signum);
            }
            break;
        default:
            break;
    }

    exit(-1);
}
void database_scan_begin(scan_args_t *args) {
index_descriptor_t *desc = &ScanCtx.index.desc;
@@ -158,7 +83,7 @@ void write_thumbnail_callback(char *key, int num, void *buf, size_t buf_len) {
database_write_thumbnail(ProcData.index_db, key, num, buf, buf_len);
}
void _log(const char *filepath, int level, char *str) {
void log_callback(const char *filepath, int level, char *str) {
if (level == LEVEL_FATAL) {
sist_log(filepath, level, str);
exit(-1);
@@ -175,7 +100,7 @@ void _log(const char *filepath, int level, char *str) {
}
}
void _logf(const char *filepath, int level, char *format, ...) {
void logf_callback(const char *filepath, int level, char *format, ...) {
va_list args;
@@ -198,15 +123,13 @@ void _logf(const char *filepath, int level, char *format, ...) {
}
void initialize_scan_context(scan_args_t *args) {
// TODO: shared
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
ScanCtx.calculate_checksums = args->calculate_checksums;
// Archive
ScanCtx.arc_ctx.mode = args->archive_mode;
ScanCtx.arc_ctx.log = _log;
ScanCtx.arc_ctx.logf = _logf;
ScanCtx.arc_ctx.log = log_callback;
ScanCtx.arc_ctx.logf = logf_callback;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
if (args->archive_passphrase != NULL) {
strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
@@ -215,8 +138,8 @@ void initialize_scan_context(scan_args_t *args) {
}
// Comic
ScanCtx.comic_ctx.log = _log;
ScanCtx.comic_ctx.logf = _logf;
ScanCtx.comic_ctx.log = log_callback;
ScanCtx.comic_ctx.logf = logf_callback;
ScanCtx.comic_ctx.store = write_thumbnail_callback;
ScanCtx.comic_ctx.enable_tn = args->tn_count > 0;
ScanCtx.comic_ctx.tn_size = args->tn_size;
@@ -232,24 +155,24 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
}
ScanCtx.ebook_ctx.log = _log;
ScanCtx.ebook_ctx.logf = _logf;
ScanCtx.ebook_ctx.log = log_callback;
ScanCtx.ebook_ctx.logf = logf_callback;
ScanCtx.ebook_ctx.store = write_thumbnail_callback;
ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
ScanCtx.ebook_ctx.tn_qscale = args->tn_quality;
// Font
ScanCtx.font_ctx.enable_tn = args->tn_count > 0;
ScanCtx.font_ctx.log = _log;
ScanCtx.font_ctx.logf = _logf;
ScanCtx.font_ctx.log = log_callback;
ScanCtx.font_ctx.logf = logf_callback;
ScanCtx.font_ctx.store = write_thumbnail_callback;
// Media
ScanCtx.media_ctx.tn_qscale = args->tn_quality;
ScanCtx.media_ctx.tn_size = args->tn_size;
ScanCtx.media_ctx.tn_count = args->tn_count;
ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.log = log_callback;
ScanCtx.media_ctx.logf = logf_callback;
ScanCtx.media_ctx.store = write_thumbnail_callback;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024;
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
@@ -264,24 +187,24 @@ void initialize_scan_context(scan_args_t *args) {
// OOXML
ScanCtx.ooxml_ctx.enable_tn = args->tn_count > 0;
ScanCtx.ooxml_ctx.content_size = args->content_size;
ScanCtx.ooxml_ctx.log = _log;
ScanCtx.ooxml_ctx.logf = _logf;
ScanCtx.ooxml_ctx.log = log_callback;
ScanCtx.ooxml_ctx.logf = logf_callback;
ScanCtx.ooxml_ctx.store = write_thumbnail_callback;
// MOBI
ScanCtx.mobi_ctx.content_size = args->content_size;
ScanCtx.mobi_ctx.log = _log;
ScanCtx.mobi_ctx.logf = _logf;
ScanCtx.mobi_ctx.log = log_callback;
ScanCtx.mobi_ctx.logf = logf_callback;
// TEXT
ScanCtx.text_ctx.content_size = args->content_size;
ScanCtx.text_ctx.log = _log;
ScanCtx.text_ctx.logf = _logf;
ScanCtx.text_ctx.log = log_callback;
ScanCtx.text_ctx.logf = logf_callback;
// MSDOC
ScanCtx.msdoc_ctx.content_size = args->content_size;
ScanCtx.msdoc_ctx.log = _log;
ScanCtx.msdoc_ctx.logf = _logf;
ScanCtx.msdoc_ctx.log = log_callback;
ScanCtx.msdoc_ctx.logf = logf_callback;
ScanCtx.msdoc_ctx.store = write_thumbnail_callback;
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string("application/msword");
@@ -299,20 +222,20 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.raw_ctx.tn_qscale = args->tn_quality;
ScanCtx.raw_ctx.enable_tn = args->tn_count > 0;
ScanCtx.raw_ctx.tn_size = args->tn_size;
ScanCtx.raw_ctx.log = _log;
ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.log = log_callback;
ScanCtx.raw_ctx.logf = logf_callback;
ScanCtx.raw_ctx.store = write_thumbnail_callback;
// Wpd
ScanCtx.wpd_ctx.content_size = args->content_size;
ScanCtx.wpd_ctx.log = _log;
ScanCtx.wpd_ctx.logf = _logf;
ScanCtx.wpd_ctx.log = log_callback;
ScanCtx.wpd_ctx.logf = logf_callback;
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string("application/wordperfect");
// Json
ScanCtx.json_ctx.content_size = args->content_size;
ScanCtx.json_ctx.log = _log;
ScanCtx.json_ctx.logf = _logf;
ScanCtx.json_ctx.log = log_callback;
ScanCtx.json_ctx.logf = logf_callback;
ScanCtx.json_ctx.json_mime = mime_get_mime_by_string("application/json");
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string("application/ndjson");
}
@@ -344,9 +267,6 @@ void sist2_scan(scan_args_t *args) {
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count);
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count);
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count);
LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size);
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size);
@@ -358,7 +278,7 @@ void sist2_scan(scan_args_t *args) {
}
database_generate_stats(db, args->treemap_threshold);
database_close(db, TRUE);
database_close(db, args->optimize_database);
}
void sist2_index(index_args_t *args) {
@@ -397,16 +317,19 @@ void sist2_index(index_args_t *args) {
print_json(json, doc_id);
} else {
index_json(json, doc_id);
cnt +=1;
cnt += 1;
}
}
free(iterator);
database_close(db, FALSE);
// Only read the _delete index if we're sending data to ES
if (!args->print) {
// TODO: (delete_list iterator)
database_iterator_t *del_iter = database_create_delete_list_iterator(db);
database_delete_list_iter_foreach(id, del_iter) {
delete_document(id);
free(id);
}
}
tpool_wait(IndexCtx.pool);
@@ -496,12 +419,7 @@ int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct
}
}
#include <zlib.h>
int main(int argc, const char *argv[]) {
// sigsegv_handler = signal(SIGSEGV, sig_handler);
// sigabrt_handler = signal(SIGABRT, sig_handler);
setlocale(LC_ALL, "");
scan_args_t *scan_args = scan_args_create();
@@ -521,36 +439,37 @@ int main(int argc, const char *argv[]) {
struct argparse_option options[] = {
OPT_HELP(),
OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging"),
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
OPT_BOOLEAN('v', "version", &arg_version, "Print version and exit."),
OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging."),
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages."),
OPT_BOOLEAN(0, "json-logs", &LogCtx.json_logs, "Output logs in JSON format."),
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"),
OPT_INTEGER('q', "thumbnail-quality", &scan_args->tn_quality,
"Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2",
"Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT: 2",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size,
"Thumbnail size, in pixels. DEFAULT=500",
"Thumbnail size, in pixels. DEFAULT: 552",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size),
OPT_INTEGER(0, "thumbnail-count", &scan_args->tn_count,
"Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1",
"Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT: 1",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_count),
OPT_INTEGER(0, "content-size", &scan_args->content_size,
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768",
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT: 32768",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size),
OPT_STRING('o', "output", &scan_args->output, "Output index file path. DEFAULT: index.sist2"),
OPT_BOOLEAN(0, "incremental", &scan_args->incremental,
// TODO: Update help string
"Reuse an existing index and only scan modified files."),
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
"If the output file path exists, only scan new or modified files."),
OPT_BOOLEAN(0, "optimize-index", &scan_args->optimize_database,
"Defragment index file after scan to reduce its file size."),
OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: index"),
OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
"Use 0 to only scan files in PATH. DEFAULT: -1"),
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
"skip: Don't parse, list: only get file names as text, "
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
"skip: don't scan, list: only save file names as text, "
"shallow: don't scan archives inside archives. DEFAULT: recurse"),
OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
"Passphrase for encrypted archive files"),
@@ -559,8 +478,8 @@ int main(int argc, const char *argv[]) {
"which are installed on your machine)"),
OPT_BOOLEAN(0, "ocr-images", &scan_args->ocr_images, "Enable OCR'ing of image files."),
OPT_BOOLEAN(0, "ocr-ebooks", &scan_args->ocr_ebooks, "Enable OCR'ing of ebook files."),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned."),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type."),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
"(see USAGE.md). DEFAULT: 0.0005"),
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer_mib,
@@ -568,19 +487,20 @@ int main(int argc, const char *argv[]) {
"(see USAGE.md). DEFAULT: 2000"),
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
"Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
"Faster but less accurate EPUB parsing (no thumbnails, metadata)."),
OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."),
OPT_STRING(0, "list-file", &scan_args->list_path, "Specify a list of newline-delimited paths to be scanned"
" instead of normal directory traversal. Use '-' to read"
" from stdin."),
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT: http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
"Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
OPT_BOOLEAN('p', "print", &index_args->print,
"Print JSON documents to stdout instead of indexing to elasticsearch."),
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
"Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch."),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
@@ -588,15 +508,15 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 70"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings."),
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
"Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
OPT_STRING(0, "bind", &web_args->listen_address,
"Listen for connections on this address. DEFAULT: localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
OPT_STRING(0, "auth0-audience", &web_args->auth0_audience, "API audience/identifier"),
OPT_STRING(0, "auth0-domain", &web_args->auth0_domain, "Application domain"),
@@ -609,10 +529,10 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "lang", &web_args->lang, "Default UI language. Can be changed by the user"),
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
"Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
@@ -621,7 +541,11 @@ int main(int argc, const char *argv[]) {
struct argparse argparse;
argparse_init(&argparse, options, usage, 0);
argparse_describe(&argparse, DESCRIPTION, EPILOG);
argparse_describe(
&argparse,
"\nLightning-fast file system indexer and search tool.",
"\nMade by simon987 <me@simon987.net>. Released under GPL-3.0"
);
argc = argparse_parse(&argparse, argc, argv);
if (arg_version) {

View File

@@ -6,7 +6,6 @@
// Calls the close callback of vfile `f` (passing its address) when one is set.
// NOTE: this expands to a bare `if` statement followed by an empty statement
// and evaluates `f` several times, so do not use it as the unbraced body of an
// if/else and do not pass an expression with side effects.
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
static int fs_read(struct vfile *f, void *buf, size_t size) {
if (f->fd == -1) {
SHA1_Init(&f->sha1_ctx);

View File

@@ -12,7 +12,7 @@ char *magic_buffer_embedded(void *buffer, size_t buffer_size) {
const char *magic_buffers[1] = {magic_database_buffer,};
size_t sizes[1] = {sizeof(magic_database_buffer),};
// TODO: check if we can reuse the magic instance
// TODO optimisation: check if we can reuse the magic instance
int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1);
if (load_ret != 0) {

View File

@@ -174,287 +174,291 @@ application_x_mach_binary=655526,
application_x_mach_executable=655527,
application_x_magic_cap_package_1_0=655528,
application_x_mathcad=655529,
application_x_maxis_dbpf=655530,
application_x_meme=655531,
application_x_midi=655532,
application_x_mif=655533,
application_x_mix_transfer=655534,
application_x_mobipocket_ebook=655535 | 0x02000000,
application_x_ms_compress_szdd=655536,
application_x_ms_pdb=655537,
application_x_ms_reader=655538,
application_x_msaccess=655539,
application_x_n64_rom=655540,
application_x_navi_animation=655541,
application_x_navidoc=655542,
application_x_navimap=655543,
application_x_navistyle=655544,
application_x_nes_rom=655545,
application_x_netcdf=655546,
application_x_newton_compatible_pkg=655547,
application_x_nintendo_ds_rom=655548,
application_x_object=655549,
application_x_omc=655550,
application_x_omcdatamaker=655551,
application_x_omcregerator=655552,
application_x_pagemaker=655553,
application_x_pcl=655554,
application_x_pgp_keyring=655555,
application_x_pixclscript=655556,
application_x_pkcs7_certreqresp=655557,
application_x_pkcs7_signature=655558,
application_x_project=655559,
application_x_qpro=655560,
application_x_rar=655561 | 0x10000000,
application_x_rpm=655562,
application_x_sdp=655563,
application_x_sea=655564,
application_x_seelogo=655565,
application_x_setupscript=655566,
application_x_shar=655567,
application_x_sharedlib=655568,
application_x_shockwave_flash=655569,
application_x_snappy_framed=655570,
application_x_sprite=655571,
application_x_sqlite3=655572,
application_x_stargallery_thm=655573,
application_x_stuffit=655574,
application_x_sv4cpio=655575,
application_x_sv4crc=655576,
application_x_tar=655577 | 0x10000000,
application_x_tbook=655578,
application_x_terminfo=655579,
application_x_terminfo2=655580,
application_x_tex_tfm=655581,
application_x_texinfo=655582,
application_x_ustar=655583,
application_x_visio=655584,
application_x_vnd_audioexplosion_mzz=655585,
application_x_vnd_ls_xpix=655586,
application_x_vrml=655587,
application_x_wais_source=655588,
application_x_wine_extension_ini=655589,
application_x_wintalk=655590,
application_x_world=655591,
application_x_wri=655592,
application_x_x509_ca_cert=655593,
application_x_xz=655594 | 0x08000000,
application_x_zip=655595,
application_x_zstd=655596 | 0x08000000,
application_x_zstd_dictionary=655597,
application_xml=655598,
application_zip=655599 | 0x10000000,
application_zlib=655600,
audio_basic=458993 | 0x80000000,
audio_it=458994,
audio_make=458995,
audio_mid=458996,
audio_midi=458997,
audio_mp4=458998,
audio_mpeg=458999,
audio_ogg=459000,
audio_s3m=459001,
audio_tsp_audio=459002,
audio_tsplayer=459003,
audio_vnd_qcelp=459004,
audio_voxware=459005,
audio_x_aiff=459006,
audio_x_flac=459007,
audio_x_gsm=459008,
audio_x_hx_aac_adts=459009,
audio_x_jam=459010,
audio_x_liveaudio=459011,
audio_x_m4a=459012,
audio_x_midi=459013,
audio_x_mod=459014,
audio_x_mp4a_latm=459015,
audio_x_mpeg_3=459016,
audio_x_mpequrl=459017,
audio_x_nspaudio=459018,
audio_x_pn_realaudio=459019,
audio_x_psid=459020,
audio_x_realaudio=459021,
audio_x_s3m=459022,
audio_x_twinvq=459023,
audio_x_twinvq_plugin=459024,
audio_x_voc=459025,
audio_x_wav=459026,
audio_x_xbox_executable=459027 | 0x80000000,
audio_x_xbox360_executable=459028 | 0x80000000,
audio_xm=459029,
font_otf=327958 | 0x20000000,
font_sfnt=327959 | 0x20000000,
font_woff=327960 | 0x20000000,
font_woff2=327961 | 0x20000000,
image_bmp=524570,
image_cmu_raster=524571,
image_fif=524572,
image_florian=524573,
image_g3fax=524574,
image_gif=524575,
image_heic=524576,
image_ief=524577,
image_jpeg=524578,
image_jutvision=524579,
image_naplps=524580,
image_pict=524581,
image_png=524582,
image_svg=524583 | 0x80000000,
image_svg_xml=524584 | 0x80000000,
image_tiff=524585,
image_vnd_adobe_photoshop=524586 | 0x80000000,
image_vnd_djvu=524587 | 0x80000000,
image_vnd_fpx=524588,
image_vnd_microsoft_icon=524589,
image_vnd_rn_realflash=524590,
image_vnd_rn_realpix=524591,
image_vnd_wap_wbmp=524592,
image_vnd_xiff=524593,
image_webp=524594,
image_wmf=524595,
image_x_3ds=524596,
image_x_adobe_dng=524597 | 0x00800000,
image_x_award_bioslogo=524598,
image_x_canon_cr2=524599 | 0x00800000,
image_x_canon_crw=524600 | 0x00800000,
image_x_cmu_raster=524601,
image_x_cur=524602,
image_x_dcraw=524603 | 0x00800000,
image_x_dwg=524604,
image_x_eps=524605,
image_x_epson_erf=524606 | 0x00800000,
image_x_exr=524607,
image_x_fuji_raf=524608 | 0x00800000,
image_x_gem=524609,
image_x_icns=524610,
image_x_icon=524611 | 0x80000000,
image_x_jg=524612,
image_x_jps=524613,
image_x_kodak_dcr=524614 | 0x00800000,
image_x_kodak_k25=524615 | 0x00800000,
image_x_kodak_kdc=524616 | 0x00800000,
image_x_minolta_mrw=524617 | 0x00800000,
image_x_ms_bmp=524618,
image_x_niff=524619,
image_x_nikon_nef=524620 | 0x00800000,
image_x_olympus_orf=524621 | 0x00800000,
image_x_panasonic_raw=524622 | 0x00800000,
image_x_pcx=524623,
image_x_pentax_pef=524624 | 0x00800000,
image_x_pict=524625,
image_x_portable_bitmap=524626,
image_x_portable_graymap=524627,
image_x_portable_pixmap=524628,
image_x_quicktime=524629,
image_x_rgb=524630,
image_x_sigma_x3f=524631 | 0x00800000,
image_x_sony_arw=524632 | 0x00800000,
image_x_sony_sr2=524633 | 0x00800000,
image_x_sony_srf=524634 | 0x00800000,
image_x_tga=524635,
image_x_tiff=524636,
image_x_win_bitmap=524637,
image_x_xcf=524638 | 0x80000000,
image_x_xpixmap=524639 | 0x80000000,
image_x_xwindowdump=524640,
message_news=196961,
message_rfc822=196962,
model_vnd_dwf=65891,
model_vnd_gdl=65892,
model_vnd_gs_gdl=65893,
model_vrml=65894,
model_x_pov=65895,
application_x_matlab_data=655530,
application_x_maxis_dbpf=655531,
application_x_meme=655532,
application_x_midi=655533,
application_x_mif=655534,
application_x_mix_transfer=655535,
application_x_mobipocket_ebook=655536 | 0x02000000,
application_x_ms_compress_szdd=655537,
application_x_ms_pdb=655538,
application_x_ms_reader=655539,
application_x_msaccess=655540,
application_x_n64_rom=655541,
application_x_navi_animation=655542,
application_x_navidoc=655543,
application_x_navimap=655544,
application_x_navistyle=655545,
application_x_nes_rom=655546,
application_x_netcdf=655547,
application_x_newton_compatible_pkg=655548,
application_x_nintendo_ds_rom=655549,
application_x_object=655550,
application_x_omc=655551,
application_x_omcdatamaker=655552,
application_x_omcregerator=655553,
application_x_pagemaker=655554,
application_x_pcl=655555,
application_x_pgp_keyring=655556,
application_x_pixclscript=655557,
application_x_pkcs7_certreqresp=655558,
application_x_pkcs7_signature=655559,
application_x_project=655560,
application_x_qpro=655561,
application_x_rar=655562 | 0x10000000,
application_x_rpm=655563,
application_x_sdp=655564,
application_x_sea=655565,
application_x_seelogo=655566,
application_x_setupscript=655567,
application_x_shar=655568,
application_x_sharedlib=655569,
application_x_shockwave_flash=655570,
application_x_snappy_framed=655571,
application_x_sprite=655572,
application_x_sqlite3=655573,
application_x_stargallery_thm=655574,
application_x_stuffit=655575,
application_x_sv4cpio=655576,
application_x_sv4crc=655577,
application_x_tar=655578 | 0x10000000,
application_x_tbook=655579,
application_x_terminfo=655580,
application_x_terminfo2=655581,
application_x_tex_tfm=655582,
application_x_texinfo=655583,
application_x_ustar=655584,
application_x_visio=655585,
application_x_vnd_audioexplosion_mzz=655586,
application_x_vnd_ls_xpix=655587,
application_x_vrml=655588,
application_x_wais_source=655589,
application_x_wine_extension_ini=655590,
application_x_wintalk=655591,
application_x_world=655592,
application_x_wri=655593,
application_x_x509_ca_cert=655594,
application_x_xz=655595 | 0x08000000,
application_x_zip=655596,
application_x_zstd=655597 | 0x08000000,
application_x_zstd_dictionary=655598,
application_xml=655599,
application_zip=655600 | 0x10000000,
application_zlib=655601,
audio_basic=458994 | 0x80000000,
audio_it=458995,
audio_make=458996,
audio_mid=458997,
audio_midi=458998,
audio_mp4=458999,
audio_mpeg=459000,
audio_ogg=459001,
audio_s3m=459002,
audio_tsp_audio=459003,
audio_tsplayer=459004,
audio_vnd_qcelp=459005,
audio_voxware=459006,
audio_x_aiff=459007,
audio_x_flac=459008,
audio_x_gsm=459009,
audio_x_hx_aac_adts=459010,
audio_x_jam=459011,
audio_x_liveaudio=459012,
audio_x_m4a=459013,
audio_x_midi=459014,
audio_x_mod=459015,
audio_x_mp4a_latm=459016,
audio_x_mpeg_3=459017,
audio_x_mpequrl=459018,
audio_x_nspaudio=459019,
audio_x_pn_realaudio=459020,
audio_x_psid=459021,
audio_x_realaudio=459022,
audio_x_s3m=459023,
audio_x_twinvq=459024,
audio_x_twinvq_plugin=459025,
audio_x_voc=459026,
audio_x_wav=459027,
audio_x_xbox_executable=459028 | 0x80000000,
audio_x_xbox360_executable=459029 | 0x80000000,
audio_xm=459030,
font_otf=327959 | 0x20000000,
font_sfnt=327960 | 0x20000000,
font_woff=327961 | 0x20000000,
font_woff2=327962 | 0x20000000,
image_bmp=524571,
image_cmu_raster=524572,
image_fif=524573,
image_florian=524574,
image_g3fax=524575,
image_gif=524576,
image_heic=524577,
image_ief=524578,
image_jpeg=524579,
image_jutvision=524580,
image_naplps=524581,
image_pict=524582,
image_png=524583,
image_svg=524584 | 0x80000000,
image_svg_xml=524585 | 0x80000000,
image_tiff=524586,
image_vnd_adobe_photoshop=524587 | 0x80000000,
image_vnd_djvu=524588 | 0x80000000,
image_vnd_fpx=524589,
image_vnd_microsoft_icon=524590,
image_vnd_rn_realflash=524591,
image_vnd_rn_realpix=524592,
image_vnd_wap_wbmp=524593,
image_vnd_xiff=524594,
image_webp=524595,
image_wmf=524596,
image_x_3ds=524597,
image_x_adobe_dng=524598 | 0x00800000,
image_x_award_bioslogo=524599,
image_x_canon_cr2=524600 | 0x00800000,
image_x_canon_crw=524601 | 0x00800000,
image_x_cmu_raster=524602,
image_x_cur=524603,
image_x_dcraw=524604 | 0x00800000,
image_x_dwg=524605,
image_x_eps=524606,
image_x_epson_erf=524607 | 0x00800000,
image_x_exr=524608,
image_x_fuji_raf=524609 | 0x00800000,
image_x_gem=524610,
image_x_icns=524611,
image_x_icon=524612 | 0x80000000,
image_x_jg=524613,
image_x_jps=524614,
image_x_kodak_dcr=524615 | 0x00800000,
image_x_kodak_k25=524616 | 0x00800000,
image_x_kodak_kdc=524617 | 0x00800000,
image_x_minolta_mrw=524618 | 0x00800000,
image_x_ms_bmp=524619,
image_x_niff=524620,
image_x_nikon_nef=524621 | 0x00800000,
image_x_olympus_orf=524622 | 0x00800000,
image_x_panasonic_raw=524623 | 0x00800000,
image_x_pcx=524624,
image_x_pentax_pef=524625 | 0x00800000,
image_x_pict=524626,
image_x_portable_bitmap=524627,
image_x_portable_graymap=524628,
image_x_portable_pixmap=524629,
image_x_quicktime=524630,
image_x_rgb=524631,
image_x_sigma_x3f=524632 | 0x00800000,
image_x_sony_arw=524633 | 0x00800000,
image_x_sony_sr2=524634 | 0x00800000,
image_x_sony_srf=524635 | 0x00800000,
image_x_tga=524636,
image_x_tiff=524637,
image_x_win_bitmap=524638,
image_x_xcf=524639 | 0x80000000,
image_x_xpixmap=524640 | 0x80000000,
image_x_xwindowdump=524641,
message_news=196962,
message_rfc822=196963,
model_vnd_dwf=65892,
model_vnd_gdl=65893,
model_vnd_gs_gdl=65894,
model_vrml=65895,
model_x_pov=65896,
sist2_sidecar=2,
text_PGP=590184,
text_asp=590185,
text_css=590186,
text_html=590187 | 0x01000000,
text_javascript=590188,
text_mcf=590189,
text_pascal=590190,
text_plain=590191,
text_richtext=590192,
text_rtf=590193,
text_scriplet=590194,
text_tab_separated_values=590195,
text_troff=590196,
text_uri_list=590197,
text_vnd_abc=590198,
text_vnd_fmi_flexstor=590199,
text_vnd_wap_wml=590200,
text_vnd_wap_wmlscript=590201,
text_webviewhtml=590202,
text_x_Algol68=590203,
text_x_asm=590204,
text_x_audiosoft_intra=590205,
text_x_awk=590206,
text_x_bcpl=590207,
text_x_c=590208,
text_x_c__=590209,
text_x_component=590210,
text_x_diff=590211,
text_x_fortran=590212,
text_x_java=590213,
text_x_la_asf=590214,
text_x_lisp=590215,
text_x_m=590216,
text_x_m4=590217,
text_x_makefile=590218,
text_x_ms_regedit=590219,
text_x_msdos_batch=590220,
text_x_objective_c=590221,
text_x_pascal=590222,
text_x_perl=590223,
text_x_php=590224,
text_x_po=590225,
text_x_python=590226,
text_x_ruby=590227,
text_x_sass=590228,
text_x_scss=590229,
text_x_server_parsed_html=590230,
text_x_setext=590231,
text_x_sgml=590232 | 0x01000000,
text_x_shellscript=590233,
text_x_speech=590234,
text_x_tcl=590235,
text_x_tex=590236,
text_x_uil=590237,
text_x_uuencode=590238,
text_x_vcalendar=590239,
text_x_vcard=590240,
text_xml=590241 | 0x01000000,
video_MP2T=393634,
video_animaflex=393635,
video_avi=393636,
video_avs_video=393637,
video_mp4=393638,
video_mpeg=393639,
video_quicktime=393640,
video_vdo=393641,
video_vivo=393642,
video_vnd_rn_realvideo=393643,
video_vosaic=393644,
video_webm=393645,
video_x_amt_demorun=393646,
video_x_amt_showrun=393647,
video_x_atomic3d_feature=393648,
video_x_dl=393649,
video_x_dv=393650,
video_x_fli=393651,
video_x_flv=393652,
video_x_isvideo=393653,
video_x_jng=393654 | 0x80000000,
video_x_m4v=393655,
video_x_matroska=393656,
video_x_mng=393657,
video_x_motion_jpeg=393658,
video_x_ms_asf=393659,
video_x_msvideo=393660,
video_x_qtc=393661,
video_x_sgi_movie=393662,
x_epoc_x_sisx_app=721343,
text_PGP=590185,
text_asp=590186,
text_css=590187,
text_csv=590188,
text_html=590189 | 0x01000000,
text_javascript=590190,
text_mcf=590191,
text_pascal=590192,
text_plain=590193,
text_richtext=590194,
text_rtf=590195,
text_scriplet=590196,
text_tab_separated_values=590197,
text_troff=590198,
text_uri_list=590199,
text_vnd_abc=590200,
text_vnd_fmi_flexstor=590201,
text_vnd_wap_wml=590202,
text_vnd_wap_wmlscript=590203,
text_webviewhtml=590204,
text_x_Algol68=590205,
text_x_asm=590206,
text_x_audiosoft_intra=590207,
text_x_awk=590208,
text_x_bcpl=590209,
text_x_c=590210,
text_x_c__=590211,
text_x_component=590212,
text_x_diff=590213,
text_x_fortran=590214,
text_x_java=590215,
text_x_la_asf=590216,
text_x_lisp=590217,
text_x_m=590218,
text_x_m4=590219,
text_x_makefile=590220,
text_x_ms_regedit=590221,
text_x_msdos_batch=590222,
text_x_objective_c=590223,
text_x_pascal=590224,
text_x_perl=590225,
text_x_php=590226,
text_x_po=590227,
text_x_python=590228,
text_x_ruby=590229,
text_x_sass=590230,
text_x_script_python=590231,
text_x_scss=590232,
text_x_server_parsed_html=590233,
text_x_setext=590234,
text_x_sgml=590235 | 0x01000000,
text_x_shellscript=590236,
text_x_speech=590237,
text_x_tcl=590238,
text_x_tex=590239,
text_x_uil=590240,
text_x_uuencode=590241,
text_x_vcalendar=590242,
text_x_vcard=590243,
text_xml=590244 | 0x01000000,
video_MP2T=393637,
video_animaflex=393638,
video_avi=393639,
video_avs_video=393640,
video_mp4=393641,
video_mpeg=393642,
video_quicktime=393643,
video_vdo=393644,
video_vivo=393645,
video_vnd_rn_realvideo=393646,
video_vosaic=393647,
video_webm=393648,
video_x_amt_demorun=393649,
video_x_amt_showrun=393650,
video_x_atomic3d_feature=393651,
video_x_dl=393652,
video_x_dv=393653,
video_x_fli=393654,
video_x_flv=393655,
video_x_isvideo=393656,
video_x_jng=393657 | 0x80000000,
video_x_m4v=393658,
video_x_matroska=393659,
video_x_mng=393660,
video_x_motion_jpeg=393661,
video_x_ms_asf=393662,
video_x_msvideo=393663,
video_x_qtc=393664,
video_x_sgi_movie=393665,
x_epoc_x_sisx_app=721346,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_x_matlab_data: return "application/x-matlab-data";
case application_arj: return "application/arj";
case application_base64: return "application/base64";
case application_binhex: return "application/binhex";
@@ -802,6 +806,8 @@ case text_mcf: return "text/mcf";
case text_pascal: return "text/pascal";
case text_PGP: return "text/PGP";
case text_plain: return "text/plain";
case text_x_script_python: return "text/x-script.python";
case text_csv: return "text/csv";
case application_vnd_coffeescript: return "application/vnd.coffeescript";
case text_richtext: return "text/richtext";
case text_rtf: return "text/rtf";
@@ -906,6 +912,7 @@ case image_x_epson_erf: return "image/x-epson-erf";
case sist2_sidecar: return "sist2/sidecar";
default: return NULL;}}
unsigned int mime_extension_lookup(unsigned long extension_crc32) {switch (extension_crc32) {
case 2495639202:return application_x_matlab_data;
case 104524599:return application_arj;
case 1388642652:return application_base64;
case 3514823219:return application_binhex;
@@ -1194,6 +1201,7 @@ case 398963028:return text_javascript;
case 1431272808:return text_mcf;
case 509266722:return text_pascal;
case 1689700070:case 794565824:case 351504808:case 214229345:case 30677878:case 1835907068:case 1154021400:case 3992351814:case 2107886487:case 2202503947:case 999008199:case 473390917:case 3679822420:case 1465078094:case 1466496025:case 2277716423:case 157353380:case 2002237032:case 4216257084:case 590894066:case 987584319:case 2268432115:case 3551958239:case 1436306077:case 3060306774:case 808890964:case 2564639436:case 3322219037:case 3334425408:case 3818365258:case 1403162576:case 590812979:case 1800036834:case 144986711:case 621471808:case 449607278:case 2403297477:case 2529069283:case 3929123204:return text_plain;
case 194218739:return text_x_script_python;
case 1401235891:return application_vnd_coffeescript;
case 196656302:case 1203117491:case 3183026384:return text_richtext;
case 2119613712:return text_scriplet;
@@ -1288,6 +1296,7 @@ case 142938048:return image_x_epson_erf;
case 287571459:return sist2_sidecar;
default: return 0;}}
unsigned int mime_name_lookup(unsigned long mime_crc32) {switch (mime_crc32) {
case 3272851765: return application_x_matlab_data;
case 3812269631: return application_arj;
case 2479484568: return application_base64;
case 3891182180: return application_binhex;
@@ -1635,6 +1644,8 @@ case 768274928: return text_mcf;
case 3970938585: return text_pascal;
case 1059844876: return text_PGP;
case 1152832851: return text_plain;
case 3112468514: return text_x_script_python;
case 1881267919: return text_csv;
case 2809123822: return application_vnd_coffeescript;
case 4000659158: return text_richtext;
case 1060344107: return text_rtf;

View File

@@ -46,17 +46,13 @@ file_type_t get_file_type(unsigned int mime, size_t size, const char *filepath)
return FILETYPE_MEDIA;
} else if (IS_PDF(mime)) {
return FILETYPE_EBOOK;
} else if (major_mime == MimeText && ScanCtx.text_ctx.content_size > 0) {
if (IS_MARKUP(mime)) {
return FILETYPE_MARKUP;
} else {
return FILETYPE_TEXT;
}
} else if (IS_MARKUP(mime)) {
return FILETYPE_MARKUP;
} else if (major_mime == MimeText) {
return FILETYPE_TEXT;
} else if (IS_FONT(mime)) {
return FILETYPE_FONT;
} else if (
ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
} else if (ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
IS_ARC(mime) ||
(IS_ARC_FILTER(mime) && should_parse_filtered_file(filepath))
)) {
@@ -98,10 +94,6 @@ int get_mime(parse_job_t *job) {
}
}
if (strlen(extension) == 0 && strlen(job->filepath + job->base) == 40) {
fprintf(stderr, "GIT? %s", job->filepath);
}
if (ScanCtx.fast) {
return 0;
}
@@ -122,7 +114,6 @@ int get_mime(parse_job_t *job) {
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc));
}
return GET_MIME_ERROR_FATAL;
}
@@ -130,12 +121,13 @@ int get_mime(parse_job_t *job) {
if (magic_mime_str != NULL) {
mime = (int) mime_get_mime_by_string(magic_mime_str);
free(magic_mime_str);
if (mime == 0) {
LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
free(magic_mime_str);
return 0;
}
free(magic_mime_str);
}
if (job->vfile.reset != NULL) {
@@ -163,14 +155,11 @@ void parse(parse_job_t *job) {
doc->meta_head = NULL;
doc->meta_tail = NULL;
doc->size = job->vfile.st_size;
doc->mtime = (int) job->vfile.mtime;
doc->mtime = job->vfile.mtime;
doc->mime = get_mime(job);
generate_doc_id(doc->filepath + ScanCtx.index.desc.root_len, doc->doc_id);
if (doc->mime == GET_MIME_ERROR_FATAL) {
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_failed_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
CLOSE_FILE(job->vfile)
free(doc);
@@ -178,9 +167,6 @@ void parse(parse_job_t *job) {
}
if (database_mark_document(ProcData.index_db, doc->doc_id, doc->mtime)) {
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_skipped_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
CLOSE_FILE(job->vfile)
free(doc);
@@ -246,7 +232,7 @@ void parse(parse_job_t *job) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
meta_parent->key = MetaParent;
strcpy(meta_parent->str_val, job->parent);
APPEND_META((doc), meta_parent)
APPEND_META((doc), meta_parent);
}
CLOSE_FILE(job->vfile)
@@ -254,7 +240,7 @@ void parse(parse_job_t *job) {
if (job->vfile.has_checksum) {
char sha1_digest_str[SHA1_STR_LENGTH];
buf2hex((unsigned char *) job->vfile.sha1_digest, SHA1_DIGEST_LENGTH, (char *) sha1_digest_str);
APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str)
APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str);
}
write_document(doc);

View File

@@ -27,6 +27,8 @@
#define UNUSED(x) __attribute__((__unused__)) x
#define MAX_THREADS (256)
#include "util.h"
#include "log.h"
#include "types.h"

View File

@@ -6,7 +6,7 @@
#include <sys/wait.h>
#include "parsing/parse.h"
#define BLANK_STR " "
#define BLANK_STR " "
typedef struct {
int thread_id;
@@ -17,7 +17,6 @@ typedef struct {
typedef struct tpool {
pthread_t threads[256];
int num_threads;
int fork;
int print_progress;
@@ -32,6 +31,8 @@ typedef struct tpool {
pthread_cond_t workers_initialized_cond;
int busy_count;
int initialized_count;
int thread_id_to_pid_mapping[MAX_THREADS];
char ipc_database_filepath[128];
} *shm;
} tpool_t;
@@ -43,11 +44,6 @@ void job_destroy(job_t *job) {
free(job);
}
/**
 * Emit the pool's thread count (pool->num_threads) through LOG_DEBUGF.
 * No other pool state is reported yet (see TODO below).
 */
void tpool_dump_debug_info(tpool_t *pool) {
// TODO
LOG_DEBUGF("tpool.c", "pool->num_threads = %d", pool->num_threads);
}
/**
* Push work object to thread pool
*/
@@ -130,108 +126,124 @@ static void worker_thread_loop(tpool_t *pool) {
}
/**
 * Per-worker initialisation, called once by a worker before it enters
 * worker_thread_loop().
 *
 * - Stores this process's PID in the shared thread_id -> PID table so that a
 *   PID reported by waitpid() can be mapped back to a worker slot.
 * - Opens the index database when ScanCtx.index.path is non-empty.
 * - Opens this worker's consumer handle on the pool's IPC database.
 *
 * @param pool      pool whose shared-memory segment (pool->shm) holds the IPC state
 * @param thread_id worker slot; used as an index into thread_id_to_pid_mapping
 */
static void worker_proc_init(tpool_t *pool, int thread_id) {
    pthread_mutex_lock(&pool->shm->data_mutex);
    pool->shm->thread_id_to_pid_mapping[thread_id] = getpid();
    pthread_mutex_unlock(&pool->shm->data_mutex);

    ProcData.thread_id = thread_id;

    if (ScanCtx.index.path[0] != '\0') {
        // This handle is released by worker_proc_cleanup()
        ProcData.index_db = database_create(ScanCtx.index.path, INDEX_DATABASE);
        ProcData.index_db->ipc_ctx = &pool->shm->ipc_ctx;
        database_open(ProcData.index_db);
    }

    // Create exactly one consumer handle, on the per-pool IPC file; the
    // open is serialised across workers with the shared mutex.
    pthread_mutex_lock(&pool->shm->mutex);
    ProcData.ipc_db = database_create(pool->shm->ipc_database_filepath, IPC_CONSUMER_DATABASE);
    ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx;
    database_open(ProcData.ipc_db);
    pthread_mutex_unlock(&pool->shm->mutex);
}
/**
 * Release the database handles opened by worker_proc_init().
 *
 * The index database is only closed when it was opened (ProcData.index_db is
 * non-NULL); the IPC database handle is always closed.
 *
 * @param pool unused here; kept so the signature mirrors worker_proc_init()
 */
void worker_proc_cleanup(tpool_t *pool) {
    if (ProcData.index_db != NULL) {
        database_close(ProcData.index_db, FALSE);
    }
    database_close(ProcData.ipc_db, FALSE);
}
#ifndef SIST_DEBUG
#define TPOOL_FORK
#endif
/**
 * Body shared by the forked and in-thread worker variants: initialise the
 * worker, announce that it is initialised, run worker_thread_loop(), then
 * signal done_working_cond.
 */
static void tpool_worker_run(tpool_t *pool, int thread_id) {
    worker_proc_init(pool, thread_id);

    pthread_mutex_lock(&pool->shm->mutex);
    pthread_cond_signal(&pool->shm->workers_initialized_cond);
    pool->shm->initialized_count += 1;
    pthread_mutex_unlock(&pool->shm->mutex);

    worker_thread_loop(pool);

    pthread_mutex_lock(&pool->shm->mutex);
    pthread_cond_signal(&pool->shm->done_working_cond);
    pthread_mutex_unlock(&pool->shm->mutex);
}

/**
 * Thread worker function
 *
 * With TPOOL_FORK defined, the thread forks a child process that does the
 * actual work and then waits for it. If the child was terminated by a signal,
 * the file it was working on is reported and a replacement child is forked;
 * otherwise the thread returns. Without TPOOL_FORK the work runs directly in
 * this thread.
 *
 * @param arg pointer to a start_thread_arg_t (pool and thread_id)
 * @return always NULL
 */
static void *tpool_worker(void *arg) {
    tpool_t *pool = ((start_thread_arg_t *) arg)->pool;
    int thread_id = ((start_thread_arg_t *) arg)->thread_id;

#ifdef TPOOL_FORK
    while (TRUE) {
        int pid = fork();

        if (pid < 0) {
            // Do not fall through to waitpid(): a pid of -1 would wait for
            // *any* child and could reap another worker's process.
            LOG_FATALF_NO_EXIT("tpool.c", "Could not fork worker process for thread %d", thread_id);
            break;
        }

        if (pid == 0) {
            // Child process: do the work, release resources, never return
            tpool_worker_run(pool, thread_id);
            worker_proc_cleanup(pool);
            exit(0);
        }

        // Parent: block until the child terminates
        int status;
        waitpid(pid, &status, 0);

        LOG_DEBUGF("tpool.c", "Child process terminated with status code %d", WEXITSTATUS(status));

        pthread_mutex_lock(&(pool->shm->ipc_ctx.mutex));
        pool->shm->ipc_ctx.completed_job_count += 1;
        pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex));

        pthread_mutex_lock(&(pool->shm->data_mutex));
        pool->shm->busy_count -= 1;
        pthread_mutex_unlock(&(pool->shm->data_mutex));

        if (!WIFSIGNALED(status)) {
            // Child was not killed by a signal: this worker is finished
            break;
        }

        // Map the dead child's PID back to its worker slot
        int crashed_thread_id = -1;
        for (int i = 0; i < MAX_THREADS; i++) {
            if (pool->shm->thread_id_to_pid_mapping[i] == pid) {
                crashed_thread_id = i;
                break;
            }
        }

        const char *job_filepath;
        if (crashed_thread_id != -1) {
            job_filepath = pool->shm->ipc_ctx.current_job[crashed_thread_id];
        } else {
            job_filepath = "unknown";
        }

        LOG_FATALF_NO_EXIT(
                "tpool.c",
                "Child process crashed (%s).\n"
                BLANK_STR "The process was working on %s\n"
                BLANK_STR "Please consider creating a bug report at https://github.com/simon987/sist2/issues !\n"
                BLANK_STR "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs.\n",
                strsignal(WTERMSIG(status)),
                job_filepath
        );
        // Loop again: fork a replacement child for this worker slot
    }
#else
    tpool_worker_run(pool, thread_id);
#endif

    return NULL;
}
@@ -295,13 +307,10 @@ void tpool_destroy(tpool_t *pool) {
*/
tpool_t *tpool_create(int thread_cnt, int print_progress) {
int fork = FALSE;
tpool_t *pool = malloc(sizeof(tpool_t));
pool->shm = mmap(NULL, sizeof(*pool->shm), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
pool->fork = fork;
pool->num_threads = thread_cnt;
pool->shm->ipc_ctx.job_count = 0;
pool->shm->ipc_ctx.no_more_jobs = FALSE;
@@ -310,6 +319,7 @@ tpool_t *tpool_create(int thread_cnt, int print_progress) {
pool->shm->job_type = JOB_UNDEFINED;
memset(pool->threads, 0, sizeof(pool->threads));
pool->print_progress = print_progress;
sprintf(pool->shm->ipc_database_filepath, "/dev/shm/sist2-ipc-%d.sqlite", getpid());
pthread_mutexattr_t mutexattr;
pthread_mutexattr_init(&mutexattr);
@@ -329,10 +339,7 @@ tpool_t *tpool_create(int thread_cnt, int print_progress) {
pthread_cond_init(&(pool->shm->done_working_cond), &condattr);
pthread_cond_init(&(pool->shm->workers_initialized_cond), &condattr);
remove("/dev/shm/ipc.sist2");
remove("/dev/shm/ipc.sist2-wal");
remove("/dev/shm/ipc.sist2-shm");
ProcData.ipc_db = database_create("/dev/shm/ipc.sist2", IPC_PRODUCER_DATABASE);
ProcData.ipc_db = database_create(pool->shm->ipc_database_filepath, IPC_PRODUCER_DATABASE);
ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx;
database_initialize(ProcData.ipc_db);

View File

@@ -19,8 +19,6 @@ int tpool_add_work(tpool_t *pool, job_t *job);
void tpool_wait(tpool_t *pool);
void tpool_dump_debug_info(tpool_t *pool);
void job_destroy(job_t *job);
#endif