mirror of
https://github.com/simon987/sist2.git
synced 2025-04-10 14:06:45 +00:00
Fixes and cleanup
This commit is contained in:
parent
fc36f33d52
commit
300c70883d
@ -1,3 +1,4 @@
|
||||
application/x-matlab-data,mat
|
||||
application/arj, arj
|
||||
application/base64, mme
|
||||
application/binhex, hqx
|
||||
@ -346,6 +347,8 @@ text/mcf, mcf
|
||||
text/pascal, pas
|
||||
text/PGP,
|
||||
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
|
||||
text/x-script.python, pyx
|
||||
text/csv,
|
||||
application/vnd.coffeescript, coffee
|
||||
text/richtext, rt|rtf|rtx
|
||||
text/rtf,
|
||||
|
|
@ -142,7 +142,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
|
||||
char *abs_output = abspath(args->output);
|
||||
if (args->incremental && abs_output == NULL) {
|
||||
LOG_WARNINGF("main.c", "Could not open original index for incremental scan: %s. Will not perform incremental scan.", abs_output);
|
||||
LOG_WARNINGF("main.c", "Could not open original index for incremental scan: %s. Will not perform incremental scan.", args->output);
|
||||
args->incremental = FALSE;
|
||||
} else if (!args->incremental && abs_output != NULL) {
|
||||
LOG_FATALF("main.c", "Index already exists: %s. If you wish to perform incremental scan, you must specify --incremental", abs_output);
|
||||
|
@ -14,6 +14,7 @@ typedef struct scan_args {
|
||||
int content_size;
|
||||
int threads;
|
||||
int incremental;
|
||||
int optimize_database;
|
||||
char *output;
|
||||
char *rewrite_url;
|
||||
char *name;
|
||||
|
@ -38,13 +38,6 @@ typedef struct {
|
||||
pcre_extra *exclude_extra;
|
||||
int fast;
|
||||
|
||||
pthread_mutex_t dbg_current_files_mu;
|
||||
|
||||
int dbg_failed_files_count;
|
||||
int dbg_skipped_files_count;
|
||||
int dbg_excluded_files_count;
|
||||
pthread_mutex_t dbg_file_counts_mu;
|
||||
|
||||
scan_arc_ctx_t arc_ctx;
|
||||
scan_comic_ctx_t comic_ctx;
|
||||
scan_ebook_ctx_t ebook_ctx;
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include <time.h>
|
||||
|
||||
|
||||
|
||||
database_t *database_create(const char *filename, database_type_t type) {
|
||||
database_t *db = malloc(sizeof(database_t));
|
||||
|
||||
@ -81,7 +80,7 @@ void database_initialize(database_t *db) {
|
||||
}
|
||||
|
||||
void database_open(database_t *db) {
|
||||
LOG_DEBUGF("tpool.c", "Opening database %s (%d)", db->filename, db->type);
|
||||
LOG_DEBUGF("database.c", "Opening database %s (%d)", db->filename, db->type);
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db));
|
||||
|
||||
@ -113,7 +112,8 @@ void database_open(database_t *db) {
|
||||
&db->write_document_stmt, NULL));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
|
||||
db->db,
|
||||
"INSERT INTO thumbnail (id, num, data) VALUES (?,?,?) ON CONFLICT DO UPDATE SET data=excluded.data;", -1,
|
||||
"INSERT INTO thumbnail (id, num, data) VALUES (?,?,?) ON CONFLICT DO UPDATE SET data=excluded.data;",
|
||||
-1,
|
||||
&db->write_thumbnail_stmt, NULL));
|
||||
|
||||
// Create functions
|
||||
@ -186,12 +186,16 @@ void database_close(database_t *db, int optimize) {
|
||||
|
||||
if (optimize) {
|
||||
LOG_DEBUG("database.c", "Optimizing database");
|
||||
// TODO: This should be an optional argument
|
||||
// CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "VACUUM;", NULL, NULL, NULL));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "VACUUM;", NULL, NULL, NULL));
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA optimize;", NULL, NULL, NULL));
|
||||
}
|
||||
|
||||
sqlite3_close(db->db);
|
||||
|
||||
if (db->type == IPC_PRODUCER_DATABASE) {
|
||||
remove(db->filename);
|
||||
}
|
||||
|
||||
free(db);
|
||||
db = NULL;
|
||||
}
|
||||
@ -202,11 +206,14 @@ void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *r
|
||||
|
||||
int ret = sqlite3_step(db->select_thumbnail_stmt);
|
||||
|
||||
// TODO: if row not found, return null
|
||||
if (ret != SQLITE_ROW) {
|
||||
LOG_FATALF("database.c", "FIXME: tn step returned %d", ret);
|
||||
if (ret == SQLITE_DONE) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->select_thumbnail_stmt));
|
||||
*return_value_len = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
CRASH_IF_STMT_FAIL(ret);
|
||||
|
||||
const void *blob = sqlite3_column_blob(db->select_thumbnail_stmt, 0);
|
||||
const int blob_size = sqlite3_column_bytes(db->select_thumbnail_stmt, 0);
|
||||
|
||||
@ -275,11 +282,47 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) {
|
||||
return desc;
|
||||
}
|
||||
|
||||
database_iterator_t *database_create_delete_list_iterator(database_t *db) {
|
||||
|
||||
sqlite3_stmt *stmt;
|
||||
sqlite3_prepare_v2(db->db, "SELECT id FROM delete_list;", -1, &stmt, NULL);
|
||||
|
||||
database_iterator_t *iter = malloc(sizeof(database_iterator_t));
|
||||
|
||||
iter->stmt = stmt;
|
||||
iter->db = db;
|
||||
|
||||
return iter;
|
||||
}
|
||||
|
||||
char *database_delete_list_iter(database_iterator_t *iter) {
|
||||
int ret = sqlite3_step(iter->stmt);
|
||||
|
||||
if (ret == SQLITE_ROW) {
|
||||
const char *id = (const char *) sqlite3_column_text(iter->stmt, 0);
|
||||
char *id_heap = malloc(strlen(id) + 1);
|
||||
strcpy(id_heap, id);
|
||||
return id_heap;
|
||||
}
|
||||
|
||||
if (ret != SQLITE_DONE) {
|
||||
LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db));
|
||||
}
|
||||
|
||||
if (sqlite3_finalize(iter->stmt) != SQLITE_OK) {
|
||||
LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db));
|
||||
}
|
||||
|
||||
iter->stmt = NULL;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
database_iterator_t *database_create_document_iterator(database_t *db) {
|
||||
|
||||
sqlite3_stmt *stmt;
|
||||
|
||||
// TODO: remove mtime, size, _id from json_data
|
||||
// TODO optimization: remove mtime, size, _id from json_data
|
||||
|
||||
sqlite3_prepare_v2(db->db, "WITH doc (j) AS (SELECT CASE"
|
||||
" WHEN sc.json_data IS NULL THEN"
|
||||
@ -494,10 +537,10 @@ job_t *database_get_work(database_t *db, job_type_t job_type) {
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt));
|
||||
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
|
||||
return NULL;
|
||||
} else {
|
||||
CRASH_IF_STMT_FAIL(ret);
|
||||
}
|
||||
|
||||
CRASH_IF_STMT_FAIL(ret);
|
||||
|
||||
job = malloc(sizeof(*job));
|
||||
|
||||
const char *line = (const char *) sqlite3_column_text(db->pop_index_job_stmt, 2);
|
||||
@ -511,9 +554,6 @@ job_t *database_get_work(database_t *db, job_type_t job_type) {
|
||||
job->bulk_line->type = sqlite3_column_int(db->pop_index_job_stmt, 1);
|
||||
job->bulk_line->next = NULL;
|
||||
|
||||
// TODO CRASH IF NOT OK
|
||||
sqlite3_step(db->pop_parse_job_stmt);
|
||||
|
||||
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt));
|
||||
}
|
||||
|
||||
|
@ -41,7 +41,7 @@ typedef struct {
|
||||
pthread_mutex_t db_mutex;
|
||||
pthread_mutex_t index_db_mutex;
|
||||
pthread_cond_t has_work_cond;
|
||||
char current_job[256][PATH_MAX * 2];
|
||||
char current_job[MAX_THREADS][PATH_MAX * 2];
|
||||
} database_ipc_ctx_t;
|
||||
|
||||
typedef struct database {
|
||||
@ -106,6 +106,14 @@ cJSON *database_document_iter(database_iterator_t *);
|
||||
#define database_document_iter_foreach(element, iter) \
|
||||
for (cJSON *element = database_document_iter(iter); element != NULL; element = database_document_iter(iter))
|
||||
|
||||
database_iterator_t *database_create_delete_list_iterator(database_t *db);
|
||||
|
||||
char * database_delete_list_iter(database_iterator_t *iter);
|
||||
|
||||
#define database_delete_list_iter_foreach(element, iter) \
|
||||
for (char *element = database_delete_list_iter(iter); element != NULL; element = database_delete_list_iter(iter))
|
||||
|
||||
|
||||
cJSON *database_incremental_scan_begin(database_t *db);
|
||||
|
||||
cJSON *database_incremental_scan_end(database_t *db);
|
||||
|
@ -24,9 +24,6 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", filepath);
|
||||
|
||||
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_excluded_files_count += 1;
|
||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||
} else if (typeflag == FTW_D) {
|
||||
return FTW_SKIP_SUBTREE;
|
||||
}
|
||||
@ -83,13 +80,6 @@ int iterate_file_list(void *input_file) {
|
||||
|
||||
if (ScanCtx.exclude != NULL && EXCLUDED(absolute_path)) {
|
||||
LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path);
|
||||
|
||||
if (S_ISREG(info.st_mode)) {
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_excluded_files_count += 1;
|
||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
|
212
src/main.c
212
src/main.c
@ -18,10 +18,6 @@
|
||||
|
||||
#include "src/database/database.h"
|
||||
|
||||
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
|
||||
|
||||
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||
|
||||
|
||||
static const char *const usage[] = {
|
||||
"sist2 scan [OPTION]... PATH",
|
||||
@ -32,77 +28,6 @@ static const char *const usage[] = {
|
||||
};
|
||||
|
||||
|
||||
static __sighandler_t sigsegv_handler = NULL;
|
||||
static __sighandler_t sigabrt_handler = NULL;
|
||||
|
||||
void sig_handler(int signum) {
|
||||
|
||||
LogCtx.verbose = TRUE;
|
||||
LogCtx.very_verbose = TRUE;
|
||||
|
||||
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
|
||||
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
|
||||
|
||||
// TODO: Print debug info
|
||||
// if (ScanCtx.dbg_current_files != NULL) {
|
||||
// GHashTableIter iter;
|
||||
// g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
|
||||
//
|
||||
// void *key;
|
||||
// void *value;
|
||||
// while (g_hash_table_iter_next(&iter, &key, &value)) {
|
||||
// parse_job_t *job = value;
|
||||
//
|
||||
// if (isatty(STDERR_FILENO)) {
|
||||
// LOG_DEBUGF(
|
||||
// "*SIGNAL HANDLER*",
|
||||
// "Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
|
||||
// 31 + ((unsigned int) key) % 7, key, job->filepath
|
||||
// );
|
||||
// } else {
|
||||
// LOG_DEBUGF(
|
||||
// "*SIGNAL HANDLER*",
|
||||
// "THREAD [%04llX] was working on job %s",
|
||||
// key, job->filepath
|
||||
// );
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
if (ScanCtx.pool != NULL) {
|
||||
tpool_dump_debug_info(ScanCtx.pool);
|
||||
}
|
||||
|
||||
if (IndexCtx.pool != NULL) {
|
||||
tpool_dump_debug_info(IndexCtx.pool);
|
||||
}
|
||||
|
||||
LOG_INFO(
|
||||
"*SIGNAL HANDLER*",
|
||||
"Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
|
||||
);
|
||||
LOG_INFO(
|
||||
"*SIGNAL HANDLER*",
|
||||
"sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
|
||||
);
|
||||
|
||||
#ifndef SIST_DEBUG
|
||||
LOG_WARNING(
|
||||
"*SIGNAL HANDLER*",
|
||||
"You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
|
||||
"releases page to provide additionnal information when submitting a bug report."
|
||||
);
|
||||
#endif
|
||||
|
||||
if (signum == SIGSEGV && sigsegv_handler != NULL) {
|
||||
sigsegv_handler(signum);
|
||||
} else if (signum == SIGABRT && sigabrt_handler != NULL) {
|
||||
sigabrt_handler(signum);
|
||||
}
|
||||
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
void database_scan_begin(scan_args_t *args) {
|
||||
index_descriptor_t *desc = &ScanCtx.index.desc;
|
||||
|
||||
@ -158,7 +83,7 @@ void write_thumbnail_callback(char *key, int num, void *buf, size_t buf_len) {
|
||||
database_write_thumbnail(ProcData.index_db, key, num, buf, buf_len);
|
||||
}
|
||||
|
||||
void _log(const char *filepath, int level, char *str) {
|
||||
void log_callback(const char *filepath, int level, char *str) {
|
||||
if (level == LEVEL_FATAL) {
|
||||
sist_log(filepath, level, str);
|
||||
exit(-1);
|
||||
@ -175,7 +100,7 @@ void _log(const char *filepath, int level, char *str) {
|
||||
}
|
||||
}
|
||||
|
||||
void _logf(const char *filepath, int level, char *format, ...) {
|
||||
void logf_callback(const char *filepath, int level, char *format, ...) {
|
||||
|
||||
va_list args;
|
||||
|
||||
@ -198,15 +123,13 @@ void _logf(const char *filepath, int level, char *format, ...) {
|
||||
}
|
||||
|
||||
void initialize_scan_context(scan_args_t *args) {
|
||||
// TODO: shared
|
||||
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
|
||||
|
||||
ScanCtx.calculate_checksums = args->calculate_checksums;
|
||||
|
||||
// Archive
|
||||
ScanCtx.arc_ctx.mode = args->archive_mode;
|
||||
ScanCtx.arc_ctx.log = _log;
|
||||
ScanCtx.arc_ctx.logf = _logf;
|
||||
ScanCtx.arc_ctx.log = log_callback;
|
||||
ScanCtx.arc_ctx.logf = logf_callback;
|
||||
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
|
||||
if (args->archive_passphrase != NULL) {
|
||||
strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
|
||||
@ -215,8 +138,8 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
}
|
||||
|
||||
// Comic
|
||||
ScanCtx.comic_ctx.log = _log;
|
||||
ScanCtx.comic_ctx.logf = _logf;
|
||||
ScanCtx.comic_ctx.log = log_callback;
|
||||
ScanCtx.comic_ctx.logf = logf_callback;
|
||||
ScanCtx.comic_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.comic_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.comic_ctx.tn_size = args->tn_size;
|
||||
@ -232,24 +155,24 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
|
||||
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
|
||||
}
|
||||
ScanCtx.ebook_ctx.log = _log;
|
||||
ScanCtx.ebook_ctx.logf = _logf;
|
||||
ScanCtx.ebook_ctx.log = log_callback;
|
||||
ScanCtx.ebook_ctx.logf = logf_callback;
|
||||
ScanCtx.ebook_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
|
||||
ScanCtx.ebook_ctx.tn_qscale = args->tn_quality;
|
||||
|
||||
// Font
|
||||
ScanCtx.font_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.font_ctx.log = _log;
|
||||
ScanCtx.font_ctx.logf = _logf;
|
||||
ScanCtx.font_ctx.log = log_callback;
|
||||
ScanCtx.font_ctx.logf = logf_callback;
|
||||
ScanCtx.font_ctx.store = write_thumbnail_callback;
|
||||
|
||||
// Media
|
||||
ScanCtx.media_ctx.tn_qscale = args->tn_quality;
|
||||
ScanCtx.media_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.media_ctx.tn_count = args->tn_count;
|
||||
ScanCtx.media_ctx.log = _log;
|
||||
ScanCtx.media_ctx.logf = _logf;
|
||||
ScanCtx.media_ctx.log = log_callback;
|
||||
ScanCtx.media_ctx.logf = logf_callback;
|
||||
ScanCtx.media_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024;
|
||||
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
|
||||
@ -264,24 +187,24 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
// OOXML
|
||||
ScanCtx.ooxml_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.ooxml_ctx.content_size = args->content_size;
|
||||
ScanCtx.ooxml_ctx.log = _log;
|
||||
ScanCtx.ooxml_ctx.logf = _logf;
|
||||
ScanCtx.ooxml_ctx.log = log_callback;
|
||||
ScanCtx.ooxml_ctx.logf = logf_callback;
|
||||
ScanCtx.ooxml_ctx.store = write_thumbnail_callback;
|
||||
|
||||
// MOBI
|
||||
ScanCtx.mobi_ctx.content_size = args->content_size;
|
||||
ScanCtx.mobi_ctx.log = _log;
|
||||
ScanCtx.mobi_ctx.logf = _logf;
|
||||
ScanCtx.mobi_ctx.log = log_callback;
|
||||
ScanCtx.mobi_ctx.logf = logf_callback;
|
||||
|
||||
// TEXT
|
||||
ScanCtx.text_ctx.content_size = args->content_size;
|
||||
ScanCtx.text_ctx.log = _log;
|
||||
ScanCtx.text_ctx.logf = _logf;
|
||||
ScanCtx.text_ctx.log = log_callback;
|
||||
ScanCtx.text_ctx.logf = logf_callback;
|
||||
|
||||
// MSDOC
|
||||
ScanCtx.msdoc_ctx.content_size = args->content_size;
|
||||
ScanCtx.msdoc_ctx.log = _log;
|
||||
ScanCtx.msdoc_ctx.logf = _logf;
|
||||
ScanCtx.msdoc_ctx.log = log_callback;
|
||||
ScanCtx.msdoc_ctx.logf = logf_callback;
|
||||
ScanCtx.msdoc_ctx.store = write_thumbnail_callback;
|
||||
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string("application/msword");
|
||||
|
||||
@ -299,20 +222,20 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.raw_ctx.tn_qscale = args->tn_quality;
|
||||
ScanCtx.raw_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.raw_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.raw_ctx.log = _log;
|
||||
ScanCtx.raw_ctx.logf = _logf;
|
||||
ScanCtx.raw_ctx.log = log_callback;
|
||||
ScanCtx.raw_ctx.logf = logf_callback;
|
||||
ScanCtx.raw_ctx.store = write_thumbnail_callback;
|
||||
|
||||
// Wpd
|
||||
ScanCtx.wpd_ctx.content_size = args->content_size;
|
||||
ScanCtx.wpd_ctx.log = _log;
|
||||
ScanCtx.wpd_ctx.logf = _logf;
|
||||
ScanCtx.wpd_ctx.log = log_callback;
|
||||
ScanCtx.wpd_ctx.logf = logf_callback;
|
||||
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string("application/wordperfect");
|
||||
|
||||
// Json
|
||||
ScanCtx.json_ctx.content_size = args->content_size;
|
||||
ScanCtx.json_ctx.log = _log;
|
||||
ScanCtx.json_ctx.logf = _logf;
|
||||
ScanCtx.json_ctx.log = log_callback;
|
||||
ScanCtx.json_ctx.logf = logf_callback;
|
||||
ScanCtx.json_ctx.json_mime = mime_get_mime_by_string("application/json");
|
||||
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string("application/ndjson");
|
||||
}
|
||||
@ -344,9 +267,6 @@ void sist2_scan(scan_args_t *args) {
|
||||
tpool_wait(ScanCtx.pool);
|
||||
tpool_destroy(ScanCtx.pool);
|
||||
|
||||
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count);
|
||||
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count);
|
||||
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count);
|
||||
LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size);
|
||||
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size);
|
||||
|
||||
@ -358,7 +278,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
}
|
||||
|
||||
database_generate_stats(db, args->treemap_threshold);
|
||||
database_close(db, TRUE);
|
||||
database_close(db, args->optimize_database);
|
||||
}
|
||||
|
||||
void sist2_index(index_args_t *args) {
|
||||
@ -397,16 +317,19 @@ void sist2_index(index_args_t *args) {
|
||||
print_json(json, doc_id);
|
||||
} else {
|
||||
index_json(json, doc_id);
|
||||
cnt +=1;
|
||||
cnt += 1;
|
||||
}
|
||||
}
|
||||
|
||||
free(iterator);
|
||||
database_close(db, FALSE);
|
||||
|
||||
// Only read the _delete index if we're sending data to ES
|
||||
if (!args->print) {
|
||||
// TODO: (delete_list iterator)
|
||||
database_iterator_t *del_iter = database_create_delete_list_iterator(db);
|
||||
database_delete_list_iter_foreach(id, del_iter) {
|
||||
delete_document(id);
|
||||
free(id);
|
||||
}
|
||||
}
|
||||
|
||||
tpool_wait(IndexCtx.pool);
|
||||
@ -496,12 +419,7 @@ int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct
|
||||
}
|
||||
}
|
||||
|
||||
#include <zlib.h>
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
// sigsegv_handler = signal(SIGSEGV, sig_handler);
|
||||
// sigabrt_handler = signal(SIGABRT, sig_handler);
|
||||
|
||||
setlocale(LC_ALL, "");
|
||||
|
||||
scan_args_t *scan_args = scan_args_create();
|
||||
@ -521,36 +439,37 @@ int main(int argc, const char *argv[]) {
|
||||
struct argparse_option options[] = {
|
||||
OPT_HELP(),
|
||||
|
||||
OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
|
||||
OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging"),
|
||||
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
|
||||
OPT_BOOLEAN('v', "version", &arg_version, "Print version and exit."),
|
||||
OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging."),
|
||||
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages."),
|
||||
OPT_BOOLEAN(0, "json-logs", &LogCtx.json_logs, "Output logs in JSON format."),
|
||||
|
||||
OPT_GROUP("Scan options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"),
|
||||
OPT_INTEGER('q', "thumbnail-quality", &scan_args->tn_quality,
|
||||
"Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2",
|
||||
"Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT: 2",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
|
||||
OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size,
|
||||
"Thumbnail size, in pixels. DEFAULT=500",
|
||||
"Thumbnail size, in pixels. DEFAULT: 552",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size),
|
||||
OPT_INTEGER(0, "thumbnail-count", &scan_args->tn_count,
|
||||
"Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1",
|
||||
"Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT: 1",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_count),
|
||||
OPT_INTEGER(0, "content-size", &scan_args->content_size,
|
||||
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768",
|
||||
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT: 32768",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size),
|
||||
OPT_STRING('o', "output", &scan_args->output, "Output index file path. DEFAULT: index.sist2"),
|
||||
OPT_BOOLEAN(0, "incremental", &scan_args->incremental,
|
||||
// TODO: Update help string
|
||||
"Reuse an existing index and only scan modified files."),
|
||||
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
|
||||
"If the output file path exists, only scan new or modified files."),
|
||||
OPT_BOOLEAN(0, "optimize-index", &scan_args->optimize_database,
|
||||
"Defragment index file after scan to reduce its file size."),
|
||||
OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
|
||||
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
|
||||
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: index"),
|
||||
OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
|
||||
"Use 0 to only scan files in PATH. DEFAULT: -1"),
|
||||
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
|
||||
"skip: Don't parse, list: only get file names as text, "
|
||||
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
|
||||
"skip: don't scan, list: only save file names as text, "
|
||||
"shallow: don't scan archives inside archives. DEFAULT: recurse"),
|
||||
OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
|
||||
"Passphrase for encrypted archive files"),
|
||||
|
||||
@ -559,8 +478,8 @@ int main(int argc, const char *argv[]) {
|
||||
"which are installed on your machine)"),
|
||||
OPT_BOOLEAN(0, "ocr-images", &scan_args->ocr_images, "Enable OCR'ing of image files."),
|
||||
OPT_BOOLEAN(0, "ocr-ebooks", &scan_args->ocr_ebooks, "Enable OCR'ing of ebook files."),
|
||||
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
|
||||
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
|
||||
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned."),
|
||||
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type."),
|
||||
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
|
||||
"(see USAGE.md). DEFAULT: 0.0005"),
|
||||
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer_mib,
|
||||
@ -568,19 +487,20 @@ int main(int argc, const char *argv[]) {
|
||||
"(see USAGE.md). DEFAULT: 2000"),
|
||||
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
|
||||
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
|
||||
"Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
|
||||
"Faster but less accurate EPUB parsing (no thumbnails, metadata)."),
|
||||
OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."),
|
||||
OPT_STRING(0, "list-file", &scan_args->list_path, "Specify a list of newline-delimited paths to be scanned"
|
||||
" instead of normal directory traversal. Use '-' to read"
|
||||
" from stdin."),
|
||||
|
||||
OPT_GROUP("Index options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT: http://localhost:9200"),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
|
||||
"Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
|
||||
OPT_BOOLEAN('p', "print", &index_args->print,
|
||||
"Print JSON documents to stdout instead of indexing to elasticsearch."),
|
||||
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
|
||||
"Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch."),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
@ -588,15 +508,15 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
|
||||
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 70"),
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
|
||||
"(You must use this option the first time you use the index command)"),
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings."),
|
||||
|
||||
OPT_GROUP("Web options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
|
||||
"Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
|
||||
OPT_STRING(0, "bind", &web_args->listen_address,
|
||||
"Listen for connections on this address. DEFAULT: localhost:4090"),
|
||||
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
|
||||
OPT_STRING(0, "auth0-audience", &web_args->auth0_audience, "API audience/identifier"),
|
||||
OPT_STRING(0, "auth0-domain", &web_args->auth0_domain, "Application domain"),
|
||||
@ -609,10 +529,10 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_STRING(0, "lang", &web_args->lang, "Default UI language. Can be changed by the user"),
|
||||
|
||||
OPT_GROUP("Exec-script options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
|
||||
"Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||
|
||||
@ -621,7 +541,11 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
struct argparse argparse;
|
||||
argparse_init(&argparse, options, usage, 0);
|
||||
argparse_describe(&argparse, DESCRIPTION, EPILOG);
|
||||
argparse_describe(
|
||||
&argparse,
|
||||
"\nLightning-fast file system indexer and search tool.",
|
||||
"\nMade by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||
);
|
||||
argc = argparse_parse(&argparse, argc, argv);
|
||||
|
||||
if (arg_version) {
|
||||
|
@ -6,7 +6,6 @@
|
||||
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
|
||||
|
||||
static int fs_read(struct vfile *f, void *buf, size_t size) {
|
||||
|
||||
if (f->fd == -1) {
|
||||
SHA1_Init(&f->sha1_ctx);
|
||||
|
||||
|
@ -12,7 +12,7 @@ char *magic_buffer_embedded(void *buffer, size_t buffer_size) {
|
||||
const char *magic_buffers[1] = {magic_database_buffer,};
|
||||
size_t sizes[1] = {sizeof(magic_database_buffer),};
|
||||
|
||||
// TODO: check if we can reuse the magic instance
|
||||
// TODO optimisation: check if we can reuse the magic instance
|
||||
int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1);
|
||||
|
||||
if (load_ret != 0) {
|
||||
|
567
src/parsing/mime_generated.c
vendored
567
src/parsing/mime_generated.c
vendored
@ -174,287 +174,291 @@ application_x_mach_binary=655526,
|
||||
application_x_mach_executable=655527,
|
||||
application_x_magic_cap_package_1_0=655528,
|
||||
application_x_mathcad=655529,
|
||||
application_x_maxis_dbpf=655530,
|
||||
application_x_meme=655531,
|
||||
application_x_midi=655532,
|
||||
application_x_mif=655533,
|
||||
application_x_mix_transfer=655534,
|
||||
application_x_mobipocket_ebook=655535 | 0x02000000,
|
||||
application_x_ms_compress_szdd=655536,
|
||||
application_x_ms_pdb=655537,
|
||||
application_x_ms_reader=655538,
|
||||
application_x_msaccess=655539,
|
||||
application_x_n64_rom=655540,
|
||||
application_x_navi_animation=655541,
|
||||
application_x_navidoc=655542,
|
||||
application_x_navimap=655543,
|
||||
application_x_navistyle=655544,
|
||||
application_x_nes_rom=655545,
|
||||
application_x_netcdf=655546,
|
||||
application_x_newton_compatible_pkg=655547,
|
||||
application_x_nintendo_ds_rom=655548,
|
||||
application_x_object=655549,
|
||||
application_x_omc=655550,
|
||||
application_x_omcdatamaker=655551,
|
||||
application_x_omcregerator=655552,
|
||||
application_x_pagemaker=655553,
|
||||
application_x_pcl=655554,
|
||||
application_x_pgp_keyring=655555,
|
||||
application_x_pixclscript=655556,
|
||||
application_x_pkcs7_certreqresp=655557,
|
||||
application_x_pkcs7_signature=655558,
|
||||
application_x_project=655559,
|
||||
application_x_qpro=655560,
|
||||
application_x_rar=655561 | 0x10000000,
|
||||
application_x_rpm=655562,
|
||||
application_x_sdp=655563,
|
||||
application_x_sea=655564,
|
||||
application_x_seelogo=655565,
|
||||
application_x_setupscript=655566,
|
||||
application_x_shar=655567,
|
||||
application_x_sharedlib=655568,
|
||||
application_x_shockwave_flash=655569,
|
||||
application_x_snappy_framed=655570,
|
||||
application_x_sprite=655571,
|
||||
application_x_sqlite3=655572,
|
||||
application_x_stargallery_thm=655573,
|
||||
application_x_stuffit=655574,
|
||||
application_x_sv4cpio=655575,
|
||||
application_x_sv4crc=655576,
|
||||
application_x_tar=655577 | 0x10000000,
|
||||
application_x_tbook=655578,
|
||||
application_x_terminfo=655579,
|
||||
application_x_terminfo2=655580,
|
||||
application_x_tex_tfm=655581,
|
||||
application_x_texinfo=655582,
|
||||
application_x_ustar=655583,
|
||||
application_x_visio=655584,
|
||||
application_x_vnd_audioexplosion_mzz=655585,
|
||||
application_x_vnd_ls_xpix=655586,
|
||||
application_x_vrml=655587,
|
||||
application_x_wais_source=655588,
|
||||
application_x_wine_extension_ini=655589,
|
||||
application_x_wintalk=655590,
|
||||
application_x_world=655591,
|
||||
application_x_wri=655592,
|
||||
application_x_x509_ca_cert=655593,
|
||||
application_x_xz=655594 | 0x08000000,
|
||||
application_x_zip=655595,
|
||||
application_x_zstd=655596 | 0x08000000,
|
||||
application_x_zstd_dictionary=655597,
|
||||
application_xml=655598,
|
||||
application_zip=655599 | 0x10000000,
|
||||
application_zlib=655600,
|
||||
audio_basic=458993 | 0x80000000,
|
||||
audio_it=458994,
|
||||
audio_make=458995,
|
||||
audio_mid=458996,
|
||||
audio_midi=458997,
|
||||
audio_mp4=458998,
|
||||
audio_mpeg=458999,
|
||||
audio_ogg=459000,
|
||||
audio_s3m=459001,
|
||||
audio_tsp_audio=459002,
|
||||
audio_tsplayer=459003,
|
||||
audio_vnd_qcelp=459004,
|
||||
audio_voxware=459005,
|
||||
audio_x_aiff=459006,
|
||||
audio_x_flac=459007,
|
||||
audio_x_gsm=459008,
|
||||
audio_x_hx_aac_adts=459009,
|
||||
audio_x_jam=459010,
|
||||
audio_x_liveaudio=459011,
|
||||
audio_x_m4a=459012,
|
||||
audio_x_midi=459013,
|
||||
audio_x_mod=459014,
|
||||
audio_x_mp4a_latm=459015,
|
||||
audio_x_mpeg_3=459016,
|
||||
audio_x_mpequrl=459017,
|
||||
audio_x_nspaudio=459018,
|
||||
audio_x_pn_realaudio=459019,
|
||||
audio_x_psid=459020,
|
||||
audio_x_realaudio=459021,
|
||||
audio_x_s3m=459022,
|
||||
audio_x_twinvq=459023,
|
||||
audio_x_twinvq_plugin=459024,
|
||||
audio_x_voc=459025,
|
||||
audio_x_wav=459026,
|
||||
audio_x_xbox_executable=459027 | 0x80000000,
|
||||
audio_x_xbox360_executable=459028 | 0x80000000,
|
||||
audio_xm=459029,
|
||||
font_otf=327958 | 0x20000000,
|
||||
font_sfnt=327959 | 0x20000000,
|
||||
font_woff=327960 | 0x20000000,
|
||||
font_woff2=327961 | 0x20000000,
|
||||
image_bmp=524570,
|
||||
image_cmu_raster=524571,
|
||||
image_fif=524572,
|
||||
image_florian=524573,
|
||||
image_g3fax=524574,
|
||||
image_gif=524575,
|
||||
image_heic=524576,
|
||||
image_ief=524577,
|
||||
image_jpeg=524578,
|
||||
image_jutvision=524579,
|
||||
image_naplps=524580,
|
||||
image_pict=524581,
|
||||
image_png=524582,
|
||||
image_svg=524583 | 0x80000000,
|
||||
image_svg_xml=524584 | 0x80000000,
|
||||
image_tiff=524585,
|
||||
image_vnd_adobe_photoshop=524586 | 0x80000000,
|
||||
image_vnd_djvu=524587 | 0x80000000,
|
||||
image_vnd_fpx=524588,
|
||||
image_vnd_microsoft_icon=524589,
|
||||
image_vnd_rn_realflash=524590,
|
||||
image_vnd_rn_realpix=524591,
|
||||
image_vnd_wap_wbmp=524592,
|
||||
image_vnd_xiff=524593,
|
||||
image_webp=524594,
|
||||
image_wmf=524595,
|
||||
image_x_3ds=524596,
|
||||
image_x_adobe_dng=524597 | 0x00800000,
|
||||
image_x_award_bioslogo=524598,
|
||||
image_x_canon_cr2=524599 | 0x00800000,
|
||||
image_x_canon_crw=524600 | 0x00800000,
|
||||
image_x_cmu_raster=524601,
|
||||
image_x_cur=524602,
|
||||
image_x_dcraw=524603 | 0x00800000,
|
||||
image_x_dwg=524604,
|
||||
image_x_eps=524605,
|
||||
image_x_epson_erf=524606 | 0x00800000,
|
||||
image_x_exr=524607,
|
||||
image_x_fuji_raf=524608 | 0x00800000,
|
||||
image_x_gem=524609,
|
||||
image_x_icns=524610,
|
||||
image_x_icon=524611 | 0x80000000,
|
||||
image_x_jg=524612,
|
||||
image_x_jps=524613,
|
||||
image_x_kodak_dcr=524614 | 0x00800000,
|
||||
image_x_kodak_k25=524615 | 0x00800000,
|
||||
image_x_kodak_kdc=524616 | 0x00800000,
|
||||
image_x_minolta_mrw=524617 | 0x00800000,
|
||||
image_x_ms_bmp=524618,
|
||||
image_x_niff=524619,
|
||||
image_x_nikon_nef=524620 | 0x00800000,
|
||||
image_x_olympus_orf=524621 | 0x00800000,
|
||||
image_x_panasonic_raw=524622 | 0x00800000,
|
||||
image_x_pcx=524623,
|
||||
image_x_pentax_pef=524624 | 0x00800000,
|
||||
image_x_pict=524625,
|
||||
image_x_portable_bitmap=524626,
|
||||
image_x_portable_graymap=524627,
|
||||
image_x_portable_pixmap=524628,
|
||||
image_x_quicktime=524629,
|
||||
image_x_rgb=524630,
|
||||
image_x_sigma_x3f=524631 | 0x00800000,
|
||||
image_x_sony_arw=524632 | 0x00800000,
|
||||
image_x_sony_sr2=524633 | 0x00800000,
|
||||
image_x_sony_srf=524634 | 0x00800000,
|
||||
image_x_tga=524635,
|
||||
image_x_tiff=524636,
|
||||
image_x_win_bitmap=524637,
|
||||
image_x_xcf=524638 | 0x80000000,
|
||||
image_x_xpixmap=524639 | 0x80000000,
|
||||
image_x_xwindowdump=524640,
|
||||
message_news=196961,
|
||||
message_rfc822=196962,
|
||||
model_vnd_dwf=65891,
|
||||
model_vnd_gdl=65892,
|
||||
model_vnd_gs_gdl=65893,
|
||||
model_vrml=65894,
|
||||
model_x_pov=65895,
|
||||
application_x_matlab_data=655530,
|
||||
application_x_maxis_dbpf=655531,
|
||||
application_x_meme=655532,
|
||||
application_x_midi=655533,
|
||||
application_x_mif=655534,
|
||||
application_x_mix_transfer=655535,
|
||||
application_x_mobipocket_ebook=655536 | 0x02000000,
|
||||
application_x_ms_compress_szdd=655537,
|
||||
application_x_ms_pdb=655538,
|
||||
application_x_ms_reader=655539,
|
||||
application_x_msaccess=655540,
|
||||
application_x_n64_rom=655541,
|
||||
application_x_navi_animation=655542,
|
||||
application_x_navidoc=655543,
|
||||
application_x_navimap=655544,
|
||||
application_x_navistyle=655545,
|
||||
application_x_nes_rom=655546,
|
||||
application_x_netcdf=655547,
|
||||
application_x_newton_compatible_pkg=655548,
|
||||
application_x_nintendo_ds_rom=655549,
|
||||
application_x_object=655550,
|
||||
application_x_omc=655551,
|
||||
application_x_omcdatamaker=655552,
|
||||
application_x_omcregerator=655553,
|
||||
application_x_pagemaker=655554,
|
||||
application_x_pcl=655555,
|
||||
application_x_pgp_keyring=655556,
|
||||
application_x_pixclscript=655557,
|
||||
application_x_pkcs7_certreqresp=655558,
|
||||
application_x_pkcs7_signature=655559,
|
||||
application_x_project=655560,
|
||||
application_x_qpro=655561,
|
||||
application_x_rar=655562 | 0x10000000,
|
||||
application_x_rpm=655563,
|
||||
application_x_sdp=655564,
|
||||
application_x_sea=655565,
|
||||
application_x_seelogo=655566,
|
||||
application_x_setupscript=655567,
|
||||
application_x_shar=655568,
|
||||
application_x_sharedlib=655569,
|
||||
application_x_shockwave_flash=655570,
|
||||
application_x_snappy_framed=655571,
|
||||
application_x_sprite=655572,
|
||||
application_x_sqlite3=655573,
|
||||
application_x_stargallery_thm=655574,
|
||||
application_x_stuffit=655575,
|
||||
application_x_sv4cpio=655576,
|
||||
application_x_sv4crc=655577,
|
||||
application_x_tar=655578 | 0x10000000,
|
||||
application_x_tbook=655579,
|
||||
application_x_terminfo=655580,
|
||||
application_x_terminfo2=655581,
|
||||
application_x_tex_tfm=655582,
|
||||
application_x_texinfo=655583,
|
||||
application_x_ustar=655584,
|
||||
application_x_visio=655585,
|
||||
application_x_vnd_audioexplosion_mzz=655586,
|
||||
application_x_vnd_ls_xpix=655587,
|
||||
application_x_vrml=655588,
|
||||
application_x_wais_source=655589,
|
||||
application_x_wine_extension_ini=655590,
|
||||
application_x_wintalk=655591,
|
||||
application_x_world=655592,
|
||||
application_x_wri=655593,
|
||||
application_x_x509_ca_cert=655594,
|
||||
application_x_xz=655595 | 0x08000000,
|
||||
application_x_zip=655596,
|
||||
application_x_zstd=655597 | 0x08000000,
|
||||
application_x_zstd_dictionary=655598,
|
||||
application_xml=655599,
|
||||
application_zip=655600 | 0x10000000,
|
||||
application_zlib=655601,
|
||||
audio_basic=458994 | 0x80000000,
|
||||
audio_it=458995,
|
||||
audio_make=458996,
|
||||
audio_mid=458997,
|
||||
audio_midi=458998,
|
||||
audio_mp4=458999,
|
||||
audio_mpeg=459000,
|
||||
audio_ogg=459001,
|
||||
audio_s3m=459002,
|
||||
audio_tsp_audio=459003,
|
||||
audio_tsplayer=459004,
|
||||
audio_vnd_qcelp=459005,
|
||||
audio_voxware=459006,
|
||||
audio_x_aiff=459007,
|
||||
audio_x_flac=459008,
|
||||
audio_x_gsm=459009,
|
||||
audio_x_hx_aac_adts=459010,
|
||||
audio_x_jam=459011,
|
||||
audio_x_liveaudio=459012,
|
||||
audio_x_m4a=459013,
|
||||
audio_x_midi=459014,
|
||||
audio_x_mod=459015,
|
||||
audio_x_mp4a_latm=459016,
|
||||
audio_x_mpeg_3=459017,
|
||||
audio_x_mpequrl=459018,
|
||||
audio_x_nspaudio=459019,
|
||||
audio_x_pn_realaudio=459020,
|
||||
audio_x_psid=459021,
|
||||
audio_x_realaudio=459022,
|
||||
audio_x_s3m=459023,
|
||||
audio_x_twinvq=459024,
|
||||
audio_x_twinvq_plugin=459025,
|
||||
audio_x_voc=459026,
|
||||
audio_x_wav=459027,
|
||||
audio_x_xbox_executable=459028 | 0x80000000,
|
||||
audio_x_xbox360_executable=459029 | 0x80000000,
|
||||
audio_xm=459030,
|
||||
font_otf=327959 | 0x20000000,
|
||||
font_sfnt=327960 | 0x20000000,
|
||||
font_woff=327961 | 0x20000000,
|
||||
font_woff2=327962 | 0x20000000,
|
||||
image_bmp=524571,
|
||||
image_cmu_raster=524572,
|
||||
image_fif=524573,
|
||||
image_florian=524574,
|
||||
image_g3fax=524575,
|
||||
image_gif=524576,
|
||||
image_heic=524577,
|
||||
image_ief=524578,
|
||||
image_jpeg=524579,
|
||||
image_jutvision=524580,
|
||||
image_naplps=524581,
|
||||
image_pict=524582,
|
||||
image_png=524583,
|
||||
image_svg=524584 | 0x80000000,
|
||||
image_svg_xml=524585 | 0x80000000,
|
||||
image_tiff=524586,
|
||||
image_vnd_adobe_photoshop=524587 | 0x80000000,
|
||||
image_vnd_djvu=524588 | 0x80000000,
|
||||
image_vnd_fpx=524589,
|
||||
image_vnd_microsoft_icon=524590,
|
||||
image_vnd_rn_realflash=524591,
|
||||
image_vnd_rn_realpix=524592,
|
||||
image_vnd_wap_wbmp=524593,
|
||||
image_vnd_xiff=524594,
|
||||
image_webp=524595,
|
||||
image_wmf=524596,
|
||||
image_x_3ds=524597,
|
||||
image_x_adobe_dng=524598 | 0x00800000,
|
||||
image_x_award_bioslogo=524599,
|
||||
image_x_canon_cr2=524600 | 0x00800000,
|
||||
image_x_canon_crw=524601 | 0x00800000,
|
||||
image_x_cmu_raster=524602,
|
||||
image_x_cur=524603,
|
||||
image_x_dcraw=524604 | 0x00800000,
|
||||
image_x_dwg=524605,
|
||||
image_x_eps=524606,
|
||||
image_x_epson_erf=524607 | 0x00800000,
|
||||
image_x_exr=524608,
|
||||
image_x_fuji_raf=524609 | 0x00800000,
|
||||
image_x_gem=524610,
|
||||
image_x_icns=524611,
|
||||
image_x_icon=524612 | 0x80000000,
|
||||
image_x_jg=524613,
|
||||
image_x_jps=524614,
|
||||
image_x_kodak_dcr=524615 | 0x00800000,
|
||||
image_x_kodak_k25=524616 | 0x00800000,
|
||||
image_x_kodak_kdc=524617 | 0x00800000,
|
||||
image_x_minolta_mrw=524618 | 0x00800000,
|
||||
image_x_ms_bmp=524619,
|
||||
image_x_niff=524620,
|
||||
image_x_nikon_nef=524621 | 0x00800000,
|
||||
image_x_olympus_orf=524622 | 0x00800000,
|
||||
image_x_panasonic_raw=524623 | 0x00800000,
|
||||
image_x_pcx=524624,
|
||||
image_x_pentax_pef=524625 | 0x00800000,
|
||||
image_x_pict=524626,
|
||||
image_x_portable_bitmap=524627,
|
||||
image_x_portable_graymap=524628,
|
||||
image_x_portable_pixmap=524629,
|
||||
image_x_quicktime=524630,
|
||||
image_x_rgb=524631,
|
||||
image_x_sigma_x3f=524632 | 0x00800000,
|
||||
image_x_sony_arw=524633 | 0x00800000,
|
||||
image_x_sony_sr2=524634 | 0x00800000,
|
||||
image_x_sony_srf=524635 | 0x00800000,
|
||||
image_x_tga=524636,
|
||||
image_x_tiff=524637,
|
||||
image_x_win_bitmap=524638,
|
||||
image_x_xcf=524639 | 0x80000000,
|
||||
image_x_xpixmap=524640 | 0x80000000,
|
||||
image_x_xwindowdump=524641,
|
||||
message_news=196962,
|
||||
message_rfc822=196963,
|
||||
model_vnd_dwf=65892,
|
||||
model_vnd_gdl=65893,
|
||||
model_vnd_gs_gdl=65894,
|
||||
model_vrml=65895,
|
||||
model_x_pov=65896,
|
||||
sist2_sidecar=2,
|
||||
text_PGP=590184,
|
||||
text_asp=590185,
|
||||
text_css=590186,
|
||||
text_html=590187 | 0x01000000,
|
||||
text_javascript=590188,
|
||||
text_mcf=590189,
|
||||
text_pascal=590190,
|
||||
text_plain=590191,
|
||||
text_richtext=590192,
|
||||
text_rtf=590193,
|
||||
text_scriplet=590194,
|
||||
text_tab_separated_values=590195,
|
||||
text_troff=590196,
|
||||
text_uri_list=590197,
|
||||
text_vnd_abc=590198,
|
||||
text_vnd_fmi_flexstor=590199,
|
||||
text_vnd_wap_wml=590200,
|
||||
text_vnd_wap_wmlscript=590201,
|
||||
text_webviewhtml=590202,
|
||||
text_x_Algol68=590203,
|
||||
text_x_asm=590204,
|
||||
text_x_audiosoft_intra=590205,
|
||||
text_x_awk=590206,
|
||||
text_x_bcpl=590207,
|
||||
text_x_c=590208,
|
||||
text_x_c__=590209,
|
||||
text_x_component=590210,
|
||||
text_x_diff=590211,
|
||||
text_x_fortran=590212,
|
||||
text_x_java=590213,
|
||||
text_x_la_asf=590214,
|
||||
text_x_lisp=590215,
|
||||
text_x_m=590216,
|
||||
text_x_m4=590217,
|
||||
text_x_makefile=590218,
|
||||
text_x_ms_regedit=590219,
|
||||
text_x_msdos_batch=590220,
|
||||
text_x_objective_c=590221,
|
||||
text_x_pascal=590222,
|
||||
text_x_perl=590223,
|
||||
text_x_php=590224,
|
||||
text_x_po=590225,
|
||||
text_x_python=590226,
|
||||
text_x_ruby=590227,
|
||||
text_x_sass=590228,
|
||||
text_x_scss=590229,
|
||||
text_x_server_parsed_html=590230,
|
||||
text_x_setext=590231,
|
||||
text_x_sgml=590232 | 0x01000000,
|
||||
text_x_shellscript=590233,
|
||||
text_x_speech=590234,
|
||||
text_x_tcl=590235,
|
||||
text_x_tex=590236,
|
||||
text_x_uil=590237,
|
||||
text_x_uuencode=590238,
|
||||
text_x_vcalendar=590239,
|
||||
text_x_vcard=590240,
|
||||
text_xml=590241 | 0x01000000,
|
||||
video_MP2T=393634,
|
||||
video_animaflex=393635,
|
||||
video_avi=393636,
|
||||
video_avs_video=393637,
|
||||
video_mp4=393638,
|
||||
video_mpeg=393639,
|
||||
video_quicktime=393640,
|
||||
video_vdo=393641,
|
||||
video_vivo=393642,
|
||||
video_vnd_rn_realvideo=393643,
|
||||
video_vosaic=393644,
|
||||
video_webm=393645,
|
||||
video_x_amt_demorun=393646,
|
||||
video_x_amt_showrun=393647,
|
||||
video_x_atomic3d_feature=393648,
|
||||
video_x_dl=393649,
|
||||
video_x_dv=393650,
|
||||
video_x_fli=393651,
|
||||
video_x_flv=393652,
|
||||
video_x_isvideo=393653,
|
||||
video_x_jng=393654 | 0x80000000,
|
||||
video_x_m4v=393655,
|
||||
video_x_matroska=393656,
|
||||
video_x_mng=393657,
|
||||
video_x_motion_jpeg=393658,
|
||||
video_x_ms_asf=393659,
|
||||
video_x_msvideo=393660,
|
||||
video_x_qtc=393661,
|
||||
video_x_sgi_movie=393662,
|
||||
x_epoc_x_sisx_app=721343,
|
||||
text_PGP=590185,
|
||||
text_asp=590186,
|
||||
text_css=590187,
|
||||
text_csv=590188,
|
||||
text_html=590189 | 0x01000000,
|
||||
text_javascript=590190,
|
||||
text_mcf=590191,
|
||||
text_pascal=590192,
|
||||
text_plain=590193,
|
||||
text_richtext=590194,
|
||||
text_rtf=590195,
|
||||
text_scriplet=590196,
|
||||
text_tab_separated_values=590197,
|
||||
text_troff=590198,
|
||||
text_uri_list=590199,
|
||||
text_vnd_abc=590200,
|
||||
text_vnd_fmi_flexstor=590201,
|
||||
text_vnd_wap_wml=590202,
|
||||
text_vnd_wap_wmlscript=590203,
|
||||
text_webviewhtml=590204,
|
||||
text_x_Algol68=590205,
|
||||
text_x_asm=590206,
|
||||
text_x_audiosoft_intra=590207,
|
||||
text_x_awk=590208,
|
||||
text_x_bcpl=590209,
|
||||
text_x_c=590210,
|
||||
text_x_c__=590211,
|
||||
text_x_component=590212,
|
||||
text_x_diff=590213,
|
||||
text_x_fortran=590214,
|
||||
text_x_java=590215,
|
||||
text_x_la_asf=590216,
|
||||
text_x_lisp=590217,
|
||||
text_x_m=590218,
|
||||
text_x_m4=590219,
|
||||
text_x_makefile=590220,
|
||||
text_x_ms_regedit=590221,
|
||||
text_x_msdos_batch=590222,
|
||||
text_x_objective_c=590223,
|
||||
text_x_pascal=590224,
|
||||
text_x_perl=590225,
|
||||
text_x_php=590226,
|
||||
text_x_po=590227,
|
||||
text_x_python=590228,
|
||||
text_x_ruby=590229,
|
||||
text_x_sass=590230,
|
||||
text_x_script_python=590231,
|
||||
text_x_scss=590232,
|
||||
text_x_server_parsed_html=590233,
|
||||
text_x_setext=590234,
|
||||
text_x_sgml=590235 | 0x01000000,
|
||||
text_x_shellscript=590236,
|
||||
text_x_speech=590237,
|
||||
text_x_tcl=590238,
|
||||
text_x_tex=590239,
|
||||
text_x_uil=590240,
|
||||
text_x_uuencode=590241,
|
||||
text_x_vcalendar=590242,
|
||||
text_x_vcard=590243,
|
||||
text_xml=590244 | 0x01000000,
|
||||
video_MP2T=393637,
|
||||
video_animaflex=393638,
|
||||
video_avi=393639,
|
||||
video_avs_video=393640,
|
||||
video_mp4=393641,
|
||||
video_mpeg=393642,
|
||||
video_quicktime=393643,
|
||||
video_vdo=393644,
|
||||
video_vivo=393645,
|
||||
video_vnd_rn_realvideo=393646,
|
||||
video_vosaic=393647,
|
||||
video_webm=393648,
|
||||
video_x_amt_demorun=393649,
|
||||
video_x_amt_showrun=393650,
|
||||
video_x_atomic3d_feature=393651,
|
||||
video_x_dl=393652,
|
||||
video_x_dv=393653,
|
||||
video_x_fli=393654,
|
||||
video_x_flv=393655,
|
||||
video_x_isvideo=393656,
|
||||
video_x_jng=393657 | 0x80000000,
|
||||
video_x_m4v=393658,
|
||||
video_x_matroska=393659,
|
||||
video_x_mng=393660,
|
||||
video_x_motion_jpeg=393661,
|
||||
video_x_ms_asf=393662,
|
||||
video_x_msvideo=393663,
|
||||
video_x_qtc=393664,
|
||||
video_x_sgi_movie=393665,
|
||||
x_epoc_x_sisx_app=721346,
|
||||
};
|
||||
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
|
||||
case application_x_matlab_data: return "application/x-matlab-data";
|
||||
case application_arj: return "application/arj";
|
||||
case application_base64: return "application/base64";
|
||||
case application_binhex: return "application/binhex";
|
||||
@ -802,6 +806,8 @@ case text_mcf: return "text/mcf";
|
||||
case text_pascal: return "text/pascal";
|
||||
case text_PGP: return "text/PGP";
|
||||
case text_plain: return "text/plain";
|
||||
case text_x_script_python: return "text/x-script.python";
|
||||
case text_csv: return "text/csv";
|
||||
case application_vnd_coffeescript: return "application/vnd.coffeescript";
|
||||
case text_richtext: return "text/richtext";
|
||||
case text_rtf: return "text/rtf";
|
||||
@ -906,6 +912,7 @@ case image_x_epson_erf: return "image/x-epson-erf";
|
||||
case sist2_sidecar: return "sist2/sidecar";
|
||||
default: return NULL;}}
|
||||
unsigned int mime_extension_lookup(unsigned long extension_crc32) {switch (extension_crc32) {
|
||||
case 2495639202:return application_x_matlab_data;
|
||||
case 104524599:return application_arj;
|
||||
case 1388642652:return application_base64;
|
||||
case 3514823219:return application_binhex;
|
||||
@ -1194,6 +1201,7 @@ case 398963028:return text_javascript;
|
||||
case 1431272808:return text_mcf;
|
||||
case 509266722:return text_pascal;
|
||||
case 1689700070:case 794565824:case 351504808:case 214229345:case 30677878:case 1835907068:case 1154021400:case 3992351814:case 2107886487:case 2202503947:case 999008199:case 473390917:case 3679822420:case 1465078094:case 1466496025:case 2277716423:case 157353380:case 2002237032:case 4216257084:case 590894066:case 987584319:case 2268432115:case 3551958239:case 1436306077:case 3060306774:case 808890964:case 2564639436:case 3322219037:case 3334425408:case 3818365258:case 1403162576:case 590812979:case 1800036834:case 144986711:case 621471808:case 449607278:case 2403297477:case 2529069283:case 3929123204:return text_plain;
|
||||
case 194218739:return text_x_script_python;
|
||||
case 1401235891:return application_vnd_coffeescript;
|
||||
case 196656302:case 1203117491:case 3183026384:return text_richtext;
|
||||
case 2119613712:return text_scriplet;
|
||||
@ -1288,6 +1296,7 @@ case 142938048:return image_x_epson_erf;
|
||||
case 287571459:return sist2_sidecar;
|
||||
default: return 0;}}
|
||||
unsigned int mime_name_lookup(unsigned long mime_crc32) {switch (mime_crc32) {
|
||||
case 3272851765: return application_x_matlab_data;
|
||||
case 3812269631: return application_arj;
|
||||
case 2479484568: return application_base64;
|
||||
case 3891182180: return application_binhex;
|
||||
@ -1635,6 +1644,8 @@ case 768274928: return text_mcf;
|
||||
case 3970938585: return text_pascal;
|
||||
case 1059844876: return text_PGP;
|
||||
case 1152832851: return text_plain;
|
||||
case 3112468514: return text_x_script_python;
|
||||
case 1881267919: return text_csv;
|
||||
case 2809123822: return application_vnd_coffeescript;
|
||||
case 4000659158: return text_richtext;
|
||||
case 1060344107: return text_rtf;
|
||||
|
@ -46,17 +46,13 @@ file_type_t get_file_type(unsigned int mime, size_t size, const char *filepath)
|
||||
return FILETYPE_MEDIA;
|
||||
} else if (IS_PDF(mime)) {
|
||||
return FILETYPE_EBOOK;
|
||||
} else if (major_mime == MimeText && ScanCtx.text_ctx.content_size > 0) {
|
||||
if (IS_MARKUP(mime)) {
|
||||
return FILETYPE_MARKUP;
|
||||
} else {
|
||||
return FILETYPE_TEXT;
|
||||
}
|
||||
|
||||
} else if (IS_MARKUP(mime)) {
|
||||
return FILETYPE_MARKUP;
|
||||
} else if (major_mime == MimeText) {
|
||||
return FILETYPE_TEXT;
|
||||
} else if (IS_FONT(mime)) {
|
||||
return FILETYPE_FONT;
|
||||
} else if (
|
||||
ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
|
||||
} else if (ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
|
||||
IS_ARC(mime) ||
|
||||
(IS_ARC_FILTER(mime) && should_parse_filtered_file(filepath))
|
||||
)) {
|
||||
@ -98,10 +94,6 @@ int get_mime(parse_job_t *job) {
|
||||
}
|
||||
}
|
||||
|
||||
if (strlen(extension) == 0 && strlen(job->filepath + job->base) == 40) {
|
||||
fprintf(stderr, "GIT? %s", job->filepath);
|
||||
}
|
||||
|
||||
if (ScanCtx.fast) {
|
||||
return 0;
|
||||
}
|
||||
@ -122,7 +114,6 @@ int get_mime(parse_job_t *job) {
|
||||
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc));
|
||||
}
|
||||
|
||||
|
||||
return GET_MIME_ERROR_FATAL;
|
||||
}
|
||||
|
||||
@ -130,12 +121,13 @@ int get_mime(parse_job_t *job) {
|
||||
|
||||
if (magic_mime_str != NULL) {
|
||||
mime = (int) mime_get_mime_by_string(magic_mime_str);
|
||||
free(magic_mime_str);
|
||||
|
||||
if (mime == 0) {
|
||||
LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
|
||||
free(magic_mime_str);
|
||||
return 0;
|
||||
}
|
||||
free(magic_mime_str);
|
||||
}
|
||||
|
||||
if (job->vfile.reset != NULL) {
|
||||
@ -163,14 +155,11 @@ void parse(parse_job_t *job) {
|
||||
doc->meta_head = NULL;
|
||||
doc->meta_tail = NULL;
|
||||
doc->size = job->vfile.st_size;
|
||||
doc->mtime = (int) job->vfile.mtime;
|
||||
doc->mtime = job->vfile.mtime;
|
||||
doc->mime = get_mime(job);
|
||||
generate_doc_id(doc->filepath + ScanCtx.index.desc.root_len, doc->doc_id);
|
||||
|
||||
if (doc->mime == GET_MIME_ERROR_FATAL) {
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_failed_files_count += 1;
|
||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
free(doc);
|
||||
@ -178,9 +167,6 @@ void parse(parse_job_t *job) {
|
||||
}
|
||||
|
||||
if (database_mark_document(ProcData.index_db, doc->doc_id, doc->mtime)) {
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_skipped_files_count += 1;
|
||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
free(doc);
|
||||
@ -246,7 +232,7 @@ void parse(parse_job_t *job) {
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
|
||||
meta_parent->key = MetaParent;
|
||||
strcpy(meta_parent->str_val, job->parent);
|
||||
APPEND_META((doc), meta_parent)
|
||||
APPEND_META((doc), meta_parent);
|
||||
}
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
@ -254,7 +240,7 @@ void parse(parse_job_t *job) {
|
||||
if (job->vfile.has_checksum) {
|
||||
char sha1_digest_str[SHA1_STR_LENGTH];
|
||||
buf2hex((unsigned char *) job->vfile.sha1_digest, SHA1_DIGEST_LENGTH, (char *) sha1_digest_str);
|
||||
APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str)
|
||||
APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str);
|
||||
}
|
||||
|
||||
write_document(doc);
|
||||
|
@ -27,6 +27,8 @@
|
||||
|
||||
#define UNUSED(x) __attribute__((__unused__)) x
|
||||
|
||||
#define MAX_THREADS (256)
|
||||
|
||||
#include "util.h"
|
||||
#include "log.h"
|
||||
#include "types.h"
|
||||
|
153
src/tpool.c
153
src/tpool.c
@ -6,7 +6,7 @@
|
||||
#include <sys/wait.h>
|
||||
#include "parsing/parse.h"
|
||||
|
||||
#define BLANK_STR " "
|
||||
#define BLANK_STR " "
|
||||
|
||||
typedef struct {
|
||||
int thread_id;
|
||||
@ -17,7 +17,6 @@ typedef struct {
|
||||
typedef struct tpool {
|
||||
pthread_t threads[256];
|
||||
int num_threads;
|
||||
int fork;
|
||||
|
||||
int print_progress;
|
||||
|
||||
@ -32,6 +31,8 @@ typedef struct tpool {
|
||||
pthread_cond_t workers_initialized_cond;
|
||||
int busy_count;
|
||||
int initialized_count;
|
||||
int thread_id_to_pid_mapping[MAX_THREADS];
|
||||
char ipc_database_filepath[128];
|
||||
} *shm;
|
||||
} tpool_t;
|
||||
|
||||
@ -43,11 +44,6 @@ void job_destroy(job_t *job) {
|
||||
free(job);
|
||||
}
|
||||
|
||||
void tpool_dump_debug_info(tpool_t *pool) {
|
||||
// TODO
|
||||
LOG_DEBUGF("tpool.c", "pool->num_threads = %d", pool->num_threads);
|
||||
}
|
||||
|
||||
/**
|
||||
* Push work object to thread pool
|
||||
*/
|
||||
@ -130,108 +126,124 @@ static void worker_thread_loop(tpool_t *pool) {
|
||||
}
|
||||
|
||||
static void worker_proc_init(tpool_t *pool, int thread_id) {
|
||||
// TODO create PID -> thread_id mapping for signal handler
|
||||
pthread_mutex_lock(&pool->shm->data_mutex);
|
||||
pool->shm->thread_id_to_pid_mapping[thread_id] = getpid();
|
||||
pthread_mutex_unlock(&pool->shm->data_mutex);
|
||||
|
||||
ProcData.thread_id = thread_id;
|
||||
|
||||
if (ScanCtx.index.path[0] != '\0') {
|
||||
// TODO This should be closed in proc cleanup function
|
||||
ProcData.index_db = database_create(ScanCtx.index.path, INDEX_DATABASE);
|
||||
ProcData.index_db->ipc_ctx = &pool->shm->ipc_ctx;
|
||||
database_open(ProcData.index_db);
|
||||
}
|
||||
|
||||
// TODO /dev/shm
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
ProcData.ipc_db = database_create("/dev/shm/ipc.sist2", IPC_CONSUMER_DATABASE);
|
||||
ProcData.ipc_db = database_create(pool->shm->ipc_database_filepath, IPC_CONSUMER_DATABASE);
|
||||
ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx;
|
||||
database_open(ProcData.ipc_db);
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
}
|
||||
|
||||
void worker_proc_cleanup(tpool_t* pool) {
|
||||
void worker_proc_cleanup(tpool_t *pool) {
|
||||
if (ProcData.index_db != NULL) {
|
||||
database_close(ProcData.index_db, FALSE);
|
||||
}
|
||||
database_close(ProcData.ipc_db, FALSE);
|
||||
}
|
||||
|
||||
#ifndef SIST_DEBUG
|
||||
#define TPOOL_FORK
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Thread worker function
|
||||
*/
|
||||
static void *tpool_worker(void *arg) {
|
||||
tpool_t *pool = ((start_thread_arg_t *) arg)->pool;
|
||||
|
||||
if (pool->fork) {
|
||||
while (TRUE) {
|
||||
int pid = fork();
|
||||
#ifdef TPOOL_FORK
|
||||
while (TRUE) {
|
||||
int pid = fork();
|
||||
|
||||
if (pid == 0) {
|
||||
worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id);
|
||||
if (pid == 0) {
|
||||
worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id);
|
||||
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->workers_initialized_cond);
|
||||
pool->shm->initialized_count += 1;
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->workers_initialized_cond);
|
||||
pool->shm->initialized_count += 1;
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
|
||||
worker_thread_loop(pool);
|
||||
worker_thread_loop(pool);
|
||||
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->done_working_cond);
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->done_working_cond);
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
|
||||
worker_proc_cleanup(pool);
|
||||
worker_proc_cleanup(pool);
|
||||
|
||||
exit(0);
|
||||
exit(0);
|
||||
|
||||
} else {
|
||||
int status;
|
||||
// TODO: On crash, print debug info and resume thread
|
||||
waitpid(pid, &status, 0);
|
||||
} else {
|
||||
int status;
|
||||
waitpid(pid, &status, 0);
|
||||
|
||||
LOG_DEBUGF("tpool.c", "Child process terminated with status code %d", WEXITSTATUS(status));
|
||||
LOG_DEBUGF("tpool.c", "Child process terminated with status code %d", WEXITSTATUS(status));
|
||||
|
||||
pthread_mutex_lock(&(pool->shm->ipc_ctx.mutex));
|
||||
pool->shm->ipc_ctx.completed_job_count += 1;
|
||||
pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex));
|
||||
pthread_mutex_lock(&(pool->shm->ipc_ctx.mutex));
|
||||
pool->shm->ipc_ctx.completed_job_count += 1;
|
||||
pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex));
|
||||
|
||||
pthread_mutex_lock(&(pool->shm->data_mutex));
|
||||
pool->shm->busy_count -= 1;
|
||||
pthread_mutex_unlock(&(pool->shm->data_mutex));
|
||||
pthread_mutex_lock(&(pool->shm->data_mutex));
|
||||
pool->shm->busy_count -= 1;
|
||||
pthread_mutex_unlock(&(pool->shm->data_mutex));
|
||||
|
||||
if (WIFSIGNALED(status)) {
|
||||
// TODO: Get current_job based on PID
|
||||
const char *job_filepath = "TODO";
|
||||
|
||||
LOG_FATALF_NO_EXIT(
|
||||
"tpool.c",
|
||||
"Child process was terminated by signal (%s).\n"
|
||||
BLANK_STR "The process was working on %s",
|
||||
strsignal(WTERMSIG(status)),
|
||||
job_filepath
|
||||
);
|
||||
if (WIFSIGNALED(status)) {
|
||||
int crashed_thread_id = -1;
|
||||
for (int i = 0; i < MAX_THREADS; i++) {
|
||||
if (pool->shm->thread_id_to_pid_mapping[i] == pid) {
|
||||
crashed_thread_id = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
const char *job_filepath;
|
||||
if (crashed_thread_id != -1) {
|
||||
job_filepath = pool->shm->ipc_ctx.current_job[crashed_thread_id];
|
||||
} else {
|
||||
job_filepath = "unknown";
|
||||
}
|
||||
|
||||
LOG_FATALF_NO_EXIT(
|
||||
"tpool.c",
|
||||
"Child process crashed (%s).\n"
|
||||
BLANK_STR "The process was working on %s\n"
|
||||
BLANK_STR "Please consider creating a bug report at https://github.com/simon987/sist2/issues !\n"
|
||||
BLANK_STR "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs.\n",
|
||||
strsignal(WTERMSIG(status)),
|
||||
job_filepath
|
||||
);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
} else {
|
||||
worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id);
|
||||
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->workers_initialized_cond);
|
||||
pool->shm->initialized_count += 1;
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
|
||||
worker_thread_loop(pool);
|
||||
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->done_working_cond);
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#else
|
||||
worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id);
|
||||
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->workers_initialized_cond);
|
||||
pool->shm->initialized_count += 1;
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
|
||||
worker_thread_loop(pool);
|
||||
|
||||
pthread_mutex_lock(&pool->shm->mutex);
|
||||
pthread_cond_signal(&pool->shm->done_working_cond);
|
||||
pthread_mutex_unlock(&pool->shm->mutex);
|
||||
#endif
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -295,13 +307,10 @@ void tpool_destroy(tpool_t *pool) {
|
||||
*/
|
||||
tpool_t *tpool_create(int thread_cnt, int print_progress) {
|
||||
|
||||
int fork = FALSE;
|
||||
|
||||
tpool_t *pool = malloc(sizeof(tpool_t));
|
||||
|
||||
pool->shm = mmap(NULL, sizeof(*pool->shm), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
|
||||
pool->fork = fork;
|
||||
pool->num_threads = thread_cnt;
|
||||
pool->shm->ipc_ctx.job_count = 0;
|
||||
pool->shm->ipc_ctx.no_more_jobs = FALSE;
|
||||
@ -310,6 +319,7 @@ tpool_t *tpool_create(int thread_cnt, int print_progress) {
|
||||
pool->shm->job_type = JOB_UNDEFINED;
|
||||
memset(pool->threads, 0, sizeof(pool->threads));
|
||||
pool->print_progress = print_progress;
|
||||
sprintf(pool->shm->ipc_database_filepath, "/dev/shm/sist2-ipc-%d.sqlite", getpid());
|
||||
|
||||
pthread_mutexattr_t mutexattr;
|
||||
pthread_mutexattr_init(&mutexattr);
|
||||
@ -329,10 +339,7 @@ tpool_t *tpool_create(int thread_cnt, int print_progress) {
|
||||
pthread_cond_init(&(pool->shm->done_working_cond), &condattr);
|
||||
pthread_cond_init(&(pool->shm->workers_initialized_cond), &condattr);
|
||||
|
||||
remove("/dev/shm/ipc.sist2");
|
||||
remove("/dev/shm/ipc.sist2-wal");
|
||||
remove("/dev/shm/ipc.sist2-shm");
|
||||
ProcData.ipc_db = database_create("/dev/shm/ipc.sist2", IPC_PRODUCER_DATABASE);
|
||||
ProcData.ipc_db = database_create(pool->shm->ipc_database_filepath, IPC_PRODUCER_DATABASE);
|
||||
ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx;
|
||||
database_initialize(ProcData.ipc_db);
|
||||
|
||||
|
@ -19,8 +19,6 @@ int tpool_add_work(tpool_t *pool, job_t *job);
|
||||
|
||||
void tpool_wait(tpool_t *pool);
|
||||
|
||||
void tpool_dump_debug_info(tpool_t *pool);
|
||||
|
||||
void job_destroy(job_t *job);
|
||||
|
||||
#endif
|
||||
|
8
third-party/libscan/libscan/arc/arc.c
vendored
8
third-party/libscan/libscan/arc/arc.c
vendored
@ -147,7 +147,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
|
||||
}
|
||||
|
||||
if (ret != ARCHIVE_OK) {
|
||||
CTX_LOG_ERRORF(f->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
|
||||
CTX_LOG_ERRORF(f->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a));
|
||||
archive_read_free(a);
|
||||
return SCAN_ERR_READ;
|
||||
}
|
||||
@ -169,7 +169,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
|
||||
meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
|
||||
meta_list->key = MetaContent;
|
||||
strcpy(meta_list->str_val, buf.buf);
|
||||
APPEND_META(doc, meta_list)
|
||||
APPEND_META(doc, meta_list);
|
||||
dyn_buffer_destroy(&buf);
|
||||
|
||||
} else {
|
||||
@ -212,13 +212,13 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
|
||||
double decompressed_size_ratio = (double) sub_job->vfile.st_size / (double) f->st_size;
|
||||
if (decompressed_size_ratio > MAX_DECOMPRESSED_SIZE_RATIO) {
|
||||
CTX_LOG_DEBUGF("arc.c", "Skipped %s, possible zip bomb (decompressed_size_ratio=%f)", sub_job->filepath,
|
||||
decompressed_size_ratio)
|
||||
decompressed_size_ratio);
|
||||
break;
|
||||
}
|
||||
|
||||
// Handle excludes
|
||||
if (exclude != NULL && EXCLUDED(sub_job->filepath)) {
|
||||
CTX_LOG_DEBUGF("arc.c", "Excluded: %s", sub_job->filepath)
|
||||
CTX_LOG_DEBUGF("arc.c", "Excluded: %s", sub_job->filepath);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
4
third-party/libscan/libscan/comic/comic.c
vendored
4
third-party/libscan/libscan/comic/comic.c
vendored
@ -18,7 +18,7 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE);
|
||||
if (ret != ARCHIVE_OK) {
|
||||
CTX_LOG_ERRORF(f->filepath, "(cbr.c) [%d] %s", ret, archive_error_string(a))
|
||||
CTX_LOG_ERRORF(f->filepath, "(cbr.c) [%d] %s", ret, archive_error_string(a));
|
||||
archive_read_free(a);
|
||||
return;
|
||||
}
|
||||
@ -38,7 +38,7 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
if (read != entry_size) {
|
||||
const char *err_str = archive_error_string(a);
|
||||
if (err_str) {
|
||||
CTX_LOG_ERRORF("comic.c", "Error while reading entry: %s", err_str)
|
||||
CTX_LOG_ERRORF("comic.c", "Error while reading entry: %s", err_str);
|
||||
}
|
||||
free(buf);
|
||||
break;
|
||||
|
38
third-party/libscan/libscan/ebook/ebook.c
vendored
38
third-party/libscan/libscan/ebook/ebook.c
vendored
@ -54,7 +54,7 @@ load_pixmap(scan_ebook_ctx_t *ctx, int page, fz_context *fzctx, fz_document *fzd
|
||||
fz_catch(fzctx)err = 1;
|
||||
|
||||
if (err != 0) {
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -86,14 +86,14 @@ load_pixmap(scan_ebook_ctx_t *ctx, int page, fz_context *fzctx, fz_document *fzd
|
||||
} fz_catch(fzctx)err = fzctx->error.errcode;
|
||||
|
||||
if (err != 0) {
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message);
|
||||
fz_drop_page(fzctx, *cover);
|
||||
fz_drop_pixmap(fzctx, pixmap);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (pixmap->n != 3) {
|
||||
CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n)
|
||||
CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n);
|
||||
fz_drop_page(fzctx, *cover);
|
||||
fz_drop_pixmap(fzctx, pixmap);
|
||||
return NULL;
|
||||
@ -113,7 +113,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
|
||||
if (pixmap_is_blank(pixmap)) {
|
||||
fz_drop_page(fzctx, cover);
|
||||
fz_drop_pixmap(fzctx, pixmap);
|
||||
CTX_LOG_DEBUG(doc->filepath, "Cover page is blank, using page 1 instead")
|
||||
CTX_LOG_DEBUG(doc->filepath, "Cover page is blank, using page 1 instead");
|
||||
pixmap = load_pixmap(ctx, 1, fzctx, fzdoc, doc, &cover);
|
||||
if (pixmap == NULL) {
|
||||
return FALSE;
|
||||
@ -161,7 +161,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
|
||||
av_init_packet(&jpeg_packet);
|
||||
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
|
||||
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1);
|
||||
ctx->store(doc->doc_id, 0, (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
|
||||
free(samples);
|
||||
@ -180,14 +180,14 @@ void fz_err_callback(void *user, const char *message) {
|
||||
document_t *doc = (document_t *) user;
|
||||
|
||||
const scan_ebook_ctx_t *ctx = &thread_ctx;
|
||||
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message)
|
||||
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message);
|
||||
}
|
||||
|
||||
void fz_warn_callback(void *user, const char *message) {
|
||||
document_t *doc = (document_t *) user;
|
||||
|
||||
const scan_ebook_ctx_t *ctx = &thread_ctx;
|
||||
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message)
|
||||
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message);
|
||||
}
|
||||
|
||||
static void init_fzctx(fz_context *fzctx, document_t *doc) {
|
||||
@ -243,7 +243,7 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
|
||||
|
||||
if (img->w >= MIN_OCR_WIDTH && img->h >= MIN_OCR_HEIGHT && OCR_IS_VALID_BPP(img->n)) {
|
||||
fz_pixmap *pix = img->get_pixmap(fzctx, img, NULL, img->w, img->h, &l2factor);
|
||||
ocr_extract_text(thread_ctx.tesseract_path, thread_ctx.tesseract_lang, pix->samples, pix->w, pix->h, pix->n, pix->stride, pix->xres, fill_image_ocr_cb);
|
||||
ocr_extract_text(thread_ctx.tesseract_path, thread_ctx.tesseract_lang, pix->samples, pix->w, pix->h, pix->n, (int)pix->stride, pix->xres, fill_image_ocr_cb);
|
||||
fz_drop_pixmap(fzctx, pix);
|
||||
}
|
||||
}
|
||||
@ -282,14 +282,14 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
|
||||
fz_catch(fzctx)err = fzctx->error.errcode;
|
||||
|
||||
if (err) {
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, fzctx->error.message)
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, fzctx->error.message);
|
||||
fz_drop_stream(fzctx, stream);
|
||||
fz_drop_document(fzctx, fzdoc);
|
||||
fz_drop_context(fzctx);
|
||||
return;
|
||||
}
|
||||
|
||||
APPEND_LONG_META(doc, MetaPages, page_count)
|
||||
APPEND_LONG_META(doc, MetaPages, page_count);
|
||||
|
||||
if (ctx->enable_tn) {
|
||||
if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) {
|
||||
@ -312,7 +312,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
|
||||
fz_catch(fzctx);
|
||||
|
||||
if (strlen(title) > 0) {
|
||||
APPEND_UTF8_META(doc, MetaTitle, title)
|
||||
APPEND_UTF8_META(doc, MetaTitle, title);
|
||||
}
|
||||
|
||||
char author[4096] = {'\0',};
|
||||
@ -320,7 +320,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
|
||||
fz_catch(fzctx);
|
||||
|
||||
if (strlen(author) > 0) {
|
||||
APPEND_UTF8_META(doc, MetaAuthor, author)
|
||||
APPEND_UTF8_META(doc, MetaAuthor, author);
|
||||
}
|
||||
|
||||
|
||||
@ -334,7 +334,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
|
||||
fz_try(fzctx)page = fz_load_page(fzctx, fzdoc, current_page);
|
||||
fz_catch(fzctx)err = fzctx->error.errcode;
|
||||
if (err != 0) {
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message);
|
||||
text_buffer_destroy(&thread_buffer);
|
||||
fz_drop_page(fzctx, page);
|
||||
fz_drop_stream(fzctx, stream);
|
||||
@ -363,7 +363,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
|
||||
} fz_catch(fzctx)err = fzctx->error.errcode;
|
||||
|
||||
if (err != 0) {
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message);
|
||||
text_buffer_destroy(&thread_buffer);
|
||||
fz_drop_page(fzctx, page);
|
||||
fz_drop_stext_page(fzctx, stext);
|
||||
@ -393,7 +393,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
|
||||
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur);
|
||||
meta_content->key = MetaContent;
|
||||
memcpy(meta_content->str_val, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur);
|
||||
APPEND_META(doc, meta_content)
|
||||
APPEND_META(doc, meta_content);
|
||||
|
||||
text_buffer_destroy(&thread_buffer);
|
||||
}
|
||||
@ -418,7 +418,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE);
|
||||
if (ret != ARCHIVE_OK) {
|
||||
CTX_LOG_ERRORF(f->filepath, "(ebook.c) [%d] %s", ret, archive_error_string(a))
|
||||
CTX_LOG_ERRORF(f->filepath, "(ebook.c) [%d] %s", ret, archive_error_string(a));
|
||||
archive_read_free(a);
|
||||
return;
|
||||
}
|
||||
@ -439,7 +439,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
if (read != entry_size) {
|
||||
const char *err_str = archive_error_string(a);
|
||||
if (err_str) {
|
||||
CTX_LOG_ERRORF("ebook.c", "Error while reading entry: %s", err_str)
|
||||
CTX_LOG_ERRORF("ebook.c", "Error while reading entry: %s", err_str);
|
||||
}
|
||||
free(buf);
|
||||
break;
|
||||
@ -460,7 +460,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + content_buffer.dyn_buffer.cur);
|
||||
meta_content->key = MetaContent;
|
||||
memcpy(meta_content->str_val, content_buffer.dyn_buffer.buf, content_buffer.dyn_buffer.cur);
|
||||
APPEND_META(doc, meta_content)
|
||||
APPEND_META(doc, meta_content);
|
||||
|
||||
text_buffer_destroy(&content_buffer);
|
||||
|
||||
@ -477,7 +477,7 @@ void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, docume
|
||||
size_t buf_len;
|
||||
void *buf = read_all(f, &buf_len);
|
||||
if (buf == NULL) {
|
||||
CTX_LOG_ERROR(f->filepath, "read_all() failed")
|
||||
CTX_LOG_ERROR(f->filepath, "read_all() failed");
|
||||
return;
|
||||
}
|
||||
|
||||
|
12
third-party/libscan/libscan/font/font.c
vendored
12
third-party/libscan/libscan/font/font.c
vendored
@ -146,7 +146,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
size_t buf_len = 0;
|
||||
void *buf = read_all(f, &buf_len);
|
||||
if (buf == NULL) {
|
||||
CTX_LOG_ERROR(f->filepath, "read_all() failed")
|
||||
CTX_LOG_ERROR(f->filepath, "read_all() failed");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -154,7 +154,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, (int) buf_len, 0, &face);
|
||||
if (err != 0) {
|
||||
CTX_LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err,
|
||||
FT_Error_String(err))
|
||||
FT_Error_String(err));
|
||||
free(buf);
|
||||
return;
|
||||
}
|
||||
@ -174,7 +174,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name));
|
||||
meta_name->key = MetaFontName;
|
||||
strcpy(meta_name->str_val, font_name);
|
||||
APPEND_META(doc, meta_name)
|
||||
APPEND_META(doc, meta_name);
|
||||
|
||||
if (!ctx->enable_tn) {
|
||||
FT_Done_Face(face);
|
||||
@ -188,7 +188,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
err = FT_Set_Pixel_Sizes(face, 0, pixel);
|
||||
if (err != 0) {
|
||||
CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err,
|
||||
FT_Error_String(err))
|
||||
FT_Error_String(err));
|
||||
FT_Done_Face(face);
|
||||
free(buf);
|
||||
return;
|
||||
@ -210,7 +210,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
|
||||
if (err != 0) {
|
||||
CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err,
|
||||
FT_Error_String(err))
|
||||
FT_Error_String(err));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -231,7 +231,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
dyn_buffer_t bmp_data = dyn_buffer_create();
|
||||
bmp_format(&bmp_data, dimensions, bitmap);
|
||||
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1);
|
||||
ctx->store(doc->doc_id, 0, bmp_data.buf, bmp_data.cur);
|
||||
|
||||
dyn_buffer_destroy(&bmp_data);
|
||||
|
2
third-party/libscan/libscan/json/json.c
vendored
2
third-party/libscan/libscan/json/json.c
vendored
@ -33,7 +33,7 @@ int json_extract_text(cJSON *json, text_buffer_t *tex) {
|
||||
scan_code_t parse_json(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
if (f->st_size > JSON_MAX_FILE_SIZE) {
|
||||
CTX_LOG_WARNINGF("json.c", "File larger than maximum allowed [%s]", f->filepath)
|
||||
CTX_LOG_WARNINGF("json.c", "File larger than maximum allowed [%s]", f->filepath);
|
||||
return SCAN_ERR_SKIP;
|
||||
}
|
||||
|
||||
|
16
third-party/libscan/libscan/macros.h
vendored
16
third-party/libscan/libscan/macros.h
vendored
@ -25,20 +25,20 @@
|
||||
#define SHA1_STR_LENGTH (SHA1_DIGEST_LENGTH * 2 + 1)
|
||||
#define MD5_STR_LENGTH (MD5_DIGEST_LENGTH * 2 + 1)
|
||||
|
||||
#define APPEND_STR_META(doc, keyname, value) \
|
||||
#define APPEND_STR_META(doc, keyname, value) do {\
|
||||
{meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \
|
||||
meta_str->key = keyname; \
|
||||
strcpy(meta_str->str_val, value); \
|
||||
APPEND_META(doc, meta_str)}
|
||||
APPEND_META(doc, meta_str);}} while(0)
|
||||
|
||||
#define APPEND_LONG_META(doc, keyname, value) \
|
||||
#define APPEND_LONG_META(doc, keyname, value) do{\
|
||||
{meta_line_t *meta_long = malloc(sizeof(meta_line_t)); \
|
||||
meta_long->key = keyname; \
|
||||
meta_long->long_val = value; \
|
||||
APPEND_META(doc, meta_long)}
|
||||
APPEND_META(doc, meta_long);}} while(0)
|
||||
|
||||
|
||||
#define APPEND_META(doc, meta) \
|
||||
#define APPEND_META(doc, meta) do {\
|
||||
meta->next = NULL;\
|
||||
if (doc->meta_head == NULL) {\
|
||||
doc->meta_head = meta;\
|
||||
@ -46,7 +46,7 @@
|
||||
} else {\
|
||||
doc->meta_tail->next = meta;\
|
||||
doc->meta_tail = meta;\
|
||||
}
|
||||
}}while(0)
|
||||
|
||||
#define APPEND_UTF8_META(doc, keyname, str) \
|
||||
text_buffer_t tex = text_buffer_create(-1); \
|
||||
@ -55,5 +55,5 @@
|
||||
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
|
||||
meta_tag->key = keyname; \
|
||||
strcpy(meta_tag->str_val, tex.dyn_buffer.buf); \
|
||||
APPEND_META(doc, meta_tag) \
|
||||
text_buffer_destroy(&tex);
|
||||
APPEND_META(doc, meta_tag); \
|
||||
text_buffer_destroy(&tex)
|
||||
|
92
third-party/libscan/libscan/media/media.c
vendored
92
third-party/libscan/libscan/media/media.c
vendored
@ -163,7 +163,7 @@ static void read_subtitles(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, i
|
||||
|
||||
text_buffer_terminate_string(&tex);
|
||||
|
||||
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)
|
||||
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);
|
||||
text_buffer_destroy(&tex);
|
||||
avcodec_free_context(&decoder);
|
||||
}
|
||||
@ -190,7 +190,7 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d
|
||||
CTX_LOG_WARNINGF(doc->filepath,
|
||||
"(media.c) avcodec_read_frame() returned error code [%d] %s",
|
||||
read_frame_ret, av_err2str(read_frame_ret)
|
||||
)
|
||||
);
|
||||
}
|
||||
frame_and_packet_free(result);
|
||||
return NULL;
|
||||
@ -210,7 +210,7 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d
|
||||
CTX_LOG_ERRORF(doc->filepath,
|
||||
"(media.c) avcodec_send_packet() returned error code [%d] %s",
|
||||
decode_ret, av_err2str(decode_ret)
|
||||
)
|
||||
);
|
||||
frame_and_packet_free(result);
|
||||
return NULL;
|
||||
}
|
||||
@ -230,7 +230,7 @@ void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDic
|
||||
while (meta != NULL) {
|
||||
if (meta->key == key) {
|
||||
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'",
|
||||
key, meta->str_val, key, tag->value)
|
||||
key, meta->str_val, key, tag->value);
|
||||
return;
|
||||
}
|
||||
meta = meta->next;
|
||||
@ -243,7 +243,7 @@ void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDic
|
||||
meta_tag->key = key;
|
||||
strcpy(meta_tag->str_val, tex.dyn_buffer.buf);
|
||||
|
||||
APPEND_META(doc, meta_tag)
|
||||
APPEND_META(doc, meta_tag);
|
||||
text_buffer_destroy(&tex);
|
||||
}
|
||||
|
||||
@ -253,7 +253,7 @@ void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDic
|
||||
#define STRCPY_TOLOWER(dst, str) \
|
||||
strncpy(dst, str, sizeof(dst)); \
|
||||
char *ptr = dst; \
|
||||
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
|
||||
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr)
|
||||
|
||||
__always_inline
|
||||
static void append_audio_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) {
|
||||
@ -261,18 +261,18 @@ static void append_audio_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx
|
||||
AVDictionaryEntry *tag = NULL;
|
||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
char key[256];
|
||||
STRCPY_TOLOWER(key, tag->key)
|
||||
STRCPY_TOLOWER(key, tag->key);
|
||||
|
||||
if (strcmp(key, "artist") == 0) {
|
||||
APPEND_TAG_META(MetaArtist)
|
||||
APPEND_TAG_META(MetaArtist);
|
||||
} else if (strcmp(key, "genre") == 0) {
|
||||
APPEND_TAG_META(MetaGenre)
|
||||
APPEND_TAG_META(MetaGenre);
|
||||
} else if (strcmp(key, "title") == 0) {
|
||||
APPEND_TAG_META(MetaTitle)
|
||||
APPEND_TAG_META(MetaTitle);
|
||||
} else if (strcmp(key, "album_artist") == 0) {
|
||||
APPEND_TAG_META(MetaAlbumArtist)
|
||||
APPEND_TAG_META(MetaAlbumArtist);
|
||||
} else if (strcmp(key, "album") == 0) {
|
||||
APPEND_TAG_META(MetaAlbum)
|
||||
APPEND_TAG_META(MetaAlbum);
|
||||
} else if (strcmp(key, "comment") == 0) {
|
||||
append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent);
|
||||
}
|
||||
@ -291,14 +291,14 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f
|
||||
if (meta_duration->long_val > INT32_MAX) {
|
||||
meta_duration->long_val = 0;
|
||||
}
|
||||
APPEND_META(doc, meta_duration)
|
||||
APPEND_META(doc, meta_duration);
|
||||
}
|
||||
|
||||
if (pFormatCtx->bit_rate != 0) {
|
||||
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
|
||||
meta_bitrate->key = MetaMediaBitrate;
|
||||
meta_bitrate->long_val = pFormatCtx->bit_rate;
|
||||
APPEND_META(doc, meta_bitrate)
|
||||
APPEND_META(doc, meta_bitrate);
|
||||
}
|
||||
}
|
||||
|
||||
@ -306,7 +306,7 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f
|
||||
if (is_video) {
|
||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
char key[256];
|
||||
STRCPY_TOLOWER(key, tag->key)
|
||||
STRCPY_TOLOWER(key, tag->key);
|
||||
|
||||
if (strcmp(key, "title") == 0) {
|
||||
append_tag_meta_if_not_exists(ctx, doc, tag, MetaTitle);
|
||||
@ -320,38 +320,38 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f
|
||||
// EXIF metadata
|
||||
while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
char key[256];
|
||||
STRCPY_TOLOWER(key, tag->key)
|
||||
STRCPY_TOLOWER(key, tag->key);
|
||||
|
||||
if (strcmp(key, "artist") == 0) {
|
||||
append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist);
|
||||
} else if (strcmp(key, "imagedescription") == 0) {
|
||||
append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent);
|
||||
} else if (strcmp(key, "make") == 0) {
|
||||
APPEND_TAG_META(MetaExifMake)
|
||||
APPEND_TAG_META(MetaExifMake);
|
||||
} else if (strcmp(key, "model") == 0) {
|
||||
APPEND_TAG_META(MetaExifModel)
|
||||
APPEND_TAG_META(MetaExifModel);
|
||||
} else if (strcmp(key, "software") == 0) {
|
||||
APPEND_TAG_META(MetaExifSoftware)
|
||||
APPEND_TAG_META(MetaExifSoftware);
|
||||
} else if (strcmp(key, "fnumber") == 0) {
|
||||
APPEND_TAG_META(MetaExifFNumber)
|
||||
APPEND_TAG_META(MetaExifFNumber);
|
||||
} else if (strcmp(key, "focallength") == 0) {
|
||||
APPEND_TAG_META(MetaExifFocalLength)
|
||||
APPEND_TAG_META(MetaExifFocalLength);
|
||||
} else if (strcmp(key, "usercomment") == 0) {
|
||||
APPEND_TAG_META(MetaExifUserComment)
|
||||
APPEND_TAG_META(MetaExifUserComment);
|
||||
} else if (strcmp(key, "isospeedratings") == 0) {
|
||||
APPEND_TAG_META(MetaExifIsoSpeedRatings)
|
||||
APPEND_TAG_META(MetaExifIsoSpeedRatings);
|
||||
} else if (strcmp(key, "exposuretime") == 0) {
|
||||
APPEND_TAG_META(MetaExifExposureTime)
|
||||
APPEND_TAG_META(MetaExifExposureTime);
|
||||
} else if (strcmp(key, "datetime") == 0) {
|
||||
APPEND_TAG_META(MetaExifDateTime)
|
||||
APPEND_TAG_META(MetaExifDateTime);
|
||||
} else if (strcmp(key, "gpslatitude") == 0) {
|
||||
APPEND_TAG_META(MetaExifGpsLatitudeDMS)
|
||||
APPEND_TAG_META(MetaExifGpsLatitudeDMS);
|
||||
} else if (strcmp(key, "gpslatituderef") == 0) {
|
||||
APPEND_TAG_META(MetaExifGpsLatitudeRef)
|
||||
APPEND_TAG_META(MetaExifGpsLatitudeRef);
|
||||
} else if (strcmp(key, "gpslongitude") == 0) {
|
||||
APPEND_TAG_META(MetaExifGpsLongitudeDMS)
|
||||
APPEND_TAG_META(MetaExifGpsLongitudeDMS);
|
||||
} else if (strcmp(key, "gpslongituderef") == 0) {
|
||||
APPEND_TAG_META(MetaExifGpsLongitudeRef)
|
||||
APPEND_TAG_META(MetaExifGpsLongitudeRef);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -432,11 +432,11 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
|
||||
CTX_LOG_DEBUGF(
|
||||
doc->filepath,
|
||||
"(media.c) Could not seek media file: %s", av_err2str(seek_ret)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
if (seek_ok == FALSE && thumbnail_index != 0) {
|
||||
CTX_LOG_WARNING(doc->filepath, "(media.c) Could not seek media file. Can't generate additional thumbnails.")
|
||||
CTX_LOG_WARNING(doc->filepath, "(media.c) Could not seek media file. Can't generate additional thumbnails.");
|
||||
return SAVE_THUMBNAIL_FAILED;
|
||||
}
|
||||
}
|
||||
@ -522,7 +522,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
|
||||
const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id);
|
||||
|
||||
if (desc != NULL) {
|
||||
APPEND_STR_META(doc, MetaMediaAudioCodec, desc->name)
|
||||
APPEND_STR_META(doc, MetaMediaAudioCodec, desc->name);
|
||||
}
|
||||
|
||||
audio_stream = i;
|
||||
@ -533,18 +533,18 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
|
||||
const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id);
|
||||
|
||||
if (desc != NULL) {
|
||||
APPEND_STR_META(doc, MetaMediaVideoCodec, desc->name)
|
||||
APPEND_STR_META(doc, MetaMediaVideoCodec, desc->name);
|
||||
}
|
||||
|
||||
meta_line_t *meta_w = malloc(sizeof(meta_line_t));
|
||||
meta_w->key = MetaWidth;
|
||||
meta_w->long_val = stream->codecpar->width;
|
||||
APPEND_META(doc, meta_w)
|
||||
APPEND_META(doc, meta_w);
|
||||
|
||||
meta_line_t *meta_h = malloc(sizeof(meta_line_t));
|
||||
meta_h->key = MetaHeight;
|
||||
meta_h->long_val = stream->codecpar->height;
|
||||
APPEND_META(doc, meta_h)
|
||||
APPEND_META(doc, meta_h);
|
||||
|
||||
video_stream = i;
|
||||
}
|
||||
@ -611,7 +611,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
|
||||
}
|
||||
|
||||
if (number_of_thumbnails_generated > 0) {
|
||||
APPEND_LONG_META(doc, MetaThumbnail, number_of_thumbnails_generated)
|
||||
APPEND_LONG_META(doc, MetaThumbnail, number_of_thumbnails_generated);
|
||||
}
|
||||
|
||||
avcodec_free_context(&decoder);
|
||||
@ -625,12 +625,12 @@ void parse_media_filename(scan_media_ctx_t *ctx, const char *filepath, document_
|
||||
|
||||
AVFormatContext *pFormatCtx = avformat_alloc_context();
|
||||
if (pFormatCtx == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
|
||||
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()");
|
||||
return;
|
||||
}
|
||||
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
|
||||
if (res < 0) {
|
||||
CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
|
||||
CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res));
|
||||
avformat_close_input(&pFormatCtx);
|
||||
avformat_free_context(pFormatCtx);
|
||||
return;
|
||||
@ -724,7 +724,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc,
|
||||
|
||||
AVFormatContext *pFormatCtx = avformat_alloc_context();
|
||||
if (pFormatCtx == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
|
||||
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -737,13 +737,13 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc,
|
||||
if (f->st_size <= ctx->max_media_buffer) {
|
||||
int ret = memfile_open(f, &memfile);
|
||||
if (ret == 0) {
|
||||
CTX_LOG_DEBUGF(f->filepath, "Loading media file in memory (%ldB)", f->st_size)
|
||||
CTX_LOG_DEBUGF(f->filepath, "Loading media file in memory (%ldB)", f->st_size);
|
||||
io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek);
|
||||
}
|
||||
}
|
||||
|
||||
if (io_ctx == NULL) {
|
||||
CTX_LOG_DEBUGF(f->filepath, "Reading media file without seek support", f->st_size)
|
||||
CTX_LOG_DEBUGF(f->filepath, "Reading media file without seek support", f->st_size);
|
||||
io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
|
||||
}
|
||||
|
||||
@ -752,7 +752,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc,
|
||||
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
|
||||
if (res < 0) {
|
||||
if (res != -5) {
|
||||
CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
|
||||
CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res));
|
||||
}
|
||||
av_free(io_ctx->buffer);
|
||||
memfile_close(&memfile);
|
||||
@ -787,7 +787,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
|
||||
|
||||
AVFormatContext *pFormatCtx = avformat_alloc_context();
|
||||
if (pFormatCtx == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
|
||||
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
@ -795,7 +795,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
|
||||
|
||||
int ret = memfile_open_buf(buf, buf_len, &memfile);
|
||||
if (ret == 0) {
|
||||
CTX_LOG_DEBUGF(doc->filepath, "Loading media file in memory (%ldB)", buf_len)
|
||||
CTX_LOG_DEBUGF(doc->filepath, "Loading media file in memory (%ldB)", buf_len);
|
||||
io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek);
|
||||
} else {
|
||||
avformat_close_input(&pFormatCtx);
|
||||
@ -850,7 +850,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
|
||||
}
|
||||
|
||||
if (scaled_frame == STORE_AS_IS) {
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1);
|
||||
ctx->store(doc->doc_id, 0, frame_and_packet->packet->data, frame_and_packet->packet->size);
|
||||
} else {
|
||||
// Encode frame to jpeg
|
||||
@ -863,7 +863,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
|
||||
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
|
||||
|
||||
// Save thumbnail
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1);
|
||||
ctx->store(doc->doc_id, 0, jpeg_packet.data, jpeg_packet.size);
|
||||
|
||||
av_packet_unref(&jpeg_packet);
|
||||
|
18
third-party/libscan/libscan/mobi/scan_mobi.c
vendored
18
third-party/libscan/libscan/mobi/scan_mobi.c
vendored
@ -8,7 +8,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
MOBIData *m = mobi_init();
|
||||
if (m == NULL) {
|
||||
CTX_LOG_ERROR(f->filepath, "mobi_init() failed")
|
||||
CTX_LOG_ERROR(f->filepath, "mobi_init() failed");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -16,7 +16,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
char* buf = read_all(f, &buf_len);
|
||||
if (buf == NULL) {
|
||||
mobi_free(m);
|
||||
CTX_LOG_ERROR(f->filepath, "read_all() failed")
|
||||
CTX_LOG_ERROR(f->filepath, "read_all() failed");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -24,7 +24,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
if (file == NULL) {
|
||||
mobi_free(m);
|
||||
free(buf);
|
||||
CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno)
|
||||
CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -33,25 +33,25 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
if (mobi_ret != MOBI_SUCCESS) {
|
||||
mobi_free(m);
|
||||
free(buf);
|
||||
CTX_LOG_ERRORF(f->filepath, "mobi_laod_file() returned error code [%d]", mobi_ret)
|
||||
CTX_LOG_ERRORF(f->filepath, "mobi_laod_file() returned error code [%d]", mobi_ret);
|
||||
return;
|
||||
}
|
||||
|
||||
char *author = mobi_meta_get_author(m);
|
||||
if (author != NULL) {
|
||||
APPEND_STR_META(doc, MetaAuthor, author)
|
||||
APPEND_STR_META(doc, MetaAuthor, author);
|
||||
free(author);
|
||||
}
|
||||
char *title = mobi_meta_get_title(m);
|
||||
if (title != NULL) {
|
||||
APPEND_STR_META(doc, MetaTitle, title)
|
||||
APPEND_STR_META(doc, MetaTitle, title);
|
||||
free(title);
|
||||
}
|
||||
|
||||
const size_t maxlen = mobi_get_text_maxsize(m);
|
||||
if (maxlen == MOBI_NOTSET) {
|
||||
free(buf);
|
||||
CTX_LOG_DEBUGF("%s", "Invalid text maxsize: %zu", maxlen)
|
||||
CTX_LOG_DEBUGF("%s", "Invalid text maxsize: %zu", maxlen);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -62,7 +62,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
mobi_free(m);
|
||||
free(content_str);
|
||||
free(buf);
|
||||
CTX_LOG_ERRORF(f->filepath, "mobi_get_rawml() returned error code [%d]", mobi_ret)
|
||||
CTX_LOG_ERRORF(f->filepath, "mobi_get_rawml() returned error code [%d]", mobi_ret);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -70,7 +70,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
text_buffer_append_markup(&tex, content_str);
|
||||
text_buffer_terminate_string(&tex);
|
||||
|
||||
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)
|
||||
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);
|
||||
|
||||
free(content_str);
|
||||
free(buf);
|
||||
|
10
third-party/libscan/libscan/msdoc/msdoc.c
vendored
10
third-party/libscan/libscan/msdoc/msdoc.c
vendored
@ -39,12 +39,12 @@ void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, voi
|
||||
iInitDocument(file_in, (int) buf_len);
|
||||
const char *author = szGetAuthor();
|
||||
if (author != NULL) {
|
||||
APPEND_UTF8_META(doc, MetaAuthor, author)
|
||||
APPEND_UTF8_META(doc, MetaAuthor, author);
|
||||
}
|
||||
|
||||
const char *title = szGetTitle();
|
||||
if (title != NULL) {
|
||||
APPEND_UTF8_META(doc, MetaTitle, title)
|
||||
APPEND_UTF8_META(doc, MetaTitle, title);
|
||||
}
|
||||
vFreeDocument();
|
||||
|
||||
@ -60,7 +60,7 @@ void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, voi
|
||||
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
|
||||
meta_content->key = MetaContent;
|
||||
memcpy(meta_content->str_val, tex.dyn_buffer.buf, tex.dyn_buffer.cur);
|
||||
APPEND_META(doc, meta_content)
|
||||
APPEND_META(doc, meta_content);
|
||||
|
||||
text_buffer_destroy(&tex);
|
||||
}
|
||||
@ -74,14 +74,14 @@ void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
size_t buf_len;
|
||||
char *buf = read_all(f, &buf_len);
|
||||
if (buf == NULL) {
|
||||
CTX_LOG_ERROR(f->filepath, "read_all() failed")
|
||||
CTX_LOG_ERROR(f->filepath, "read_all() failed");
|
||||
return;
|
||||
}
|
||||
|
||||
FILE *file = fmemopen(buf, buf_len, "rb");
|
||||
if (file == NULL) {
|
||||
free(buf);
|
||||
CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno)
|
||||
CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno);
|
||||
return;
|
||||
}
|
||||
|
||||
|
30
third-party/libscan/libscan/ooxml/ooxml.c
vendored
30
third-party/libscan/libscan/ooxml/ooxml.c
vendored
@ -39,7 +39,7 @@ int extract_text(scan_ooxml_ctx_t *ctx, xmlDoc *xml, xmlNode *node, text_buffer_
|
||||
xmlErrorPtr err = xmlGetLastError();
|
||||
if (err != NULL) {
|
||||
if (err->level == XML_ERR_FATAL) {
|
||||
CTX_LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message)
|
||||
CTX_LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@ -85,13 +85,13 @@ static int read_part(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *bu
|
||||
XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
|
||||
|
||||
if (xml == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
|
||||
CTX_LOG_ERROR(doc->filepath, "Could not parse XML");
|
||||
return READ_PART_ERR;
|
||||
}
|
||||
|
||||
xmlNode *root = xmlDocGetRootElement(xml);
|
||||
if (root == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "Empty document")
|
||||
CTX_LOG_ERROR(doc->filepath, "Empty document");
|
||||
xmlFreeDoc(xml);
|
||||
return READ_PART_ERR;
|
||||
}
|
||||
@ -108,13 +108,13 @@ static int read_doc_props_app(scan_ooxml_ctx_t *ctx, struct archive *a, document
|
||||
XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
|
||||
|
||||
if (xml == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
|
||||
CTX_LOG_ERROR(doc->filepath, "Could not parse XML");
|
||||
return -1;
|
||||
}
|
||||
|
||||
xmlNode *root = xmlDocGetRootElement(xml);
|
||||
if (root == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "Empty document")
|
||||
CTX_LOG_ERROR(doc->filepath, "Empty document");
|
||||
xmlFreeDoc(xml);
|
||||
return -1;
|
||||
}
|
||||
@ -127,7 +127,7 @@ static int read_doc_props_app(scan_ooxml_ctx_t *ctx, struct archive *a, document
|
||||
}
|
||||
|
||||
if (xmlStrEqual(child->name, _X("Pages"))) {
|
||||
APPEND_LONG_META(doc, MetaPages, strtol((char *) text, NULL, 10))
|
||||
APPEND_LONG_META(doc, MetaPages, strtol((char *) text, NULL, 10));
|
||||
}
|
||||
|
||||
xmlFree(text);
|
||||
@ -144,13 +144,13 @@ static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, document_t *
|
||||
XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
|
||||
|
||||
if (xml == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
|
||||
CTX_LOG_ERROR(doc->filepath, "Could not parse XML");
|
||||
return -1;
|
||||
}
|
||||
|
||||
xmlNode *root = xmlDocGetRootElement(xml);
|
||||
if (root == NULL) {
|
||||
CTX_LOG_ERROR(doc->filepath, "Empty document")
|
||||
CTX_LOG_ERROR(doc->filepath, "Empty document");
|
||||
xmlFreeDoc(xml);
|
||||
return -1;
|
||||
}
|
||||
@ -163,11 +163,11 @@ static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, document_t *
|
||||
}
|
||||
|
||||
if (xmlStrEqual(child->name, _X("title"))) {
|
||||
APPEND_STR_META(doc, MetaTitle, (char *) text)
|
||||
APPEND_STR_META(doc, MetaTitle, (char *) text);
|
||||
} else if (xmlStrEqual(child->name, _X("creator"))) {
|
||||
APPEND_STR_META(doc, MetaAuthor, (char *) text)
|
||||
APPEND_STR_META(doc, MetaAuthor, (char *) text);
|
||||
} else if (xmlStrEqual(child->name, _X("lastModifiedBy"))) {
|
||||
APPEND_STR_META(doc, MetaModifiedBy, (char *) text)
|
||||
APPEND_STR_META(doc, MetaModifiedBy, (char *) text);
|
||||
}
|
||||
|
||||
xmlFree(text);
|
||||
@ -190,7 +190,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
|
||||
char *buf = malloc(entry_size);
|
||||
archive_read_data(a, buf, entry_size);
|
||||
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1);
|
||||
ctx->store(doc->doc_id, 1, buf, entry_size);
|
||||
free(buf);
|
||||
}
|
||||
@ -200,7 +200,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
size_t buf_len;
|
||||
void *buf = read_all(f, &buf_len);
|
||||
if (buf == NULL) {
|
||||
CTX_LOG_ERROR(f->filepath, "read_all() failed")
|
||||
CTX_LOG_ERROR(f->filepath, "read_all() failed");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -209,7 +209,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
int ret = archive_read_open_memory(a, buf, buf_len);
|
||||
if (ret != ARCHIVE_OK) {
|
||||
CTX_LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a))
|
||||
CTX_LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a));
|
||||
archive_read_free(a);
|
||||
free(buf);
|
||||
return;
|
||||
@ -250,7 +250,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
|
||||
meta->key = MetaContent;
|
||||
strcpy(meta->str_val, tex.dyn_buffer.buf);
|
||||
APPEND_META(doc, meta)
|
||||
APPEND_META(doc, meta);
|
||||
}
|
||||
|
||||
archive_read_close(a);
|
||||
|
42
third-party/libscan/libscan/raw/raw.c
vendored
42
third-party/libscan/libscan/raw/raw.c
vendored
@ -83,7 +83,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
|
||||
av_init_packet(&jpeg_packet);
|
||||
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
|
||||
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1);
|
||||
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
|
||||
av_packet_unref(&jpeg_packet);
|
||||
@ -100,76 +100,76 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
libraw_data_t *libraw_lib = libraw_init(0);
|
||||
|
||||
if (!libraw_lib) {
|
||||
CTX_LOG_ERROR("raw.c", "Cannot create libraw handle")
|
||||
CTX_LOG_ERROR("raw.c", "Cannot create libraw handle");
|
||||
return;
|
||||
}
|
||||
|
||||
size_t buf_len = 0;
|
||||
void *buf = read_all(f, &buf_len);
|
||||
if (buf == NULL) {
|
||||
CTX_LOG_ERROR(f->filepath, "read_all() failed")
|
||||
CTX_LOG_ERROR(f->filepath, "read_all() failed");
|
||||
return;
|
||||
}
|
||||
|
||||
int ret = libraw_open_buffer(libraw_lib, buf, buf_len);
|
||||
if (ret != 0) {
|
||||
CTX_LOG_ERROR(f->filepath, "Could not open raw file")
|
||||
CTX_LOG_ERROR(f->filepath, "Could not open raw file");
|
||||
free(buf);
|
||||
libraw_close(libraw_lib);
|
||||
return;
|
||||
}
|
||||
|
||||
if (*libraw_lib->idata.model != '\0') {
|
||||
APPEND_STR_META(doc, MetaExifModel, libraw_lib->idata.model)
|
||||
APPEND_STR_META(doc, MetaExifModel, libraw_lib->idata.model);
|
||||
}
|
||||
if (*libraw_lib->idata.make != '\0') {
|
||||
APPEND_STR_META(doc, MetaExifMake, libraw_lib->idata.make)
|
||||
APPEND_STR_META(doc, MetaExifMake, libraw_lib->idata.make);
|
||||
}
|
||||
if (*libraw_lib->idata.software != '\0') {
|
||||
APPEND_STR_META(doc, MetaExifSoftware, libraw_lib->idata.software)
|
||||
APPEND_STR_META(doc, MetaExifSoftware, libraw_lib->idata.software);
|
||||
}
|
||||
APPEND_LONG_META(doc, MetaWidth, libraw_lib->sizes.width)
|
||||
APPEND_LONG_META(doc, MetaHeight, libraw_lib->sizes.height)
|
||||
APPEND_LONG_META(doc, MetaWidth, libraw_lib->sizes.width);
|
||||
APPEND_LONG_META(doc, MetaHeight, libraw_lib->sizes.height);
|
||||
char tmp[1024];
|
||||
snprintf(tmp, sizeof(tmp), "%g", libraw_lib->other.iso_speed);
|
||||
APPEND_STR_META(doc, MetaExifIsoSpeedRatings, tmp)
|
||||
APPEND_STR_META(doc, MetaExifIsoSpeedRatings, tmp);
|
||||
|
||||
if (*libraw_lib->other.desc != '\0') {
|
||||
APPEND_STR_META(doc, MetaContent, libraw_lib->other.desc)
|
||||
APPEND_STR_META(doc, MetaContent, libraw_lib->other.desc);
|
||||
}
|
||||
if (*libraw_lib->other.artist != '\0') {
|
||||
APPEND_STR_META(doc, MetaArtist, libraw_lib->other.artist)
|
||||
APPEND_STR_META(doc, MetaArtist, libraw_lib->other.artist);
|
||||
}
|
||||
|
||||
struct tm *time = localtime(&libraw_lib->other.timestamp);
|
||||
strftime(tmp, sizeof(tmp), "%Y:%m:%d %H:%M:%S", time);
|
||||
APPEND_STR_META(doc, MetaExifDateTime, tmp)
|
||||
APPEND_STR_META(doc, MetaExifDateTime, tmp);
|
||||
|
||||
snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.focal_len);
|
||||
APPEND_STR_META(doc, MetaExifFocalLength, tmp)
|
||||
APPEND_STR_META(doc, MetaExifFocalLength, tmp);
|
||||
|
||||
snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.aperture);
|
||||
APPEND_STR_META(doc, MetaExifFNumber, tmp)
|
||||
APPEND_STR_META(doc, MetaExifFNumber, tmp);
|
||||
|
||||
int denominator = (int) roundf(1 / libraw_lib->other.shutter);
|
||||
snprintf(tmp, sizeof(tmp), "1/%d", denominator);
|
||||
APPEND_STR_META(doc, MetaExifExposureTime, tmp)
|
||||
APPEND_STR_META(doc, MetaExifExposureTime, tmp);
|
||||
|
||||
libraw_gps_info_t gps = libraw_lib->other.parsed_gps;
|
||||
double gps_longitude_dec =
|
||||
(gps.longitude[0] + gps.longitude[1] / 60 + gps.longitude[2] / 3600) * DMS_REF(gps.longref);
|
||||
snprintf(tmp, sizeof(tmp), "%.15f", gps_longitude_dec);
|
||||
if (gps_longitude_dec != 0.0) {
|
||||
APPEND_STR_META(doc, MetaExifGpsLongitudeDec, tmp)
|
||||
APPEND_STR_META(doc, MetaExifGpsLongitudeDec, tmp);
|
||||
}
|
||||
|
||||
double gps_latitude_dec = (gps.latitude[0] + gps.latitude[1] / 60 + gps.latitude[2] / 3600) * DMS_REF(gps.latref);
|
||||
snprintf(tmp, sizeof(tmp), "%.15f", gps_latitude_dec);
|
||||
if (gps_latitude_dec != 0.0) {
|
||||
APPEND_STR_META(doc, MetaExifGpsLatitudeDec, tmp)
|
||||
APPEND_STR_META(doc, MetaExifGpsLatitudeDec, tmp);
|
||||
}
|
||||
|
||||
APPEND_STR_META(doc, MetaMediaVideoCodec, "raw")
|
||||
APPEND_STR_META(doc, MetaMediaVideoCodec, "raw");
|
||||
|
||||
if (!ctx->enable_tn) {
|
||||
free(buf);
|
||||
@ -179,7 +179,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
int unpack_ret = libraw_unpack_thumb(libraw_lib);
|
||||
if (unpack_ret != 0) {
|
||||
CTX_LOG_ERRORF(f->filepath, "libraw_unpack_thumb returned error code %d", unpack_ret)
|
||||
CTX_LOG_ERRORF(f->filepath, "libraw_unpack_thumb returned error code %d", unpack_ret);
|
||||
free(buf);
|
||||
libraw_close(libraw_lib);
|
||||
return;
|
||||
@ -212,7 +212,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
ret = libraw_unpack(libraw_lib);
|
||||
if (ret != 0) {
|
||||
CTX_LOG_ERROR(f->filepath, "Could not unpack raw file")
|
||||
CTX_LOG_ERROR(f->filepath, "Could not unpack raw file");
|
||||
free(buf);
|
||||
libraw_close(libraw_lib);
|
||||
return;
|
||||
|
20
third-party/libscan/libscan/scan.h
vendored
20
third-party/libscan/libscan/scan.h
vendored
@ -34,20 +34,20 @@ typedef int scan_code_t;
|
||||
#define LEVEL_ERROR 3
|
||||
#define LEVEL_FATAL 4
|
||||
|
||||
#define CTX_LOG_DEBUGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_DEBUG, fmt, __VA_ARGS__);
|
||||
#define CTX_LOG_DEBUG(filepath, str) ctx->log(filepath, LEVEL_DEBUG, str);
|
||||
#define CTX_LOG_DEBUGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_DEBUG, fmt, __VA_ARGS__)
|
||||
#define CTX_LOG_DEBUG(filepath, str) ctx->log(filepath, LEVEL_DEBUG, str)
|
||||
|
||||
#define CTX_LOG_INFOF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_INFO, fmt, __VA_ARGS__);
|
||||
#define CTX_LOG_INFO(filepath, str) ctx->log(filepath, LEVEL_INFO, str);
|
||||
#define CTX_LOG_INFOF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_INFO, fmt, __VA_ARGS__)
|
||||
#define CTX_LOG_INFO(filepath, str) ctx->log(filepath, LEVEL_INFO, str)
|
||||
|
||||
#define CTX_LOG_WARNINGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_WARNING, fmt, __VA_ARGS__);
|
||||
#define CTX_LOG_WARNING(filepath, str) ctx->log(filepath, LEVEL_WARNING, str);
|
||||
#define CTX_LOG_WARNINGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_WARNING, fmt, __VA_ARGS__)
|
||||
#define CTX_LOG_WARNING(filepath, str) ctx->log(filepath, LEVEL_WARNING, str)
|
||||
|
||||
#define CTX_LOG_ERRORF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_ERROR, fmt, __VA_ARGS__);
|
||||
#define CTX_LOG_ERROR(filepath, str) ctx->log(filepath, LEVEL_ERROR, str);
|
||||
#define CTX_LOG_ERRORF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_ERROR, fmt, __VA_ARGS__)
|
||||
#define CTX_LOG_ERROR(filepath, str) ctx->log(filepath, LEVEL_ERROR, str)
|
||||
|
||||
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
|
||||
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);
|
||||
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1)
|
||||
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1)
|
||||
|
||||
#define SIST_DOC_ID_LEN MD5_STR_LENGTH
|
||||
#define SIST_INDEX_ID_LEN MD5_STR_LENGTH
|
||||
|
12
third-party/libscan/libscan/text/text.c
vendored
12
third-party/libscan/libscan/text/text.c
vendored
@ -2,6 +2,10 @@
|
||||
|
||||
scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
if (ctx->content_size <= 0) {
|
||||
return SCAN_OK;
|
||||
}
|
||||
|
||||
int to_read = MIN(ctx->content_size, f->st_size);
|
||||
|
||||
if (to_read <= 2) {
|
||||
@ -11,7 +15,7 @@ scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
char *buf = malloc(to_read);
|
||||
int ret = f->read(f, buf, to_read);
|
||||
if (ret < 0) {
|
||||
CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret)
|
||||
CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret);
|
||||
free(buf);
|
||||
return SCAN_ERR_READ;
|
||||
}
|
||||
@ -39,12 +43,16 @@ scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
scan_code_t parse_markup(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
if (ctx->content_size <= 0) {
|
||||
return SCAN_OK;
|
||||
}
|
||||
|
||||
int to_read = MIN(MAX_MARKUP_SIZE, f->st_size);
|
||||
|
||||
char *buf = malloc(to_read + 1);
|
||||
int ret = f->read(f, buf, to_read);
|
||||
if (ret < 0) {
|
||||
CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret)
|
||||
CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret);
|
||||
free(buf);
|
||||
return SCAN_ERR_READ;
|
||||
}
|
||||
|
8
third-party/libscan/libscan/wpd/wpd.c
vendored
8
third-party/libscan/libscan/wpd/wpd.c
vendored
@ -10,14 +10,14 @@ scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
wpd_confidence_t conf = wpd_is_file_format_supported(stream);
|
||||
|
||||
if (conf == C_WPD_CONFIDENCE_SUPPORTED_ENCRYPTION || conf == C_WPD_CONFIDENCE_UNSUPPORTED_ENCRYPTION) {
|
||||
CTX_LOG_DEBUGF("wpd.c", "File is encrypted! Password-protected WPD files are not supported yet (conf=%d)", conf)
|
||||
CTX_LOG_DEBUGF("wpd.c", "File is encrypted! Password-protected WPD files are not supported yet (conf=%d)", conf);
|
||||
wpd_memory_stream_destroy(stream);
|
||||
free(buf);
|
||||
return SCAN_ERR_READ;
|
||||
}
|
||||
|
||||
if (conf != C_WPD_CONFIDENCE_EXCELLENT) {
|
||||
CTX_LOG_ERRORF("wpd.c", "Unsupported file format! [%s] (conf=%d)", doc->filepath, conf)
|
||||
CTX_LOG_ERRORF("wpd.c", "Unsupported file format! [%s] (conf=%d)", doc->filepath, conf);
|
||||
wpd_memory_stream_destroy(stream);
|
||||
free(buf);
|
||||
return SCAN_ERR_READ;
|
||||
@ -28,11 +28,11 @@ scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
if (res != C_WPD_OK) {
|
||||
CTX_LOG_ERRORF("wpd.c", "Error while parsing WPD file [%s] (%d)",
|
||||
doc->filepath, res)
|
||||
doc->filepath, res);
|
||||
}
|
||||
|
||||
if (tex.dyn_buffer.cur != 0) {
|
||||
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)
|
||||
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);
|
||||
}
|
||||
|
||||
text_buffer_destroy(&tex);
|
||||
|
Loading…
x
Reference in New Issue
Block a user