diff --git a/.gitignore b/.gitignore index 3a42293..8f9f8a3 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,5 @@ build.ninja src/web/static_generated.c src/magic_generated.c src/index/static_generated.c +*.sist2 +*-shm \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 6857720..d3869f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,30 +22,33 @@ set(ARGPARSE_SHARED off) add_subdirectory(third-party/argparse) add_executable(sist2 + # argparse + third-party/argparse/argparse.h third-party/argparse/argparse.c + src/main.c src/sist.h src/io/walk.h src/io/walk.c - src/io/store.h src/io/store.c src/tpool.h src/tpool.c src/parsing/parse.h src/parsing/parse.c + src/parsing/magic_util.c src/parsing/magic_util.h src/io/serialize.h src/io/serialize.c src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c src/index/web.c src/index/web.h src/web/serve.c src/web/serve.h + src/web/web_util.c src/web/web_util.h src/index/elastic.c src/index/elastic.h src/util.c src/util.h - src/ctx.h src/types.h + src/ctx.c src/ctx.h + src/types.h src/log.c src/log.h src/cli.c src/cli.h - src/stats.c src/stats.h src/ctx.c src/parsing/sidecar.c src/parsing/sidecar.h - src/mempool/mempool.c src/mempool/mempool.h + src/database/database.c src/database/database.h + src/parsing/fs_util.h src/auth0/auth0_c_api.h src/auth0/auth0_c_api.cpp - # argparse - third-party/argparse/argparse.h third-party/argparse/argparse.c - ) + src/database/database_stats.c src/database/database_stats.h src/database/database_schema.c) set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C) target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/) @@ -53,8 +56,6 @@ set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) find_package(PkgConfig REQUIRED) -pkg_search_module(GLIB REQUIRED glib-2.0) - find_package(lmdb CONFIG REQUIRED) find_package(cJSON CONFIG REQUIRED) find_package(unofficial-mongoose CONFIG REQUIRED) @@ -63,6 +64,7 @@ 
find_library(MAGIC_LIB NAMES libmagic.so.1 magic PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/ ) +find_package(unofficial-sqlite3 CONFIG REQUIRED) target_include_directories( @@ -71,7 +73,6 @@ target_include_directories( ${CMAKE_SOURCE_DIR}/third-party/utf8.h/ ${CMAKE_SOURCE_DIR}/third-party/libscan/ ${CMAKE_SOURCE_DIR}/ - ${GLIB_INCLUDE_DIRS} ) target_compile_options( @@ -90,6 +91,7 @@ if (SIST_DEBUG) -fsanitize=address -fno-inline # -O2 + -w ) target_link_options( sist2 @@ -121,6 +123,7 @@ else () -Ofast -fno-stack-protector -fomit-frame-pointer + -w ) endif () @@ -137,17 +140,15 @@ target_link_libraries( lmdb cjson argparse - ${GLIB_LDFLAGS} unofficial::mongoose::mongoose CURL::libcurl pthread - c - scan ${MAGIC_LIB} + unofficial::sqlite3::sqlite3 ) add_custom_target( diff --git a/README.md b/README.md index 298dbe2..39d0783 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux ```bash vcpkg install curl[core,openssl] - vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw jasper lcms gumbo + vcpkg install lmdb sqlite3 cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw jasper lcms gumbo ``` 1. Build diff --git a/scripts/before_build.sh b/scripts/before_build.sh index 5e393e5..8b32302 100755 --- a/scripts/before_build.sh +++ b/scripts/before_build.sh @@ -1,10 +1,13 @@ #!/usr/bin/env bash -rm -rf index.sist2/ +( + cd .. 
def crc(s):
    """Return the CRC-32 checksum of the string ``s`` as an unsigned 32-bit integer."""
    checksum = zlib.crc32(s.encode())
    # Mask to the low 32 bits so the result is never negative.
    return checksum & 0xffffffff
l.split(",") if l.startswith("!"): mime = mime[1:] noparse.add(mime) - ext = [x.strip() for x in ext_list.split("|")] + ext = [x.strip() for x in ext_list.split("|") if x.strip() != ""] mimes[mime] = ext + seen_crc = set() + for ext in mimes.values(): + for e in ext: + if crc(e) in seen_crc: + raise Exception("CRC32 collision") + seen_crc.add(crc(e)) + + seen_crc = set() + for mime in mimes.keys(): + if crc(mime) in seen_crc: + raise Exception("CRC32 collision") + seen_crc.add(crc(mime)) + print("// **Generated by mime.py**") print("#ifndef MIME_GENERATED_C") print("#define MIME_GENERATED_C") - print("#include \n") print("#include \n") # Enum print("enum mime {") for mime, ext in sorted(mimes.items()): - print(" " + clean(mime) + "=" + mime_id(mime) + ",") + print(f"{clean(mime)}={mime_id(mime)},") print("};") # Enum -> string @@ -163,20 +181,20 @@ with open("scripts/mime.csv") as f: print("default: return NULL;}}") # Ext -> Enum - print("GHashTable *mime_get_ext_table() {" - "GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);") + print("unsigned int mime_extension_lookup(unsigned long extension_crc32) {" + "switch (extension_crc32) {") for mime, ext in mimes.items(): - for e in [e for e in ext if e]: - print("g_hash_table_insert(ext_table, \"" + e + "\", (gpointer)" + clean(mime) + ");") - if e in ext_in_hash: - raise Exception("extension already in hash: " + e) - ext_in_hash.add(e) - print("return ext_table;}") + if len(ext) > 0: + for e in ext: + print(f"case {crc(e)}:", end="") + print(f"return {clean(mime)};") + print("default: return 0;}}") # string -> Enum - print("GHashTable *mime_get_mime_table() {" - "GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);") - for mime, ext in mimes.items(): - print("g_hash_table_insert(mime_table, \"" + mime + "\", (gpointer)" + clean(mime) + ");") - print("return mime_table;}") + print("unsigned int mime_name_lookup(unsigned long mime_crc32) {" + "switch (mime_crc32) {") + for mime in 
mimes.keys(): + print(f"case {crc(mime)}: return {clean(mime)};") + + print("default: return 0;}}") print("#endif") diff --git a/sist2-admin/frontend/public/index.html b/sist2-admin/frontend/public/index.html index 78f9a73..9af01df 100644 --- a/sist2-admin/frontend/public/index.html +++ b/sist2-admin/frontend/public/index.html @@ -4,7 +4,7 @@ - + sist2-admin diff --git a/src/auth0/auth0_c_api.h b/src/auth0/auth0_c_api.h index c90e322..a46ebfa 100644 --- a/src/auth0/auth0_c_api.h +++ b/src/auth0/auth0_c_api.h @@ -1,12 +1,13 @@ #ifndef SIST2_AUTH0_C_API_H #define SIST2_AUTH0_C_API_H -#include "stdlib.h" #ifdef __cplusplus #define EXTERNC extern "C" +#include "cstdlib" #else #define EXTERNC +#include "stdlib.h" #endif #define AUTH0_OK (0) diff --git a/src/cli.c b/src/cli.c index 8ff8875..e2540ba 100644 --- a/src/cli.c +++ b/src/cli.c @@ -2,16 +2,17 @@ #include "ctx.h" #include -#define DEFAULT_OUTPUT "index.sist2/" +#define DEFAULT_OUTPUT "index.sist2" +#define DEFAULT_NAME "index" #define DEFAULT_CONTENT_SIZE 32768 #define DEFAULT_QUALITY 2 -#define DEFAULT_THUMBNAIL_SIZE 500 +#define DEFAULT_THUMBNAIL_SIZE 552 #define DEFAULT_THUMBNAIL_COUNT 1 #define DEFAULT_REWRITE_URL "" #define DEFAULT_ES_URL "http://localhost:9200" #define DEFAULT_ES_INDEX "sist2" -#define DEFAULT_BATCH_SIZE 100 +#define DEFAULT_BATCH_SIZE 70 #define DEFAULT_TAGLINE "Lightning-fast file system indexer and search tool" #define DEFAULT_LANG "en" @@ -20,8 +21,6 @@ #define DEFAULT_MAX_MEM_BUFFER 2000 -#define DEFAULT_THROTTLE_MEMORY_THRESHOLD 0 - const char *TESS_DATAPATHS[] = { "/usr/share/tessdata/", "/usr/share/tesseract-ocr/tessdata/", @@ -48,9 +47,6 @@ void scan_args_destroy(scan_args_t *args) { if (args->name != NULL) { free(args->name); } - if (args->incremental != NULL) { - free(args->incremental); - } if (args->path != NULL) { free(args->path); } @@ -61,7 +57,6 @@ void scan_args_destroy(scan_args_t *args) { } void index_args_destroy(index_args_t *args) { - //todo if 
(args->es_mappings_path) { free(args->es_mappings); } @@ -76,7 +71,6 @@ void index_args_destroy(index_args_t *args) { } void web_args_destroy(web_args_t *args) { - //todo free(args); } @@ -97,19 +91,13 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { char *abs_path = abspath(argv[1]); if (abs_path == NULL) { - LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1]) + LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1]); } else { + abs_path = realloc(abs_path, strlen(abs_path) + 2); + strcat(abs_path, "/"); args->path = abs_path; } - if (args->incremental != OPTION_VALUE_UNSPECIFIED) { - args->incremental = abspath(args->incremental); - if (abs_path == NULL) { - sist_log("main.c", LOG_SIST_WARNING, "Could not open original index! Disabled incremental scan feature."); - args->incremental = NULL; - } - } - if (args->tn_quality == OPTION_VALUE_UNSPECIFIED) { args->tn_quality = DEFAULT_QUALITY; } else if (args->tn_quality < 2 || args->tn_quality > 31) { @@ -152,20 +140,24 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { args->output = expandpath(args->output); } - int ret = mkdir(args->output, S_IRUSR | S_IWUSR | S_IXUSR); - if (ret != 0) { - fprintf(stderr, "Invalid output: '%s' (%s).\n", args->output, strerror(errno)); - return 1; + char *abs_output = abspath(args->output); + if (args->incremental && abs_output == NULL) { + LOG_WARNINGF("main.c", "Could not open original index for incremental scan: %s. Will not perform incremental scan.", abs_output); + args->incremental = FALSE; + } else if (!args->incremental && abs_output != NULL) { + LOG_FATALF("main.c", "Index already exists: %s. 
If you wish to perform incremental scan, you must specify --incremental", abs_output); } + free(abs_output); if (args->depth <= 0) { - args->depth = G_MAXINT32; + args->depth = 2147483647; } else { args->depth += 1; } if (args->name == OPTION_VALUE_UNSPECIFIED) { - args->name = g_path_get_basename(args->output); + args->name = malloc(strlen(DEFAULT_NAME) + 1); + strcpy(args->name, DEFAULT_NAME); } else { char *tmp = malloc(strlen(args->name) + 1); strcpy(tmp, args->name); @@ -224,7 +216,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { } if (trained_data_path != NULL && path != trained_data_path) { LOG_FATAL("cli.c", "When specifying more than one tesseract language, all the traineddata " - "files must be in the same folder") + "files must be in the same folder"); } trained_data_path = path; @@ -232,7 +224,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { } free(lang); - ret = TessBaseAPIInit3(api, trained_data_path, args->tesseract_lang); + int ret = TessBaseAPIInit3(api, trained_data_path, args->tesseract_lang); if (ret != 0) { fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang); return 1; @@ -249,12 +241,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0); if (error != NULL) { - LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset) + LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset); } pcre_extra *re_extra = pcre_study(re, 0, &error); if (error != NULL) { - LOG_FATALF("cli.c", "pcre_study returned error: %s", error) + LOG_FATALF("cli.c", "pcre_study returned error: %s", error); } ScanCtx.exclude = re; @@ -276,7 +268,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { if (args->list_path != OPTION_VALUE_UNSPECIFIED) { if (strcmp(args->list_path, "-") == 0) { args->list_file = 
stdin; - LOG_DEBUG("cli.c", "Using stdin as list file") + LOG_DEBUG("cli.c", "Using stdin as list file"); } else { args->list_file = fopen(args->list_path, "r"); @@ -286,27 +278,27 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { } } - LOG_DEBUGF("cli.c", "arg tn_quality=%f", args->tn_quality) - LOG_DEBUGF("cli.c", "arg tn_size=%d", args->tn_size) - LOG_DEBUGF("cli.c", "arg tn_count=%d", args->tn_count) - LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size) - LOG_DEBUGF("cli.c", "arg threads=%d", args->threads) - LOG_DEBUGF("cli.c", "arg incremental=%s", args->incremental) - LOG_DEBUGF("cli.c", "arg output=%s", args->output) - LOG_DEBUGF("cli.c", "arg rewrite_url=%s", args->rewrite_url) - LOG_DEBUGF("cli.c", "arg name=%s", args->name) - LOG_DEBUGF("cli.c", "arg depth=%d", args->depth) - LOG_DEBUGF("cli.c", "arg path=%s", args->path) - LOG_DEBUGF("cli.c", "arg archive=%s", args->archive) - LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase) - LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang) - LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path) - LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex) - LOG_DEBUGF("cli.c", "arg fast=%d", args->fast) - LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub) - LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold) - LOG_DEBUGF("cli.c", "arg max_memory_buffer_mib=%d", args->max_memory_buffer_mib) - LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path) + LOG_DEBUGF("cli.c", "arg tn_quality=%f", args->tn_quality); + LOG_DEBUGF("cli.c", "arg tn_size=%d", args->tn_size); + LOG_DEBUGF("cli.c", "arg tn_count=%d", args->tn_count); + LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size); + LOG_DEBUGF("cli.c", "arg threads=%d", args->threads); + LOG_DEBUGF("cli.c", "arg incremental=%d", args->incremental); + LOG_DEBUGF("cli.c", "arg output=%s", args->output); + LOG_DEBUGF("cli.c", "arg rewrite_url=%s", 
args->rewrite_url); + LOG_DEBUGF("cli.c", "arg name=%s", args->name); + LOG_DEBUGF("cli.c", "arg depth=%d", args->depth); + LOG_DEBUGF("cli.c", "arg path=%s", args->path); + LOG_DEBUGF("cli.c", "arg archive=%s", args->archive); + LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase); + LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang); + LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path); + LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex); + LOG_DEBUGF("cli.c", "arg fast=%d", args->fast); + LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub); + LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold); + LOG_DEBUGF("cli.c", "arg max_memory_buffer_mib=%d", args->max_memory_buffer_mib); + LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path); return 0; } @@ -316,20 +308,20 @@ int load_external_file(const char *file_path, char **dst) { int res = stat(file_path, &info); if (res == -1) { - LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno)) + LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno)); return 1; } int fd = open(file_path, O_RDONLY); if (fd == -1) { - LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno)) + LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno)); return 1; } *dst = malloc(info.st_size + 1); res = read(fd, *dst, info.st_size); if (res < 0) { - LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno)) + LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno)); return 1; } @@ -357,7 +349,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) { char *index_path = abspath(argv[1]); if (index_path == NULL) { - LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1]) + LOG_FATALF("cli.c", "Invalid PATH argument. 
File not found: %s", argv[1]); } else { args->index_path = index_path; } @@ -392,28 +384,28 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) { args->batch_size = DEFAULT_BATCH_SIZE; } - LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url) - LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index) - LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl) - LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path) - LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path) - LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script) + LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url); + LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index); + LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl); + LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path); + LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path); + LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script); if (args->script) { char log_buf[5000]; strncpy(log_buf, args->script, sizeof(log_buf)); *(log_buf + sizeof(log_buf) - 1) = '\0'; - LOG_DEBUGF("cli.c", "arg script=%s", log_buf) + LOG_DEBUGF("cli.c", "arg script=%s", log_buf); } - LOG_DEBUGF("cli.c", "arg print=%d", args->print) - LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path) - LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings) - LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path) - LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings) - LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size) - LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset) + LOG_DEBUGF("cli.c", "arg print=%d", args->print); + LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path); + LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings); + LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path); + LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings); + LOG_DEBUGF("cli.c", "arg 
batch_size=%d", args->batch_size); + LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset); return 0; } @@ -534,23 +526,24 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) { for (int i = 0; i < args->index_count; i++) { char *abs_path = abspath(args->indices[i]); if (abs_path == NULL) { - LOG_FATALF("cli.c", "Index not found: %s", args->indices[i]) + LOG_FATALF("cli.c", "Index not found: %s", args->indices[i]); } + free(abs_path); } - LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url) - LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index) - LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl) - LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline) - LOG_DEBUGF("cli.c", "arg dev=%d", args->dev) - LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address) - LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials) - LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials) - LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user) - LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass) - LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count) + LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url); + LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index); + LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl); + LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline); + LOG_DEBUGF("cli.c", "arg dev=%d", args->dev); + LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address); + LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials); + LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials); + LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user); + LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass); + LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count); for (int i = 0; i < args->index_count; i++) { - LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i]) + LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i]); } return 0; @@ -575,7 
+568,7 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) { char *index_path = abspath(argv[1]); if (index_path == NULL) { - LOG_FATALF("cli.c", "Invalid index PATH argument. File not found: %s", argv[1]) + LOG_FATALF("cli.c", "Invalid index PATH argument. File not found: %s", argv[1]); } else { args->index_path = index_path; } @@ -596,12 +589,12 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) { return 1; } - LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path) + LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path); char log_buf[5000]; strncpy(log_buf, args->script, sizeof(log_buf)); *(log_buf + sizeof(log_buf) - 1) = '\0'; - LOG_DEBUGF("cli.c", "arg script=%s", log_buf) + LOG_DEBUGF("cli.c", "arg script=%s", log_buf); return 0; } diff --git a/src/cli.h b/src/cli.h index d953621..e1e039c 100644 --- a/src/cli.h +++ b/src/cli.h @@ -13,7 +13,7 @@ typedef struct scan_args { int tn_size; int content_size; int threads; - char *incremental; + int incremental; char *output; char *rewrite_url; char *name; diff --git a/src/ctx.c b/src/ctx.c index 3c24a9f..fe6e8a7 100644 --- a/src/ctx.c +++ b/src/ctx.c @@ -3,9 +3,10 @@ ScanCtx_t ScanCtx = { .stat_index_size = 0, .stat_tn_size = 0, - .dbg_current_files = NULL, - .pool = NULL + .pool = NULL, + .index.path = {0,}, }; WebCtx_t WebCtx; IndexCtx_t IndexCtx; LogCtx_t LogCtx; +__thread ProcData_t ProcData; diff --git a/src/ctx.h b/src/ctx.h index 49fdbb7..f56afd8 100644 --- a/src/ctx.h +++ b/src/ctx.h @@ -16,22 +16,17 @@ #include "libscan/msdoc/msdoc.h" #include "libscan/wpd/wpd.h" #include "libscan/json/json.h" -#include "src/io/store.h" +#include "src/database/database.h" #include "src/index/elastic.h" +#include "sqlite3.h" -#include #include typedef struct { struct index_t index; - GHashTable *mime_table; - GHashTable *ext_table; - tpool_t *pool; - tpool_t *writer_pool; - int threads; int depth; int calculate_checksums; @@ -39,16 +34,10 @@ typedef struct { size_t 
/**
 * Locate the last '/' in a NUL-terminated string.
 *
 * @param str string to search
 * @return zero-based index of the last '/' in str, or -1 when str contains none
 */
__always_inline
static int sep_rfind(const char *str) {
    const char *last_sep = strrchr(str, '/');

    if (last_sep == NULL) {
        return -1;
    }
    return (int) (last_sep - str);
}
parent, stop, SQLITE_TRANSIENT); +} + + +void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv) { + if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) { + sqlite3_result_error(ctx, "Invalid parameters", -1); + } + + database_ipc_ctx_t *ipc_ctx = sqlite3_user_data(ctx); + + const char *current_job = (const char *) sqlite3_value_text(argv[0]); + + char buf[PATH_MAX]; + strcpy(buf, current_job); + + strcpy(ipc_ctx->current_job[ProcData.thread_id], current_job); + + sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC); +} + +void database_initialize(database_t *db) { + CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db)); + + LOG_DEBUGF("database.c", "Initializing database %s", db->filename); + if (db->type == INDEX_DATABASE) { + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, IndexDatabaseSchema, NULL, NULL, NULL)); + } else if (db->type == IPC_CONSUMER_DATABASE || db->type == IPC_PRODUCER_DATABASE) { + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, IpcDatabaseSchema, NULL, NULL, NULL)); + } + + sqlite3_close(db->db); +} + +void database_open(database_t *db) { + LOG_DEBUGF("tpool.c", "Opening database %s (%d)", db->filename, db->type); + + CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db)); + + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA cache_size = -200000;", NULL, NULL, NULL)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA synchronous = OFF;", NULL, NULL, NULL)); + + if (db->type == INDEX_DATABASE) { + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA temp_store = memory;", NULL, NULL, NULL)); + } + + if (db->type == INDEX_DATABASE) { + // Prepare statements; + CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2( + db->db, + "SELECT data FROM thumbnail WHERE id=? AND num=? LIMIT 1;", -1, + &db->select_thumbnail_stmt, NULL)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2( + db->db, + "UPDATE document SET marked=1 WHERE id=? AND mtime=? 
RETURNING id", + -1, + &db->mark_document_stmt, NULL)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2( + db->db, + "REPLACE INTO document_sidecar (id, json_data) VALUES (?,?)", -1, + &db->write_document_sidecar_stmt, NULL)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2( + db->db, + "REPLACE INTO document (id, mtime, size, json_data) VALUES (?, ?, ?, ?);", -1, + &db->write_document_stmt, NULL)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2( + db->db, + "INSERT INTO thumbnail (id, num, data) VALUES (?,?,?) ON CONFLICT DO UPDATE SET data=excluded.data;", -1, + &db->write_thumbnail_stmt, NULL)); + + // Create functions + sqlite3_create_function( + db->db, + "path_parent", + 1, + SQLITE_UTF8, + NULL, + path_parent_func, + NULL, + NULL + ); + } else if (db->type == IPC_CONSUMER_DATABASE) { + + sqlite3_create_function( + db->db, + "save_current_job_info", + 1, + SQLITE_UTF8, + db->ipc_ctx, + save_current_job_info, + NULL, + NULL + ); + + CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2( + db->db, + "DELETE FROM parse_job WHERE id = (SELECT MIN(id) FROM parse_job)" + " RETURNING filepath,mtime,st_size,save_current_job_info(filepath);", + -1, &db->pop_parse_job_stmt, NULL + )); + CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2( + db->db, + "DELETE FROM index_job WHERE id = (SELECT MIN(id) FROM index_job)" + " RETURNING doc_id,type,line;", + -1, &db->pop_index_job_stmt, NULL + )); + + } else if (db->type == IPC_PRODUCER_DATABASE) { + char sql[40]; + int max_size_mb = 10; // TODO: read from args. 
+ + snprintf(sql, sizeof(sql), "PRAGMA max_page_count=%d", (max_size_mb * 1024 * 1024) / 4096); + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, sql, NULL, NULL, NULL)); + + CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2( + db->db, "INSERT INTO parse_job (filepath,mtime,st_size) VALUES (?,?,?);", -1, + &db->insert_parse_job_stmt, NULL)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2( + db->db, "INSERT INTO index_job (doc_id,type,line) VALUES (?,?,?);", -1, + &db->insert_index_job_stmt, NULL)); + + sqlite3_create_function( + db->db, + "path_parent", + 1, + SQLITE_UTF8, + NULL, + path_parent_func, + NULL, + NULL + ); + } + +} + +void database_close(database_t *db, int optimize) { + LOG_DEBUGF("database.c", "Closing database %s", db->filename); + + if (optimize) { + LOG_DEBUG("database.c", "Optimizing database"); + // TODO: This should be an optional argument +// CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "VACUUM;", NULL, NULL, NULL)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA optimize;", NULL, NULL, NULL)); + } + + sqlite3_close(db->db); + free(db); + db = NULL; +} + +void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *return_value_len) { + sqlite3_bind_text(db->select_thumbnail_stmt, 1, id, -1, SQLITE_STATIC); + sqlite3_bind_int(db->select_thumbnail_stmt, 2, num); + + int ret = sqlite3_step(db->select_thumbnail_stmt); + + // TODO: if row not found, return null + if (ret != SQLITE_ROW) { + LOG_FATALF("database.c", "FIXME: tn step returned %d", ret); + } + + const void *blob = sqlite3_column_blob(db->select_thumbnail_stmt, 0); + const int blob_size = sqlite3_column_bytes(db->select_thumbnail_stmt, 0); + + *return_value_len = blob_size; + void *return_data = malloc(blob_size); + memcpy(return_data, blob, blob_size); + + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->select_thumbnail_stmt)); + + return return_data; +} + +void database_write_index_descriptor(database_t *db, index_descriptor_t *desc) { + + sqlite3_exec(db->db, "DELETE FROM 
descriptor;", NULL, NULL, NULL); + + sqlite3_stmt *stmt; + + sqlite3_prepare_v2(db->db, "INSERT INTO descriptor (id, version_major, version_minor, version_patch," + " root, name, rewrite_url, timestamp) VALUES (?,?,?,?,?,?,?,?);", -1, &stmt, NULL); + sqlite3_bind_text(stmt, 1, desc->id, -1, SQLITE_STATIC); + sqlite3_bind_int(stmt, 2, desc->version_major); + sqlite3_bind_int(stmt, 3, desc->version_minor); + sqlite3_bind_int(stmt, 4, desc->version_patch); + sqlite3_bind_text(stmt, 5, desc->root, -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 6, desc->name, -1, SQLITE_STATIC); + sqlite3_bind_text(stmt, 7, desc->rewrite_url, -1, SQLITE_STATIC); + sqlite3_bind_int64(stmt, 8, desc->timestamp); + + CRASH_IF_STMT_FAIL(sqlite3_step(stmt)); + + sqlite3_finalize(stmt); +} + +index_descriptor_t *database_read_index_descriptor(database_t *db) { + + sqlite3_stmt *stmt; + + sqlite3_prepare_v2(db->db, "SELECT id, version_major, version_minor, version_patch," + " root, name, rewrite_url, timestamp FROM descriptor;", -1, &stmt, NULL); + + CRASH_IF_STMT_FAIL(sqlite3_step(stmt)); + + const char *id = (char *) sqlite3_column_text(stmt, 0); + int v_major = sqlite3_column_int(stmt, 1); + int v_minor = sqlite3_column_int(stmt, 2); + int v_patch = sqlite3_column_int(stmt, 3); + const char *root = (char *) sqlite3_column_text(stmt, 4); + const char *name = (char *) sqlite3_column_text(stmt, 5); + const char *rewrite_url = (char *) sqlite3_column_text(stmt, 6); + int timestamp = sqlite3_column_int(stmt, 7); + + index_descriptor_t *desc = malloc(sizeof(index_descriptor_t)); + strcpy(desc->id, id); + snprintf(desc->version, sizeof(desc->version), "%d.%d.%d", v_major, v_minor, v_patch); + desc->version_major = v_major; + desc->version_minor = v_minor; + desc->version_patch = v_patch; + strcpy(desc->root, root); + strcpy(desc->name, name); + strcpy(desc->rewrite_url, rewrite_url); + desc->timestamp = timestamp; + + CRASH_IF_NOT_SQLITE_OK(sqlite3_finalize(stmt)); + + return desc; +} + 
+database_iterator_t *database_create_document_iterator(database_t *db) { + + sqlite3_stmt *stmt; + + // TODO: remove mtime, size, _id from json_data + + sqlite3_prepare_v2(db->db, "WITH doc (j) AS (SELECT CASE" + " WHEN sc.json_data IS NULL THEN" + " CASE" + " WHEN t.tag IS NULL THEN" + " document.json_data" + " ELSE" + " json_set(document.json_data, '$.tag', json_group_array(t.tag))" + " END" + " ELSE" + " CASE" + " WHEN t.tag IS NULL THEN" + " json_patch(document.json_data, sc.json_data)" + " ELSE" + // This will overwrite any tags specified in the sidecar file! + // TODO: concatenate the two arrays? + " json_set(json_patch(document.json_data, sc.json_data), '$.tag', json_group_array(t.tag))" + " END" + " END" + " FROM document" + " LEFT JOIN document_sidecar sc ON document.id = sc.id" + " LEFT JOIN tag t ON document.id = t.id" + " GROUP BY document.id)" + " SELECT json_set(j, '$.index', (SELECT id FROM descriptor)) FROM doc", -1, &stmt, NULL); + + database_iterator_t *iter = malloc(sizeof(database_iterator_t)); + + iter->stmt = stmt; + iter->db = db; + + return iter; +} + +cJSON *database_document_iter(database_iterator_t *iter) { + + if (iter->stmt == NULL) { + LOG_ERROR("database.c", "FIXME: database_document_iter() called after iteration stopped"); + return NULL; + } + + int ret = sqlite3_step(iter->stmt); + + if (ret == SQLITE_ROW) { + const char *json_string = (const char *) sqlite3_column_text(iter->stmt, 0); + return cJSON_Parse(json_string); + } + + if (ret != SQLITE_DONE) { + LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db)); + } + + if (sqlite3_finalize(iter->stmt) != SQLITE_OK) { + LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db)); + } + + iter->stmt = NULL; + + return NULL; +} + +cJSON *database_incremental_scan_begin(database_t *db) { + LOG_DEBUG("database.c", "Preparing database for incremental scan"); + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "UPDATE document SET 
marked=0;", NULL, NULL, NULL)); +} + +cJSON *database_incremental_scan_end(database_t *db) { + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec( + db->db, + "DELETE FROM delete_list WHERE id IN (SELECT id FROM document WHERE marked=1);", + NULL, NULL, NULL + )); + + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec( + db->db, + "DELETE FROM thumbnail WHERE id IN (SELECT id FROM document WHERE marked=0);", + NULL, NULL, NULL + )); + + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec( + db->db, + "INSERT INTO delete_list (id) SELECT id FROM document WHERE marked=0;", + NULL, NULL, NULL + )); + + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec( + db->db, + "DELETE FROM document_sidecar WHERE id IN (SELECT id FROM document WHERE marked=0);", + NULL, NULL, NULL + )); + + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec( + db->db, + "DELETE FROM document WHERE marked=0;", + NULL, NULL, NULL + )); +} + +int database_mark_document(database_t *db, const char *id, int mtime) { + sqlite3_bind_text(db->mark_document_stmt, 1, id, -1, SQLITE_STATIC); + sqlite3_bind_int(db->mark_document_stmt, 2, mtime); + + pthread_mutex_lock(&db->ipc_ctx->index_db_mutex); + int ret = sqlite3_step(db->mark_document_stmt); + + if (ret == SQLITE_ROW) { + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->mark_document_stmt)); + pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex); + return TRUE; + } + + if (ret == SQLITE_DONE) { + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->mark_document_stmt)); + pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex); + return FALSE; + } + pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex); + + CRASH_IF_STMT_FAIL(ret); +} + +void database_write_document(database_t *db, document_t *doc, const char *json_data) { + sqlite3_bind_text(db->write_document_stmt, 1, doc->doc_id, -1, SQLITE_STATIC); + sqlite3_bind_int(db->write_document_stmt, 2, doc->mtime); + sqlite3_bind_int64(db->write_document_stmt, 3, (long) doc->size); + sqlite3_bind_text(db->write_document_stmt, 4, json_data, -1, SQLITE_STATIC); + + pthread_mutex_lock(&db->ipc_ctx->index_db_mutex); 
+ CRASH_IF_STMT_FAIL(sqlite3_step(db->write_document_stmt)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_document_stmt)); + pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex); +} + + +void database_write_document_sidecar(database_t *db, const char *id, const char *json_data) { + sqlite3_bind_text(db->write_document_sidecar_stmt, 1, id, -1, SQLITE_STATIC); + sqlite3_bind_text(db->write_document_sidecar_stmt, 2, json_data, -1, SQLITE_STATIC); + + pthread_mutex_lock(&db->ipc_ctx->index_db_mutex); + CRASH_IF_STMT_FAIL(sqlite3_step(db->write_document_sidecar_stmt)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_document_sidecar_stmt)); + pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex); +} + +void database_write_thumbnail(database_t *db, const char *id, int num, void *data, size_t data_size) { + sqlite3_bind_text(db->write_thumbnail_stmt, 1, id, -1, SQLITE_STATIC); + sqlite3_bind_int(db->write_thumbnail_stmt, 2, num); + sqlite3_bind_blob(db->write_thumbnail_stmt, 3, data, (int) data_size, SQLITE_STATIC); + + pthread_mutex_lock(&db->ipc_ctx->index_db_mutex); + CRASH_IF_STMT_FAIL(sqlite3_step(db->write_thumbnail_stmt)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_thumbnail_stmt)); + pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex); +} + + +//void database_create_fts_index(database_t *db, database_t *fts_db) { +// // In a separate file, +// +// // use database_initialize() to create FTS schema +// // if --force-reset, then truncate the tables first +// +// /* +// * create/append fts table +// * +// * create/append scalar index table with +// * id,index,size,mtime,mime +// * +// * create/append path index table with +// * index,path,depth +// * +// * content table is a view with SELECT UNION for all attached tables +// * random_seed column +// */ +// +// // INSERT INTO ft(ft) VALUES('optimize'); +//} + +job_t *database_get_work(database_t *db, job_type_t job_type) { + job_t *job; + + pthread_mutex_lock(&db->ipc_ctx->mutex); + while (db->ipc_ctx->job_count 
== 0 && !db->ipc_ctx->no_more_jobs) { + pthread_cond_timedwait_ms(&db->ipc_ctx->has_work_cond, &db->ipc_ctx->mutex, 10); + } + pthread_mutex_unlock(&db->ipc_ctx->mutex); + + pthread_mutex_lock(&db->ipc_ctx->db_mutex); + + if (job_type == JOB_PARSE_JOB) { + int ret = sqlite3_step(db->pop_parse_job_stmt); + if (ret == SQLITE_DONE) { + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_parse_job_stmt)); + pthread_mutex_unlock(&db->ipc_ctx->db_mutex); + return NULL; + } else { + CRASH_IF_STMT_FAIL(ret); + } + + job = malloc(sizeof(*job)); + + job->parse_job = create_parse_job( + (const char *) sqlite3_column_text(db->pop_parse_job_stmt, 0), + sqlite3_column_int(db->pop_parse_job_stmt, 1), + sqlite3_column_int64(db->pop_parse_job_stmt, 2)); + + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_parse_job_stmt)); + } else { + + int ret = sqlite3_step(db->pop_index_job_stmt); + + if (ret == SQLITE_DONE) { + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt)); + pthread_mutex_unlock(&db->ipc_ctx->db_mutex); + return NULL; + } else { + CRASH_IF_STMT_FAIL(ret); + } + + job = malloc(sizeof(*job)); + + const char *line = (const char *) sqlite3_column_text(db->pop_index_job_stmt, 2); + if (line != NULL) { + job->bulk_line = malloc(sizeof(es_bulk_line_t) + strlen(line) + 1); + strcpy(job->bulk_line->line, line); + } else { + job->bulk_line = malloc(sizeof(es_bulk_line_t)); + } + strcpy(job->bulk_line->doc_id, (const char *) sqlite3_column_text(db->pop_index_job_stmt, 0)); + job->bulk_line->type = sqlite3_column_int(db->pop_index_job_stmt, 1); + job->bulk_line->next = NULL; + + // TODO CRASH IF NOT OK + sqlite3_step(db->pop_parse_job_stmt); + + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt)); + } + + pthread_mutex_unlock(&db->ipc_ctx->db_mutex); + + pthread_mutex_lock(&db->ipc_ctx->mutex); + db->ipc_ctx->job_count -= 1; + pthread_mutex_unlock(&db->ipc_ctx->mutex); + + job->type = job_type; + return job; +} + +void database_add_work(database_t *db, job_t *job) { + int 
ret; + + pthread_mutex_lock(&db->ipc_ctx->db_mutex); + + if (job->type == JOB_PARSE_JOB) { + do { + sqlite3_bind_text(db->insert_parse_job_stmt, 1, job->parse_job->filepath, -1, SQLITE_STATIC); + sqlite3_bind_int(db->insert_parse_job_stmt, 2, job->parse_job->vfile.mtime); + sqlite3_bind_int64(db->insert_parse_job_stmt, 3, (long) job->parse_job->vfile.st_size); + + ret = sqlite3_step(db->insert_parse_job_stmt); + + if (ret == SQLITE_FULL) { + usleep(1000000); + } else { + CRASH_IF_STMT_FAIL(ret); + } + + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->insert_parse_job_stmt)); + } while (ret != SQLITE_DONE); + } else if (job->type == JOB_BULK_LINE) { + do { + sqlite3_bind_text(db->insert_index_job_stmt, 1, job->bulk_line->doc_id, -1, SQLITE_STATIC); + sqlite3_bind_int(db->insert_index_job_stmt, 2, job->bulk_line->type); + sqlite3_bind_text(db->insert_index_job_stmt, 3, job->bulk_line->line, -1, SQLITE_STATIC); + + ret = sqlite3_step(db->insert_index_job_stmt); + + if (ret == SQLITE_FULL) { + sqlite3_reset(db->insert_index_job_stmt); + pthread_mutex_unlock(&db->ipc_ctx->db_mutex); + usleep(100000); + pthread_mutex_lock(&db->ipc_ctx->db_mutex); + continue; + } else { + CRASH_IF_STMT_FAIL(ret); + } + + ret = sqlite3_reset(db->insert_index_job_stmt); + if (ret == SQLITE_FULL) { + pthread_mutex_unlock(&db->ipc_ctx->db_mutex); + usleep(100000); + pthread_mutex_lock(&db->ipc_ctx->db_mutex); + } + + } while (ret != SQLITE_DONE && ret != SQLITE_OK); + } else { + LOG_FATAL("database.c", "FIXME: invalid job type"); + } + pthread_mutex_unlock(&db->ipc_ctx->db_mutex); + + pthread_mutex_lock(&db->ipc_ctx->mutex); + db->ipc_ctx->job_count += 1; + pthread_cond_signal(&db->ipc_ctx->has_work_cond); + pthread_mutex_unlock(&db->ipc_ctx->mutex); +} diff --git a/src/database/database.h b/src/database/database.h new file mode 100644 index 0000000..d36f802 --- /dev/null +++ b/src/database/database.h @@ -0,0 +1,147 @@ +#ifndef SIST2_DATABASE_H +#define SIST2_DATABASE_H + +#include +#include 
+#include "src/sist.h" +#include "src/index/elastic.h" + +typedef struct index_descriptor index_descriptor_t; + +extern const char *IpcDatabaseSchema; +extern const char *IndexDatabaseSchema; + +typedef enum { + INDEX_DATABASE, + IPC_CONSUMER_DATABASE, + IPC_PRODUCER_DATABASE, + FTS_DATABASE +} database_type_t; + +typedef enum { + JOB_UNDEFINED, + JOB_BULK_LINE, + JOB_PARSE_JOB +} job_type_t; + +typedef struct { + job_type_t type; + union { + parse_job_t *parse_job; + es_bulk_line_t *bulk_line; + }; +} job_t; + +typedef struct { + int job_count; + int no_more_jobs; + int completed_job_count; + + pthread_mutex_t mutex; + pthread_mutex_t db_mutex; + pthread_mutex_t index_db_mutex; + pthread_cond_t has_work_cond; + char current_job[256][PATH_MAX * 2]; +} database_ipc_ctx_t; + +typedef struct database { + char filename[PATH_MAX]; + database_type_t type; + sqlite3 *db; + + // Prepared statements + sqlite3_stmt *select_thumbnail_stmt; + sqlite3_stmt *treemap_merge_up_update_stmt; + sqlite3_stmt *treemap_merge_up_delete_stmt; + + sqlite3_stmt *mark_document_stmt; + sqlite3_stmt *write_document_stmt; + sqlite3_stmt *write_document_sidecar_stmt; + sqlite3_stmt *write_thumbnail_stmt; + + sqlite3_stmt *insert_parse_job_stmt; + sqlite3_stmt *insert_index_job_stmt; + sqlite3_stmt *pop_parse_job_stmt; + sqlite3_stmt *pop_index_job_stmt; + + database_ipc_ctx_t *ipc_ctx; +} database_t; + +typedef struct { + database_t *db; + sqlite3_stmt *stmt; +} database_iterator_t; + +typedef struct { + const char *path; + const char *parent; + long size; +} treemap_row_t; + +static treemap_row_t null_treemap_row = {0, 0, 0}; + + +database_t *database_create(const char *filename, database_type_t type); + +void database_initialize(database_t *db); + +void database_open(database_t *db); + +void database_close(database_t *, int optimize); + +void database_write_thumbnail(database_t *db, const char *id, int num, void *data, size_t data_size); + +void *database_read_thumbnail(database_t *db, const 
/*
 * Evaluate `x` exactly once and call LOG_FATALF unless the result is
 * SQLITE_DONE or SQLITE_ROW (the two non-error outcomes of sqlite3_step()).
 *
 * Requires a variable named `db` with a `db` member (the sqlite3 handle) to be
 * in scope at the expansion site; it is used to fetch sqlite3_errmsg().
 *
 * The temporary carries a macro-specific name and the argument is
 * parenthesized, so an argument expression that mentions a caller variable
 * called `return_value` is no longer shadowed by the macro's own local.
 */
#define CRASH_IF_STMT_FAIL(x) do { \
        const int crash_if_stmt_fail_rc_ = (x); \
        if (crash_if_stmt_fail_rc_ != SQLITE_DONE && crash_if_stmt_fail_rc_ != SQLITE_ROW) { \
            LOG_FATALF("database.c", "Sqlite error @ database.c:%d : (%d) %s", __LINE__, crash_if_stmt_fail_rc_, sqlite3_errmsg(db->db)); \
        } \
    } while (0)

/*
 * Evaluate `x` exactly once and call LOG_FATALF unless the result is
 * SQLITE_OK. Same scope requirement (`db`) and hygiene rules as
 * CRASH_IF_STMT_FAIL.
 */
#define CRASH_IF_NOT_SQLITE_OK(x) do { \
        const int crash_if_not_ok_rc_ = (x); \
        if (crash_if_not_ok_rc_ != SQLITE_OK) { \
            LOG_FATALF("database.c", "Sqlite error @ database.c:%d : (%d) %s", __LINE__, crash_if_not_ok_rc_, sqlite3_errmsg(db->db)); \
        } \
    } while (0)
//SIST2_DATABASE_H \ No newline at end of file diff --git a/src/database/database_schema.c b/src/database/database_schema.c new file mode 100644 index 0000000..23cb05f --- /dev/null +++ b/src/database/database_schema.c @@ -0,0 +1,78 @@ + +const char *IpcDatabaseSchema = + "CREATE TABLE parse_job (" + " id INTEGER PRIMARY KEY," + " filepath TEXT NOT NULL," + " mtime INTEGER NOT NULL," + " st_size INTEGER NOT NULL" + ");" + "" + "CREATE TABLE index_job (" + " id INTEGER PRIMARY KEY," + " doc_id TEXT NOT NULL CHECK ( length(doc_id) = 32 )," + " type INTEGER NOT NULL," + " line TEXT" + ");"; + +const char *IndexDatabaseSchema = + "CREATE TABLE thumbnail (" + " id TEXT NOT NULL CHECK ( length(id) = 32 )," + " num INTEGER NOT NULL," + " data BLOB NOT NULL," + " PRIMARY KEY(id, num)" + ") WITHOUT ROWID;" + "" + "CREATE TABLE document (" + " id TEXT PRIMARY KEY CHECK ( length(id) = 32 )," + " marked INTEGER NOT NULL DEFAULT (1)," + " mtime INTEGER NOT NULL," + " size INTEGER NOT NULL," + " json_data TEXT NOT NULL CHECK ( json_valid(json_data) )" + ") WITHOUT ROWID;" + "" + "CREATE TABLE delete_list (" + " id TEXT PRIMARY KEY CHECK ( length(id) = 32 )" + ") WITHOUT ROWID;" + "" + "CREATE TABLE tag (" + " id TEXT NOT NULL," + " tag TEXT NOT NULL" + ");" + "" + "CREATE TABLE document_sidecar (" + " id TEXT PRIMARY KEY NOT NULL," + " json_data TEXT NOT NULL" + ") WITHOUT ROWID;" + "" + "CREATE TABLE descriptor (" + " id TEXT NOT NULL," + " version_major INTEGER NOT NULL," + " version_minor INTEGER NOT NULL," + " version_patch INTEGER NOT NULL," + " root TEXT NOT NULL," + " name TEXT NOT NULL," + " rewrite_url TEXT," + " timestamp INTEGER NOT NULL" + ");" + "" + "CREATE TABLE stats_treemap (" + " path TEXT NOT NULL," + " size INTEGER NOT NULL" + ");" + "" + "CREATE TABLE stats_size_agg (" + " bucket INTEGER NOT NULL," + " count INTEGER NOT NULL" + ");" + "" + "CREATE TABLE stats_date_agg (" + " bucket INTEGER NOT NULL," + " count INTEGER NOT NULL" + ");" + "" + "CREATE TABLE 
stats_mime_agg (" + " mime TEXT NOT NULL," + " size INTEGER NOT NULL," + " count INTEGER NOT NULL" + ");"; + diff --git a/src/database/database_stats.c b/src/database/database_stats.c new file mode 100644 index 0000000..2f2c73f --- /dev/null +++ b/src/database/database_stats.c @@ -0,0 +1,159 @@ +#include "database.h" +#include "src/sist.h" +#include "src/ctx.h" + +#define TREEMAP_MINIMUM_MERGES_TO_CONTINUE (100) +#define SIZE_BUCKET (long)(5 * 1000 * 1000) +#define DATE_BUCKET (long)(2629800) // ~30 days + +database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold) { + + sqlite3_stmt *stmt; + + sqlite3_prepare_v2(db->db, + "SELECT path, path_parent(path), size FROM tm" + " WHERE path_parent(path) IN (SELECT path FROM tm)" + " AND sizestmt = stmt; + iter->db = db; + + return iter; +} + +treemap_row_t database_treemap_iter(database_iterator_t *iter) { + + if (iter->stmt == NULL) { + LOG_FATAL("database.c", "FIXME: database_treemap_iter() called after iteration stopped"); + } + + int ret = sqlite3_step(iter->stmt); + + if (ret == SQLITE_ROW) { + treemap_row_t row = { + .path = (const char *) sqlite3_column_text(iter->stmt, 0), + .parent = (const char *) sqlite3_column_text(iter->stmt, 1), + .size = sqlite3_column_int64(iter->stmt, 2) + }; + + return row; + } + + if (ret != SQLITE_DONE) { + LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db)); + } + + sqlite3_finalize(iter->stmt); + iter->stmt = NULL; + + return (treemap_row_t) {NULL, NULL, 0}; +} + +void database_generate_stats(database_t *db, double treemap_threshold) { + + LOG_INFO("database.c", "Generating stats"); + + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_size_agg;", NULL, NULL, NULL)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_date_agg;", NULL, NULL, NULL)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_mime_agg;", NULL, NULL, NULL)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE 
FROM stats_treemap;", NULL, NULL, NULL)); + + CRASH_IF_NOT_SQLITE_OK( + sqlite3_exec(db->db, "CREATE TEMP TABLE tm(path TEXT PRIMARY KEY, size INT);", NULL, NULL, NULL)); + + sqlite3_prepare_v2(db->db, "UPDATE tm SET size=size+? WHERE path=?;", -1, &db->treemap_merge_up_update_stmt, NULL); + sqlite3_prepare_v2(db->db, "DELETE FROM tm WHERE path = ?;", -1, &db->treemap_merge_up_delete_stmt, NULL); + + // size aggregation + sqlite3_stmt *stmt; + sqlite3_prepare_v2(db->db, "INSERT INTO stats_size_agg" + " SELECT" + " cast(size / ?1 as int) * ?1 as bucket," + " count(*) as count" + " FROM document" + " GROUP BY bucket", -1, &stmt, NULL); + sqlite3_bind_int(stmt, 1, SIZE_BUCKET); + CRASH_IF_STMT_FAIL(sqlite3_step(stmt)); + + sqlite3_finalize(stmt); + + // date aggregation + sqlite3_prepare_v2(db->db, "INSERT INTO stats_date_agg" + " SELECT" + " cast(mtime / ?1 as int) * ?1 as bucket," + " count(*) as count" + " FROM document" + " GROUP BY bucket", -1, &stmt, NULL); + sqlite3_bind_int(stmt, 1, DATE_BUCKET); + CRASH_IF_STMT_FAIL(sqlite3_step(stmt)); + + sqlite3_finalize(stmt); + + // mime aggregation + sqlite3_prepare_v2(db->db, "INSERT INTO stats_mime_agg" + " SELECT" + " (json_data->>'mime') as bucket," + " sum(size)," + " count(*)" + " FROM document" + " WHERE bucket IS NOT NULL" + " GROUP BY bucket", -1, &stmt, NULL); + CRASH_IF_STMT_FAIL(sqlite3_step(stmt)); + + sqlite3_finalize(stmt); + + // Treemap + sqlite3_prepare_v2(db->db, "SELECT SUM(size) FROM document;", -1, &stmt, NULL); + CRASH_IF_STMT_FAIL(sqlite3_step(stmt)); + long total_size = sqlite3_column_int64(stmt, 0); + long threshold = (long) ((double) total_size * treemap_threshold); + sqlite3_finalize(stmt); + + // flat map + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, + "INSERT INTO tm (path, size) SELECT json_data->>'path' as path, sum(size)" + " FROM document WHERE json_data->>'parent' IS NULL GROUP BY path;", + NULL, NULL, NULL)); + + // Merge up + int merged_rows = 0; + do { + if (merged_rows) { + 
LOG_INFOF("database.c", "Treemap merge iteration (%d rows changed)", merged_rows); + } + merged_rows = 0; + + sqlite3_prepare_v2(db->db, + "INSERT INTO tm (path, size) SELECT path_parent(path) as parent, 0 " + " FROM tm WHERE parent not IN (SELECT path FROM tm) AND sizetreemap_merge_up_update_stmt, 1, row.size); + sqlite3_bind_text(db->treemap_merge_up_update_stmt, 2, row.parent, -1, SQLITE_STATIC); + CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_update_stmt)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_update_stmt)); + + sqlite3_bind_text(db->treemap_merge_up_delete_stmt, 1, row.path, -1, SQLITE_STATIC); + CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_delete_stmt)); + CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_delete_stmt)); + + merged_rows += 1; + } + } while (merged_rows > TREEMAP_MINIMUM_MERGES_TO_CONTINUE); + + CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, + "INSERT INTO stats_treemap (path, size) SELECT path,size FROM tm;", + NULL, NULL, NULL)); + + LOG_INFO("database.c", "Done!"); +} + diff --git a/src/database/database_stats.h b/src/database/database_stats.h new file mode 100644 index 0000000..fe4d507 --- /dev/null +++ b/src/database/database_stats.h @@ -0,0 +1,5 @@ +#ifndef SIST2_DATABASE_STATS_H +#define SIST2_DATABASE_STATS_H + + +#endif //SIST2_DATABASE_STATS_H diff --git a/src/index/elastic.c b/src/index/elastic.c index 9d582fc..337da0e 100644 --- a/src/index/elastic.c +++ b/src/index/elastic.c @@ -29,7 +29,7 @@ void destroy_indexer(es_indexer_t *indexer) { return; } - LOG_DEBUG("elastic.c", "Destroying indexer") + LOG_DEBUG("elastic.c", "Destroying indexer"); if (indexer->es_url != NULL) { free(indexer->es_url); @@ -64,26 +64,21 @@ void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) { cJSON_Delete(line); } -void index_json_func(tpool_work_arg_shm_t *arg) { - // Copy arg to heap because it's going to be freed immediately after this function returns - es_bulk_line_t *line = 
malloc(arg->arg_size); - memcpy(line, arg->arg, arg->arg_size); - - elastic_index_line(line); +void index_json_func(job_t *job) { + elastic_index_line(job->bulk_line); } -void delete_document(const char *document_id_str, void *UNUSED(_data)) { +void delete_document(const char *document_id) { es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t)); bulk_line->type = ES_BULK_LINE_DELETE; bulk_line->next = NULL; - strcpy(bulk_line->doc_id, document_id_str); + strcpy(bulk_line->doc_id, document_id); - tpool_work_arg_t arg = { - .arg_size = sizeof(es_bulk_line_t), - .arg = bulk_line - }; - tpool_add_work(IndexCtx.pool, index_json_func, &arg); + tpool_add_work(IndexCtx.pool, &(job_t) { + .type = JOB_BULK_LINE, + .bulk_line = bulk_line, + }); } @@ -100,11 +95,10 @@ void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) { bulk_line->next = NULL; cJSON_free(json); - tpool_work_arg_t arg = { - .arg_size = sizeof(es_bulk_line_t) + json_len + 2, - .arg = bulk_line - }; - tpool_add_work(IndexCtx.pool, index_json_func, &arg); + tpool_add_work(IndexCtx.pool, &(job_t) { + .type = JOB_BULK_LINE, + .bulk_line = bulk_line, + }); } void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) { @@ -278,7 +272,7 @@ void print_error(response_t *r) { void _elastic_flush(int max) { if (max == 0) { - LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue") + LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue"); return; } @@ -291,13 +285,13 @@ void _elastic_flush(int max) { response_t *r = web_post(bulk_url, buf, IndexCtx.es_insecure_ssl); if (r->status_code == 0) { - LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url) + LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url); } if (r->status_code == 413) { if (max <= 1) { - LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", 
Indexer->line_head->doc_id) + LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id); free_response(r); free(buf); free_queue(1); @@ -318,7 +312,7 @@ void _elastic_flush(int max) { free_response(r); free(buf); - LOG_WARNING("elastic.c", "Got 429 status, will retry after delay") + LOG_WARNING("elastic.c", "Got 429 status, will retry after delay"); usleep(1000000 * 20); _elastic_flush(max); return; @@ -453,7 +447,7 @@ es_version_t *elastic_get_version(const char *es_url, int insecure) { } if (cJSON_GetObjectItem(response, "error") != NULL) { - LOG_WARNING("elastic.c", "Could not get Elasticsearch version") + LOG_WARNING("elastic.c", "Could not get Elasticsearch version"); print_error(r); free_response(r); return NULL; @@ -489,7 +483,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s IndexCtx.es_version = es_version; if (es_version == NULL) { - LOG_FATAL("elastic.c", "Could not get ES version") + LOG_FATAL("elastic.c", "Could not get ES version"); } LOG_INFOF("elastic.c", @@ -497,7 +491,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), IS_LEGACY_VERSION(es_version)); if (!IS_SUPPORTED_ES_VERSION(es_version)) { - LOG_FATAL("elastic.c", "This elasticsearch version is not supported!") + LOG_FATAL("elastic.c", "This elasticsearch version is not supported!"); } char *settings = NULL; @@ -524,7 +518,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s if (r->status_code != 200) { print_error(r); - LOG_FATAL("elastic.c", "Could not create index") + LOG_FATAL("elastic.c", "Could not create index"); } LOG_INFOF("elastic.c", "Create index <%d>", r->status_code); @@ -545,7 +539,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code); if (r->status_code != 200) { 
print_error(r); - LOG_FATAL("elastic.c", "Could not update user settings") + LOG_FATAL("elastic.c", "Could not update user settings"); } free_response(r); @@ -560,7 +554,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s LOG_INFOF("elastic.c", "Update ES mappings <%d>", r->status_code); if (r->status_code != 200) { print_error(r); - LOG_FATAL("elastic.c", "Could not update user mappings") + LOG_FATAL("elastic.c", "Could not update user mappings"); } free_response(r); diff --git a/src/index/elastic.h b/src/index/elastic.h index 319fe71..94c847d 100644 --- a/src/index/elastic.h +++ b/src/index/elastic.h @@ -46,7 +46,7 @@ void print_json(cJSON *document, const char index_id_str[SIST_INDEX_ID_LEN]); void index_json(cJSON *document, const char doc_id[SIST_INDEX_ID_LEN]); -void delete_document(const char *document_id_str, void* data); +void delete_document(const char *document_id); es_indexer_t *create_indexer(const char *url, const char *index); diff --git a/src/index/web.c b/src/index/web.c index f608da1..4dcd9bc 100644 --- a/src/index/web.c +++ b/src/index/web.c @@ -65,7 +65,7 @@ void web_post_async_poll(subreq_ctx_t *req) { curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code); if (req->response->status_code == 0) { - LOG_ERRORF("web.c", "CURL Error: %s", req->curl_err_buffer) + LOG_ERRORF("web.c", "CURL Error: %s", req->curl_err_buffer); } curl_multi_cleanup(req->multi); @@ -104,7 +104,7 @@ subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) { curl_multi_add_handle(req->multi, curl); curl_multi_perform(req->multi, &req->running_handles); - LOG_DEBUGF("web.c", "async request POST %s", url) + LOG_DEBUGF("web.c", "async request POST %s", url); return req; } @@ -136,7 +136,7 @@ response_t *web_get(const char *url, int timeout, int insecure) { curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code); if (resp->status_code == 0) { - LOG_ERRORF("web.c", "CURL Error: %s", 
err_buffer) + LOG_ERRORF("web.c", "CURL Error: %s", err_buffer); } curl_easy_cleanup(curl); @@ -180,7 +180,7 @@ response_t *web_post(const char *url, const char *data, int insecure) { resp->size = buffer.cur; if (resp->status_code == 0) { - LOG_ERRORF("web.c", "CURL Error: %s", err_buffer) + LOG_ERRORF("web.c", "CURL Error: %s", err_buffer); } curl_easy_cleanup(curl); diff --git a/src/io/serialize.c b/src/io/serialize.c index c438025..a1432d7 100644 --- a/src/io/serialize.c +++ b/src/io/serialize.c @@ -1,9 +1,7 @@ #include "src/ctx.h" #include "serialize.h" -#include "src/parsing/parse.h" #include "src/parsing/mime.h" -#include char *get_meta_key_text(enum metakey meta_key) { @@ -79,7 +77,7 @@ char *get_meta_key_text(enum metakey meta_key) { case MetaChecksum: return "checksum"; default: - LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key) + LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key); } } @@ -175,7 +173,7 @@ char *build_json_string(document_t *doc) { break; } default: - LOG_FATALF("serialize.c", "Invalid meta key: %x %s", meta->key, get_meta_key_text(meta->key)) + LOG_FATALF("serialize.c", "Invalid meta key: %x %s", meta->key, get_meta_key_text(meta->key)); } meta_line_t *tmp = meta; @@ -189,394 +187,10 @@ char *build_json_string(document_t *doc) { return json_str; } -static struct { - FILE *out_file; - size_t buf_out_size; - - void *buf_out; - - ZSTD_CCtx *cctx; -} WriterCtx = { - .out_file = NULL -}; - -#define ZSTD_COMPRESSION_LEVEL 10 - -void initialize_writer_ctx(const char *file_path) { - WriterCtx.out_file = fopen(file_path, "wb"); - - WriterCtx.buf_out_size = ZSTD_CStreamOutSize(); - WriterCtx.buf_out = malloc(WriterCtx.buf_out_size); - - WriterCtx.cctx = ZSTD_createCCtx(); - - ZSTD_CCtx_setParameter(WriterCtx.cctx, ZSTD_c_compressionLevel, ZSTD_COMPRESSION_LEVEL); - ZSTD_CCtx_setParameter(WriterCtx.cctx, ZSTD_c_checksumFlag, FALSE); - - LOG_DEBUGF("serialize.c", "Open index file for writing %s", file_path) -} - -void 
zstd_write_string(const char *string, const size_t len) { - ZSTD_inBuffer input = {string, len, 0}; - - do { - ZSTD_outBuffer output = {WriterCtx.buf_out, WriterCtx.buf_out_size, 0}; - ZSTD_compressStream2(WriterCtx.cctx, &output, &input, ZSTD_e_continue); - - if (output.pos > 0) { - ScanCtx.stat_index_size += fwrite(WriterCtx.buf_out, 1, output.pos, WriterCtx.out_file); - } - } while (input.pos != input.size); -} - -void write_document_func(tpool_work_arg_shm_t *arg) { - - const char *json_str = arg->arg; - - if (WriterCtx.out_file == NULL) { - char dstfile[PATH_MAX]; - snprintf(dstfile, PATH_MAX, "%s_index_main.ndjson.zst", ScanCtx.index.path); - initialize_writer_ctx(dstfile); - } - - zstd_write_string(json_str, arg->arg_size); -} - -void zstd_close() { - if (WriterCtx.out_file == NULL) { - LOG_DEBUG("serialize.c", "No zstd stream to close, skipping cleanup") - return; - } - - size_t remaining; - do { - ZSTD_outBuffer output = {WriterCtx.buf_out, WriterCtx.buf_out_size, 0}; - remaining = ZSTD_endStream(WriterCtx.cctx, &output); - - if (output.pos > 0) { - ScanCtx.stat_index_size += fwrite(WriterCtx.buf_out, 1, output.pos, WriterCtx.out_file); - } - } while (remaining != 0); - - ZSTD_freeCCtx(WriterCtx.cctx); - free(WriterCtx.buf_out); - fclose(WriterCtx.out_file); - - LOG_DEBUG("serialize.c", "End zstd stream & close index file") -} - -void writer_cleanup() { - zstd_close(); - WriterCtx.out_file = NULL; -} - -void write_index_descriptor(char *path, index_descriptor_t *desc) { - cJSON *json = cJSON_CreateObject(); - cJSON_AddStringToObject(json, "id", desc->id); - cJSON_AddStringToObject(json, "version", desc->version); - cJSON_AddStringToObject(json, "root", desc->root); - cJSON_AddStringToObject(json, "name", desc->name); - cJSON_AddStringToObject(json, "type", desc->type); - cJSON_AddStringToObject(json, "rewrite_url", desc->rewrite_url); - cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp); - - int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR 
| S_IWUSR); - if (fd < 0) { - LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno)); - } - char *str = cJSON_Print(json); - size_t ret = write(fd, str, strlen(str)); - if (ret == -1) { - LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno)); - } - free(str); - close(fd); - - cJSON_Delete(json); -} - -index_descriptor_t read_index_descriptor(char *path) { - - struct stat info; - stat(path, &info); - int fd = open(path, O_RDONLY); - - if (fd == -1) { - LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno)) - } - - char *buf = malloc(info.st_size + 1); - size_t ret = read(fd, buf, info.st_size); - if (ret == -1) { - LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno)); - } - *(buf + info.st_size) = '\0'; - close(fd); - - cJSON *json = cJSON_Parse(buf); - - index_descriptor_t descriptor; - descriptor.timestamp = (long) cJSON_GetObjectItem(json, "timestamp")->valuedouble; - strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring); - strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring); - strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring); - descriptor.root_len = (short) strlen(descriptor.root); - strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring); - strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring); - if (cJSON_GetObjectItem(json, "type") == NULL) { - strcpy(descriptor.type, INDEX_TYPE_NDJSON); - } else { - strcpy(descriptor.type, cJSON_GetObjectItem(json, "type")->valuestring); - } - - cJSON_Delete(json); - free(buf); - - return descriptor; -} - - void write_document(document_t *doc) { char *json_str = build_json_string(doc); + + database_write_document(ProcData.index_db, doc, json_str); free(doc); - const size_t json_str_len = strlen(json_str); - - json_str = realloc(json_str, json_str_len + 1); - *(json_str + 
json_str_len) = '\n'; - - tpool_work_arg_t arg = { - .arg_size = json_str_len + 1, - .arg = json_str - }; - - tpool_add_work(ScanCtx.writer_pool, write_document_func, &arg); -} - -void thread_cleanup() { - cleanup_parse(); - cleanup_font(); -} - -void read_index_bin_handle_line(const char *line, const char *index_id, index_func func) { - - cJSON *document = cJSON_Parse(line); - const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring; - - cJSON_AddStringToObject(document, "index", index_id); - - // Load meta from sidecar files - cJSON *meta_obj = NULL; - if (IndexCtx.meta != NULL) { - const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str); - if (meta_string != NULL) { - meta_obj = cJSON_Parse(meta_string); - - cJSON *child; - for (child = meta_obj->child; child != NULL; child = child->next) { - char meta_key[4096]; - strcpy(meta_key, child->string); - cJSON_DeleteItemFromObject(document, meta_key); - cJSON_AddItemReferenceToObject(document, meta_key, child); - } - } - } - - // Load tags from tags DB - if (IndexCtx.tags != NULL) { - const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str); - if (tags_string != NULL) { - cJSON *tags_arr = cJSON_Parse(tags_string); - cJSON_DeleteItemFromObject(document, "tag"); - cJSON_AddItemToObject(document, "tag", tags_arr); - } - } - - func(document, path_md5_str); - cJSON_DeleteItemFromObject(document, "_id"); - cJSON_Delete(document); - if (meta_obj) { - cJSON_Delete(meta_obj); - } -} - -void read_lines(const char *path, const line_processor_t processor) { - dyn_buffer_t buf = dyn_buffer_create(); - - // Initialize zstd things - FILE *file = fopen(path, "rb"); - - size_t const buf_in_size = ZSTD_DStreamInSize(); - void *const buf_in = malloc(buf_in_size); - - size_t const buf_out_size = ZSTD_DStreamOutSize(); - void *const buf_out = malloc(buf_out_size); - - ZSTD_DCtx *const dctx = ZSTD_createDCtx(); - - size_t read; - size_t last_ret = 0; - while ((read = fread(buf_in, 
1, buf_in_size, file))) { - ZSTD_inBuffer input = {buf_in, read, 0}; - - while (input.pos < input.size) { - ZSTD_outBuffer output = {buf_out, buf_out_size, 0}; - - size_t const ret = ZSTD_decompressStream(dctx, &output, &input); - - for (int i = 0; i < output.pos; i++) { - char c = ((char *) output.dst)[i]; - - if (c == '\n') { - dyn_buffer_write_char(&buf, '\0'); - processor.func(buf.buf, processor.data); - buf.cur = 0; - } else { - dyn_buffer_write_char(&buf, c); - } - } - - last_ret = ret; - } - } - - if (last_ret != 0) { - /* The last return value from ZSTD_decompressStream did not end on a - * frame, but we reached the end of the file! We assume this is an - * error, and the input was truncated. - */ - LOG_FATALF("serialize.c", "EOF before end of stream: %zu", last_ret) - } - - ZSTD_freeDCtx(dctx); - free(buf_in); - free(buf_out); - - dyn_buffer_destroy(&buf); - fclose(file); -} - -void read_index_ndjson(const char *line, void *_data) { - void **data = _data; - const char *index_id = data[0]; - index_func func = data[1]; - read_index_bin_handle_line(line, index_id, func); -} - -void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func func) { - if (strcmp(type, INDEX_TYPE_NDJSON) == 0) { - read_lines(path, (line_processor_t) { - .data = (void *[2]) {(void *) index_id, func}, - .func = read_index_ndjson, - }); - } -} - -static __thread GHashTable *IncrementalReadTable = NULL; - -void json_put_incremental(cJSON *document, UNUSED(const char doc_id[SIST_DOC_ID_LEN])) { - const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring; - const int mtime = cJSON_GetObjectItem(document, "mtime")->valueint; - - incremental_put(IncrementalReadTable, path_md5_str, mtime); -} - -void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc) { - IncrementalReadTable = table; - read_index(filepath, desc->id, desc->type, json_put_incremental); -} - -static __thread GHashTable 
*IncrementalCopyTable = NULL; -static __thread GHashTable *IncrementalNewTable = NULL; -static __thread store_t *IncrementalCopySourceStore = NULL; -static __thread store_t *IncrementalCopyDestinationStore = NULL; - -void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) { - - const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring; - - if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get(IncrementalCopyTable, doc_id)) { - // Copy index line - cJSON_DeleteItemFromObject(document, "index"); - char *json_str = cJSON_PrintUnformatted(document); - const size_t json_str_len = strlen(json_str); - - json_str = realloc(json_str, json_str_len + 1); - *(json_str + json_str_len) = '\n'; - - // Copy tn store contents - size_t buf_len; - char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len); - if (buf_len != 0) { - store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len); - free(buf); - } - - // Also copy additional thumbnails - if (cJSON_GetObjectItem(document, "thumbnail") != NULL) { - const int thumbnail_count = cJSON_GetObjectItem(document, "thumbnail")->valueint; - - for (int i = 1; i < thumbnail_count; i++) { - char tn_key[SIST_DOC_ID_LEN + sizeof(char) * 4]; - - snprintf(tn_key, sizeof(tn_key), "%s%04d", doc_id, i); - - buf = store_read(IncrementalCopySourceStore, tn_key, sizeof(tn_key), &buf_len); - if (buf_len != 0) { - store_write(IncrementalCopyDestinationStore, tn_key, sizeof(tn_key), buf, buf_len); - free(buf); - } - } - } - - zstd_write_string(json_str, json_str_len + 1); - free(json_str); - } -} - -/** - * Copy items from an index that are in the copy_table. Also copies from - * the store. 
- */ -void incremental_copy(store_t *store, store_t *dst_store, const char *filepath, - const char *dst_filepath, GHashTable *copy_table) { - - if (WriterCtx.out_file == NULL) { - initialize_writer_ctx(dst_filepath); - } - - IncrementalCopyTable = copy_table; - IncrementalCopySourceStore = store; - IncrementalCopyDestinationStore = dst_store; - - read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc); -} - -void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) { - - char doc_id_n[SIST_DOC_ID_LEN + 1]; - doc_id_n[SIST_DOC_ID_LEN] = '\0'; - doc_id_n[SIST_DOC_ID_LEN - 1] = '\n'; - const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring; - - // do not delete archive virtual entries - if (cJSON_GetObjectItem(document, "parent") == NULL - && !incremental_get(IncrementalCopyTable, doc_id) - && !incremental_get(IncrementalNewTable, doc_id) - ) { - memcpy(doc_id_n, doc_id, SIST_DOC_ID_LEN - 1); - zstd_write_string(doc_id, sizeof(doc_id_n)); - } -} - -void incremental_delete(const char *del_filepath, const char *index_filepath, - GHashTable *copy_table, GHashTable *new_table) { - - if (WriterCtx.out_file == NULL) { - initialize_writer_ctx(del_filepath); - } - - IncrementalCopyTable = copy_table; - IncrementalNewTable = new_table; - - read_index(index_filepath, "", INDEX_TYPE_NDJSON, incremental_delete_handle_doc); -} + free(json_str); +} \ No newline at end of file diff --git a/src/io/serialize.h b/src/io/serialize.h index 2da3cb3..83614bf 100644 --- a/src/io/serialize.h +++ b/src/io/serialize.h @@ -2,55 +2,7 @@ #define SIST2_SERIALIZE_H #include "src/sist.h" -#include "store.h" - -#include -#include - -typedef struct line_processor { - void* data; - void (*func)(const char*, void*); -} line_processor_t; - -typedef void(*index_func)(cJSON *, const char[SIST_DOC_ID_LEN]); - -void incremental_copy(store_t *store, store_t *dst_store, const char *filepath, - const char *dst_filepath, GHashTable 
*copy_table); - -void incremental_delete(const char *del_filepath, const char* index_filepath, - GHashTable *copy_table, GHashTable *new_table); void write_document(document_t *doc); -void read_lines(const char *path, const line_processor_t processor); - -void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func); - -void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc); - -/** - * Must be called after write_document - */ -void thread_cleanup(); - -void writer_cleanup(); - -void write_index_descriptor(char *path, index_descriptor_t *desc); - -index_descriptor_t read_index_descriptor(char *path); - -// caller ensures char file_path[PATH_MAX] -#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \ - snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \ - if (access(file_path, R_OK) == 0) { \ - action_ok; \ - } else { \ - action_main_fail; \ - } \ - snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \ - if ((cond_original) && access(file_path, R_OK) == 0) { \ - action_ok; \ - } \ - - #endif diff --git a/src/io/store.c b/src/io/store.c deleted file mode 100644 index dad686b..0000000 --- a/src/io/store.c +++ /dev/null @@ -1,232 +0,0 @@ -#include -#include "store.h" -#include "src/ctx.h" - -//#define SIST_FAKE_STORE 1 - -void open_env(const char *path, MDB_env **env, MDB_dbi *dbi) { - mdb_env_create(env); - - int open_ret = mdb_env_open(*env, - path, - MDB_WRITEMAP | MDB_MAPASYNC, - S_IRUSR | S_IWUSR - ); - - if (open_ret != 0) { - LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path) - } - - MDB_txn *txn; - mdb_txn_begin(*env, NULL, 0, &txn); - mdb_dbi_open(txn, NULL, 0, dbi); - mdb_txn_commit(txn); -} - -store_t *store_create(const char *path, size_t chunk_size) { - store_t *store = calloc(1, sizeof(struct store_t)); - mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR); - 
strcpy(store->path, path); - - MDB_env *env; - MDB_dbi dbi; - -#if (SIST_FAKE_STORE != 1) - store->chunk_size = chunk_size; - - store->shm = mmap(NULL, sizeof(*store->shm), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); - - open_env(path, &env, &dbi); - - store->shm->size = (size_t) store->chunk_size; - mdb_env_set_mapsize(env, store->shm->size); - - // Close, child processes will open the environment again - mdb_env_close(env); -#endif - - return store; -} - -void store_destroy(store_t *store) { - - LOG_DEBUG("store.c", "store_destroy()") -#if (SIST_FAKE_STORE != 1) - munmap(store->shm, sizeof(*store->shm)); - - mdb_dbi_close(store->proc.env, store->proc.dbi); - mdb_env_close(store->proc.env); -#endif - free(store); -} - -void store_flush(store_t *store) { - mdb_env_sync(store->proc.env, TRUE); -} - -void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) { - - ScanCtx.stat_tn_size += buf_len; - - if (LogCtx.very_verbose) { - LOG_DEBUGF("store.c", "Store write %s@{%s} %lu bytes", store->path, key, buf_len) - } - -#if (SIST_FAKE_STORE != 1) - - if (store->proc.env == NULL) { - open_env(store->path, &store->proc.env, &store->proc.dbi); - LOG_DEBUGF("store.c", "Opening mdb environment %s", store->path) - } - - MDB_val mdb_key; - mdb_key.mv_data = key; - mdb_key.mv_size = key_len; - - MDB_val mdb_value; - mdb_value.mv_data = buf; - mdb_value.mv_size = buf_len; - - MDB_txn *txn; - - int db_full = FALSE; - int put_ret = 0; - int should_abort_transaction = FALSE; - int should_increase_size = TRUE; - - int begin_ret = mdb_txn_begin(store->proc.env, NULL, 0, &txn); - - if (begin_ret == MDB_MAP_RESIZED) { - // mapsize was increased by another process. We don't need to increase the size again, but we need - // to update the size of the environment for the current process. 
- db_full = TRUE; - should_increase_size = FALSE; - } else { - put_ret = mdb_put(txn, store->proc.dbi, &mdb_key, &mdb_value, 0); - - if (put_ret == MDB_MAP_FULL) { - // Database is full, we need to increase the environment size - db_full = TRUE; - should_abort_transaction = TRUE; - } else { - int commit_ret = mdb_txn_commit(txn); - - if (commit_ret == MDB_MAP_FULL) { - db_full = TRUE; - } - } - } - - if (db_full) { - LOG_DEBUGF("store.c", "Updating mdb mapsize to %lu bytes", store->shm->size) - - if (should_abort_transaction) { - mdb_txn_abort(txn); - } - - // Cannot resize when there is an opened transaction in this process. - // Resize take effect on the next commit. - if (should_increase_size) { - store->shm->size += store->chunk_size; - } - int resize_ret = mdb_env_set_mapsize(store->proc.env, store->shm->size); - if (resize_ret != 0) { - LOG_ERRORF("store.c", "mdb_env_set_mapsize() failed: %s", mdb_strerror(resize_ret)) - } - mdb_txn_begin(store->proc.env, NULL, 0, &txn); - int put_ret_retry = mdb_put(txn, store->proc.dbi, &mdb_key, &mdb_value, 0); - - if (put_ret_retry != 0) { - LOG_ERRORF("store.c", "mdb_put() (retry) failed: %s", mdb_strerror(put_ret_retry)) - } - - int ret = mdb_txn_commit(txn); - if (ret != 0) { - LOG_FATALF("store.c", "FIXME: Could not commit to store %s: %s (%d), %d, %d %d", - store->path, mdb_strerror(ret), ret, - ret, put_ret_retry) - } - LOG_DEBUGF("store.c", "Updated mdb mapsize to %lu bytes", store->shm->size) - } else if (put_ret != 0) { - LOG_ERRORF("store.c", "mdb_put() failed: %s", mdb_strerror(put_ret)) - } - -#endif -} - -char *store_read(store_t *store, char *key, size_t key_len, size_t *return_value_len) { - char *buf = NULL; - -#if (SIST_FAKE_STORE != 1) - if (store->proc.env == NULL) { - open_env(store->path, &store->proc.env, &store->proc.dbi); - } - - MDB_val mdb_key; - mdb_key.mv_data = key; - mdb_key.mv_size = key_len; - - MDB_val mdb_value; - - MDB_txn *txn; - mdb_txn_begin(store->proc.env, NULL, MDB_RDONLY, &txn); - 
- int get_ret = mdb_get(txn, store->proc.dbi, &mdb_key, &mdb_value); - - if (get_ret == MDB_NOTFOUND) { - *return_value_len = 0; - } else { - *return_value_len = mdb_value.mv_size; - buf = malloc(mdb_value.mv_size); - memcpy(buf, mdb_value.mv_data, mdb_value.mv_size); - } - - mdb_txn_abort(txn); -#endif - return buf; -} - -GHashTable *store_read_all(store_t *store) { - - if (store->proc.env == NULL) { - open_env(store->path, &store->proc.env, &store->proc.dbi); - LOG_DEBUGF("store.c", "Opening mdb environment %s", store->path) - } - - int count = 0; - - GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free); - - MDB_txn *txn = NULL; - mdb_txn_begin(store->proc.env, NULL, MDB_RDONLY, &txn); - - MDB_cursor *cur = NULL; - mdb_cursor_open(txn, store->proc.dbi, &cur); - - MDB_val key; - MDB_val value; - - while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) { - char *key_str = malloc(key.mv_size); - memcpy(key_str, key.mv_data, key.mv_size); - char *val_str = malloc(value.mv_size); - memcpy(val_str, value.mv_data, value.mv_size); - - g_hash_table_insert(table, key_str, val_str); - count += 1; - } - - const char *path; - mdb_env_get_path(store->proc.env, &path); - LOG_DEBUGF("store.c", "Read %d entries from %s", count, path) - - mdb_cursor_close(cur); - mdb_txn_abort(txn); - return table; -} - - -void store_copy(store_t *store, const char *destination) { - mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR); - mdb_env_copy(store->proc.env, destination); -} diff --git a/src/io/store.h b/src/io/store.h deleted file mode 100644 index ce27ded..0000000 --- a/src/io/store.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef SIST2_STORE_H -#define SIST2_STORE_H - -#include -#include - -#include - -#define STORE_SIZE_TN (1024 * 1024 * 5) -#define STORE_SIZE_TAG (1024 * 1024) -#define STORE_SIZE_META STORE_SIZE_TAG - - -typedef struct store_t { - char path[PATH_MAX]; - size_t chunk_size; - - struct { - MDB_dbi dbi; - MDB_env *env; - } proc; - - struct { - size_t 
size; - } *shm; -} store_t; - -store_t *store_create(const char *path, size_t chunk_size); - -void store_destroy(store_t *store); - -void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len); - -void store_flush(store_t *store); - -char *store_read(store_t *store, char *key, size_t key_len, size_t *return_value_len); - -GHashTable *store_read_all(store_t *store); - -void store_copy(store_t *store, const char *destination); - -#endif diff --git a/src/io/walk.c b/src/io/walk.c index b6019cf..c9fa8b0 100644 --- a/src/io/walk.c +++ b/src/io/walk.c @@ -1,46 +1,12 @@ #include "walk.h" #include "src/ctx.h" -#include "src/parsing/parse.h" +#include "src/parsing/fs_util.h" #include +#include #define STR_STARTS_WITH(x, y) (strncmp(y, x, strlen(y) - 1) == 0) -__always_inline -parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) { - int len = (int) strlen(filepath); - parse_job_t *job = malloc(sizeof(parse_job_t)); - - strcpy(job->filepath, filepath); - job->base = base; - char *p = strrchr(filepath + base, '.'); - if (p != NULL) { - job->ext = (int) (p - filepath + 1); - } else { - job->ext = len; - } - - job->vfile.st_size = info->st_size; - job->vfile.st_mode = info->st_mode; - job->vfile.mtime = (int) info->st_mtim.tv_sec; - - job->parent[0] = '\0'; - - memcpy(job->vfile.filepath, job->filepath, sizeof(job->vfile.filepath)); - job->vfile.read = fs_read; - // Filesystem reads are always rewindable - job->vfile.read_rewindable = fs_read; - job->vfile.reset = fs_reset; - job->vfile.close = fs_close; - job->vfile.fd = -1; - job->vfile.is_fs_file = TRUE; - job->vfile.has_checksum = FALSE; - job->vfile.rewind_buffer_size = 0; - job->vfile.rewind_buffer = NULL; - job->vfile.calculate_checksum = ScanCtx.calculate_checksums; - - return job; -} int sub_strings[30]; #define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, str, strlen(str), 0, 0, sub_strings, sizeof(sub_strings)) >= 0) @@ -55,7 +21,7 @@ 
int handle_entry(const char *filepath, const struct stat *info, int typeflag, st } if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) { - LOG_DEBUGF("walk.c", "Excluded: %s", filepath) + LOG_DEBUGF("walk.c", "Excluded: %s", filepath); if (typeflag == FTW_F && S_ISREG(info->st_mode)) { pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); @@ -69,13 +35,13 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st } if (typeflag == FTW_F && S_ISREG(info->st_mode)) { - parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base); + parse_job_t *job = create_parse_job(filepath, (int) info->st_mtim.tv_sec, info->st_size); - tpool_work_arg_t arg = { - .arg_size = sizeof(parse_job_t), - .arg = job - }; - tpool_add_work(ScanCtx.pool, parse, &arg); + tpool_add_work(ScanCtx.pool, &(job_t) { + .type = JOB_PARSE_JOB, + .parse_job = job + }); + free(job); } return FTW_CONTINUE; @@ -116,7 +82,7 @@ int iterate_file_list(void *input_file) { } if (ScanCtx.exclude != NULL && EXCLUDED(absolute_path)) { - LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path) + LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path); if (S_ISREG(info.st_mode)) { pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); @@ -131,16 +97,14 @@ int iterate_file_list(void *input_file) { LOG_FATALF("walk.c", "File is not a children of root folder (%s): %s", ScanCtx.index.desc.root, buf); } - int base = (int) (strrchr(buf, '/') - buf) + 1; - - parse_job_t *job = create_fs_parse_job(absolute_path, &info, base); + parse_job_t *job = create_parse_job(absolute_path, (int) info.st_mtim.tv_sec, info.st_size); free(absolute_path); - tpool_work_arg_t arg = { - .arg = job, - .arg_size = sizeof(parse_job_t) - }; - tpool_add_work(ScanCtx.pool, parse, &arg); + tpool_add_work(ScanCtx.pool, &(job_t) { + .type = JOB_PARSE_JOB, + .parse_job = job + }); + free(job); } return 0; diff --git a/src/log.c b/src/log.c index b5e1ece..4ca6869 100644 --- a/src/log.c +++ b/src/log.c @@ -21,8 +21,6 @@ void vsist_logf(const char 
*filepath, int level, char *format, va_list ap) { char log_str[LOG_MAX_LENGTH]; - unsigned long long pid = (unsigned long long) pthread_self(); - char datetime[32]; time_t t; struct tm result; @@ -42,8 +40,8 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) { log_len = snprintf( log_str, sizeof(log_str), - "{\"thread\":\"%04llX\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n", - pid, datetime, log_levels[level], filepath_json_str, log_str_json_str + "{\"thread\":\"T%d\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n", + ProcData.thread_id, datetime, log_levels[level], filepath_json_str, log_str_json_str ); cJSON_Delete(filepath_json); @@ -58,15 +56,15 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) { if (is_tty) { log_len = snprintf( log_str, sizeof(log_str), - "\033[%dm[%04llX]%s [%s] [%s %s] ", - 31 + ((unsigned int) (pid)) % 7, pid, log_colors[level], + "\033[%dmT%d%s [%s] [%s %s] ", + 31 + ProcData.thread_id % 7, ProcData.thread_id, log_colors[level], datetime, log_levels[level], filepath ); } else { log_len = snprintf( log_str, sizeof(log_str), - "[%04llX] [%s] [%s %s] ", - pid, datetime, log_levels[level], filepath + "T%d [%s] [%s %s] ", + ProcData.thread_id, datetime, log_levels[level], filepath ); } @@ -112,8 +110,6 @@ void sist_log(const char *filepath, int level, char *str) { char log_str[LOG_MAX_LENGTH]; - unsigned long long pid = (unsigned long long) pthread_self(); - char datetime[32]; time_t t; struct tm result; @@ -132,8 +128,8 @@ void sist_log(const char *filepath, int level, char *str) { log_len = snprintf( log_str, sizeof(log_str), - "{\"thread\":\"%04llX\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n", - pid, datetime, log_levels[level], filepath_json_str, log_str_json_str + "{\"thread\":\"T%d\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n", + ProcData.thread_id, datetime, 
log_levels[level], filepath_json_str, log_str_json_str ); cJSON_Delete(log_str_json); @@ -147,16 +143,16 @@ void sist_log(const char *filepath, int level, char *str) { if (is_tty) { log_len = snprintf( log_str, sizeof(log_str), - "\033[%dm[%04llX]%s [%s] [%s %s] %s \033[0m\n", - 31 + ((unsigned int) (pid)) % 7, pid, log_colors[level], + "\033[%dmT%d%s [%s] [%s %s] %s \033[0m\n", + 31 + ProcData.thread_id % 7, ProcData.thread_id, log_colors[level], datetime, log_levels[level], filepath, str ); } else { log_len = snprintf( log_str, sizeof(log_str), - "[%04llX] [%s] [%s %s] %s \n", - pid, datetime, log_levels[level], filepath, + "T%d [%s] [%s %s] %s \n", + ProcData.thread_id, datetime, log_levels[level], filepath, str ); } diff --git a/src/log.h b/src/log.h index 113a577..2426be3 100644 --- a/src/log.h +++ b/src/log.h @@ -2,6 +2,7 @@ #define SIST2_LOG_H +#include #define LOG_MAX_LENGTH 8192 #define LOG_SIST_DEBUG 0 @@ -10,37 +11,37 @@ #define LOG_SIST_ERROR 3 #define LOG_SIST_FATAL 4 -#define LOG_DEBUGF(filepath, fmt, ...) \ - if (LogCtx.very_verbose) {sist_logf(filepath, LOG_SIST_DEBUG, fmt, __VA_ARGS__);} -#define LOG_DEBUG(filepath, str) \ - if (LogCtx.very_verbose) {sist_log(filepath, LOG_SIST_DEBUG, str);} +#define LOG_DEBUGF(filepath, fmt, ...) do{\ + if (LogCtx.very_verbose) {sist_logf(filepath, LOG_SIST_DEBUG, fmt, __VA_ARGS__);}}while(0) +#define LOG_DEBUG(filepath, str) do{\ + if (LogCtx.very_verbose) {sist_log(filepath, LOG_SIST_DEBUG, str);}}while(0) -#define LOG_INFOF(filepath, fmt, ...) \ - if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_INFO, fmt, __VA_ARGS__);} -#define LOG_INFO(filepath, str) \ - if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_INFO, str);} +#define LOG_INFOF(filepath, fmt, ...) do {\ + if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_INFO, fmt, __VA_ARGS__);}} while(0) +#define LOG_INFO(filepath, str) do {\ + if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_INFO, str);}} while(0) -#define LOG_WARNINGF(filepath, fmt, ...) 
\ - if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_WARNING, fmt, __VA_ARGS__);} -#define LOG_WARNING(filepath, str) \ - if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_WARNING, str);} +#define LOG_WARNINGF(filepath, fmt, ...) do {\ + if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_WARNING, fmt, __VA_ARGS__);}}while(0) +#define LOG_WARNING(filepath, str) do{\ + if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_WARNING, str);}}while(0) -#define LOG_ERRORF(filepath, fmt, ...) \ - if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_ERROR, fmt, __VA_ARGS__);} -#define LOG_ERROR(filepath, str) \ - if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_ERROR, str);} +#define LOG_ERRORF(filepath, fmt, ...) do {\ + if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_ERROR, fmt, __VA_ARGS__);}}while(0) +#define LOG_ERROR(filepath, str) do{\ + if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_ERROR, str);}}while(0) -#define LOG_FATALF(filepath, fmt, ...) \ +#define LOG_FATALF(filepath, fmt, ...)\ sist_logf(filepath, LOG_SIST_FATAL, fmt, __VA_ARGS__);\ - exit(-1); + raise(SIGUSR1) #define LOG_FATAL(filepath, str) \ sist_log(filepath, LOG_SIST_FATAL, str);\ - exit(-1); + exit(SIGUSR1) #define LOG_FATALF_NO_EXIT(filepath, fmt, ...) \ - sist_logf(filepath, LOG_SIST_FATAL, fmt, __VA_ARGS__); + sist_logf(filepath, LOG_SIST_FATAL, fmt, __VA_ARGS__) #define LOG_FATAL_NO_EXIT(filepath, str) \ - sist_log(filepath, LOG_SIST_FATAL, str); + sist_log(filepath, LOG_SIST_FATAL, str) #include "sist.h" diff --git a/src/main.c b/src/main.c index 38a5dd5..72bdc89 100644 --- a/src/main.c +++ b/src/main.c @@ -5,8 +5,6 @@ #include #include "cli.h" -#include "io/serialize.h" -#include "io/store.h" #include "tpool.h" #include "io/walk.h" #include "index/elastic.h" @@ -16,10 +14,9 @@ #include "auth0/auth0_c_api.h" #include -#include -#include +#include -#include "stats.h" +#include "src/database/database.h" #define DESCRIPTION "Lightning-fast file system indexer and search tool." 
@@ -46,30 +43,31 @@ void sig_handler(int signum) { LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n"); LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum)); - if (ScanCtx.dbg_current_files != NULL) { - GHashTableIter iter; - g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files); - - void *key; - void *value; - while (g_hash_table_iter_next(&iter, &key, &value)) { - parse_job_t *job = value; - - if (isatty(STDERR_FILENO)) { - LOG_DEBUGF( - "*SIGNAL HANDLER*", - "Thread \033[%dm[%04llX]\033[0m was working on job '%s'", - 31 + ((unsigned int) key) % 7, key, job->filepath - ); - } else { - LOG_DEBUGF( - "*SIGNAL HANDLER*", - "THREAD [%04llX] was working on job %s", - key, job->filepath - ); - } - } - } + // TODO: Print debug info +// if (ScanCtx.dbg_current_files != NULL) { +// GHashTableIter iter; +// g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files); +// +// void *key; +// void *value; +// while (g_hash_table_iter_next(&iter, &key, &value)) { +// parse_job_t *job = value; +// +// if (isatty(STDERR_FILENO)) { +// LOG_DEBUGF( +// "*SIGNAL HANDLER*", +// "Thread \033[%dm[%04llX]\033[0m was working on job '%s'", +// 31 + ((unsigned int) key) % 7, key, job->filepath +// ); +// } else { +// LOG_DEBUGF( +// "*SIGNAL HANDLER*", +// "THREAD [%04llX] was working on job %s", +// key, job->filepath +// ); +// } +// } +// } if (ScanCtx.pool != NULL) { tpool_dump_debug_info(ScanCtx.pool); @@ -82,18 +80,18 @@ void sig_handler(int signum) { LOG_INFO( "*SIGNAL HANDLER*", "Please consider creating a bug report at https://github.com/simon987/sist2/issues !" - ) + ); LOG_INFO( "*SIGNAL HANDLER*", "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs" - ) + ); #ifndef SIST_DEBUG LOG_WARNING( "*SIGNAL HANDLER*", "You are running sist2 in release mode! 
Please consider downloading the debug binary from the Github " "releases page to provide additionnal information when submitting a bug report." - ) + ); #endif if (signum == SIGSEGV && sigsegv_handler != NULL) { @@ -105,36 +103,59 @@ void sig_handler(int signum) { exit(-1); } -void init_dir(const char *dirpath, scan_args_t *args) { - char path[PATH_MAX]; - snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath); +void database_scan_begin(scan_args_t *args) { + index_descriptor_t *desc = &ScanCtx.index.desc; - time(&ScanCtx.index.desc.timestamp); - strcpy(ScanCtx.index.desc.version, Version); - strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON); + database_t *db = database_create(args->output, INDEX_DATABASE); + + if (args->incremental) { + // Update existing descriptor + database_open(db); + index_descriptor_t *original_desc = database_read_index_descriptor(db); + + // copy original index id + strcpy(desc->id, original_desc->id); + + if (original_desc->version_major != VersionMajor) { + LOG_FATALF("main.c", "Version mismatch! 
Index is %s but executable is %s", original_desc->version, Version); + } + + strcpy(original_desc->root, desc->root); + original_desc->root_len = desc->root_len; + strcpy(original_desc->rewrite_url, desc->rewrite_url); + strcpy(original_desc->name, desc->name); + + time(&original_desc->timestamp); + + database_write_index_descriptor(db, original_desc); + free(original_desc); + + database_incremental_scan_begin(db); - if (args->incremental != NULL) { - // copy old index id - char descriptor_path[PATH_MAX]; - snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental); - index_descriptor_t original_desc = read_index_descriptor(descriptor_path); - memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id)); } else { + // Create new descriptor + + time(&desc->timestamp); + strcpy(desc->version, Version); + desc->version_major = VersionMajor; + desc->version_minor = VersionMinor; + desc->version_patch = VersionPatch; + // generate new index id based on timestamp unsigned char index_md5[MD5_DIGEST_LENGTH]; MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5); buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id); + + database_initialize(db); + database_open(db); + database_write_index_descriptor(db, desc); } - write_index_descriptor(path, &ScanCtx.index.desc); + database_close(db, FALSE); } -void scan_print_header() { - LOG_INFOF("main.c", "sist2 v%s", Version) -} - -void _store(char *key, size_t key_len, char *buf, size_t buf_len) { - store_write(ScanCtx.index.store, key, key_len, buf, buf_len); +void write_thumbnail_callback(char *key, int num, void *buf, size_t buf_len) { + database_write_thumbnail(ProcData.index_db, key, num, buf, buf_len); } void _log(const char *filepath, int level, char *str) { @@ -177,11 +198,8 @@ void _logf(const char *filepath, int level, char *format, ...) 
{ } void initialize_scan_context(scan_args_t *args) { - - ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL); - pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL); + // TODO: shared pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL); - pthread_mutex_init(&ScanCtx.copy_table_mu, NULL); ScanCtx.calculate_checksums = args->calculate_checksums; @@ -189,7 +207,7 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.arc_ctx.mode = args->archive_mode; ScanCtx.arc_ctx.log = _log; ScanCtx.arc_ctx.logf = _logf; - ScanCtx.arc_ctx.parse = (parse_callback_t) parse_job; + ScanCtx.arc_ctx.parse = (parse_callback_t) parse; if (args->archive_passphrase != NULL) { strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase); } else { @@ -199,12 +217,12 @@ void initialize_scan_context(scan_args_t *args) { // Comic ScanCtx.comic_ctx.log = _log; ScanCtx.comic_ctx.logf = _logf; - ScanCtx.comic_ctx.store = _store; + ScanCtx.comic_ctx.store = write_thumbnail_callback; ScanCtx.comic_ctx.enable_tn = args->tn_count > 0; ScanCtx.comic_ctx.tn_size = args->tn_size; ScanCtx.comic_ctx.tn_qscale = args->tn_quality; - ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr"); - ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz"); + ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string("application/x-cbr"); + ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string("application/x-cbz"); // Ebook ScanCtx.ebook_ctx.content_size = args->content_size; @@ -216,7 +234,7 @@ void initialize_scan_context(scan_args_t *args) { } ScanCtx.ebook_ctx.log = _log; ScanCtx.ebook_ctx.logf = _logf; - ScanCtx.ebook_ctx.store = _store; + ScanCtx.ebook_ctx.store = write_thumbnail_callback; ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub; ScanCtx.ebook_ctx.tn_qscale = args->tn_quality; @@ -224,7 +242,7 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.font_ctx.enable_tn = 
args->tn_count > 0; ScanCtx.font_ctx.log = _log; ScanCtx.font_ctx.logf = _logf; - ScanCtx.font_ctx.store = _store; + ScanCtx.font_ctx.store = write_thumbnail_callback; // Media ScanCtx.media_ctx.tn_qscale = args->tn_quality; @@ -232,7 +250,7 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.media_ctx.tn_count = args->tn_count; ScanCtx.media_ctx.log = _log; ScanCtx.media_ctx.logf = _logf; - ScanCtx.media_ctx.store = _store; + ScanCtx.media_ctx.store = write_thumbnail_callback; ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024; ScanCtx.media_ctx.read_subtitles = args->read_subtitles; ScanCtx.media_ctx.read_subtitles = args->tn_count; @@ -248,7 +266,7 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.ooxml_ctx.content_size = args->content_size; ScanCtx.ooxml_ctx.log = _log; ScanCtx.ooxml_ctx.logf = _logf; - ScanCtx.ooxml_ctx.store = _store; + ScanCtx.ooxml_ctx.store = write_thumbnail_callback; // MOBI ScanCtx.mobi_ctx.content_size = args->content_size; @@ -264,8 +282,8 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.msdoc_ctx.content_size = args->content_size; ScanCtx.msdoc_ctx.log = _log; ScanCtx.msdoc_ctx.logf = _logf; - ScanCtx.msdoc_ctx.store = _store; - ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/msword"); + ScanCtx.msdoc_ctx.store = write_thumbnail_callback; + ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string("application/msword"); ScanCtx.threads = args->threads; ScanCtx.depth = args->depth; @@ -283,174 +301,67 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.raw_ctx.tn_size = args->tn_size; ScanCtx.raw_ctx.log = _log; ScanCtx.raw_ctx.logf = _logf; - ScanCtx.raw_ctx.store = _store; + ScanCtx.raw_ctx.store = write_thumbnail_callback; // Wpd ScanCtx.wpd_ctx.content_size = args->content_size; ScanCtx.wpd_ctx.log = _log; ScanCtx.wpd_ctx.logf = _logf; - ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string(ScanCtx.mime_table, 
"application/wordperfect"); + ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string("application/wordperfect"); // Json ScanCtx.json_ctx.content_size = args->content_size; ScanCtx.json_ctx.log = _log; ScanCtx.json_ctx.logf = _logf; - ScanCtx.json_ctx.json_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/json"); - ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson"); + ScanCtx.json_ctx.json_mime = mime_get_mime_by_string("application/json"); + ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string("application/ndjson"); } -/** - * Loads an existing index as the baseline for incremental scanning. - * 1. load old index files (original+main) => original_table - * 2. allocate empty table => copy_table - * 3. allocate empty table => new_table - * the original_table/copy_table/new_table will be populated in parsing/parse.c:parse - * and consumed in main.c:save_incremental_index - * - * Note: the existing index may or may not be of incremental index form. - */ -void load_incremental_index(const scan_args_t *args) { - char file_path[PATH_MAX]; - - ScanCtx.original_table = incremental_get_table(); - ScanCtx.copy_table = incremental_get_table(); - ScanCtx.new_table = incremental_get_table(); - - char descriptor_path[PATH_MAX]; - snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental); - index_descriptor_t original_desc = read_index_descriptor(descriptor_path); - - if (strcmp(original_desc.version, Version) != 0) { - LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version) - } - - READ_INDICES( - file_path, - args->incremental, - incremental_read(ScanCtx.original_table, file_path, &original_desc), - LOG_DEBUG("main.c", "The base index for incremental scan does not have a main index"), - TRUE - ); - - LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table)) -} - -/** - * Saves an incremental index. 
- * Before calling this function, the scanner should have finished writing the main index. - * 1. Build original_table - new_table => delete_table - * 2. Incrementally copy from old index files [(original+main) /\ copy_table] => index_original.ndjson.zst & store - */ -void save_incremental_index(scan_args_t *args) { - char dst_path[PATH_MAX]; - char store_path[PATH_MAX]; - char file_path[PATH_MAX]; - char del_path[PATH_MAX]; - snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental); - snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path); - store_t *source = store_create(store_path, STORE_SIZE_TN); - - LOG_INFOF("main.c", "incremental_delete: original size = %u, copy size = %u, new size = %u", - g_hash_table_size(ScanCtx.original_table), - g_hash_table_size(ScanCtx.copy_table), - g_hash_table_size(ScanCtx.new_table)); - snprintf(del_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path); - READ_INDICES(file_path, args->incremental, - incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table), - perror("incremental_delete"), 1); - writer_cleanup(); - - READ_INDICES(file_path, args->incremental, - incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table), - perror("incremental_copy"), 1); - writer_cleanup(); - - store_destroy(source); - - snprintf(store_path, PATH_MAX, "%stags", args->incremental); - snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path); - store_t *source_tags = store_create(store_path, STORE_SIZE_TAG); - store_copy(source_tags, dst_path); - store_destroy(source_tags); -} - -/** - * An index can be either incremental or non-incremental (initial index). - * For an initial index, there is only the "main" index. - * For an incremental index, there are, additionally: - * - An "original" index, referencing all files unchanged since the previous index. - * - A "delete" index, referencing all files that exist in the previous index, but deleted since then. 
- * Therefore, for an incremental index, "main"+"original" covers all the current files in the live filesystem, - * and is orthognal with the "delete" index. When building an incremental index upon an old incremental index, - * the old "delete" index can be safely ignored. - */ void sist2_scan(scan_args_t *args) { - - ScanCtx.mime_table = mime_get_mime_table(); - ScanCtx.ext_table = mime_get_ext_table(); - initialize_scan_context(args); - init_dir(ScanCtx.index.path, args); + database_scan_begin(args); - char store_path[PATH_MAX]; - snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path); - ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN); + LOG_INFOF("main.c", "sist2 v%s", Version); - snprintf(store_path, PATH_MAX, "%smeta", ScanCtx.index.path); - ScanCtx.index.meta_store = store_create(store_path, STORE_SIZE_META); - - scan_print_header(); - - if (args->incremental != NULL) { - load_incremental_index(args); - } - - ScanCtx.writer_pool = tpool_create(1, writer_cleanup, FALSE); - tpool_start(ScanCtx.writer_pool); - - ScanCtx.pool = tpool_create(ScanCtx.threads, thread_cleanup, TRUE); + ScanCtx.pool = tpool_create(ScanCtx.threads, TRUE); tpool_start(ScanCtx.pool); if (args->list_path) { // Scan using file list int list_ret = iterate_file_list(args->list_file); if (list_ret != 0) { - LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret) + LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret); } } else { // Scan directory recursively int walk_ret = walk_directory_tree(ScanCtx.index.desc.root); if (walk_ret == -1) { - LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno) + LOG_FATALF("main.c", "walk_directory_tree() failed! 
%s (%d)", strerror(errno), errno); } } tpool_wait(ScanCtx.pool); tpool_destroy(ScanCtx.pool); - tpool_wait(ScanCtx.writer_pool); - tpool_destroy(ScanCtx.writer_pool); + LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count); + LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count); + LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count); + LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size); + LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size); - LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count) - LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count) - LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count) - LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size) - LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size) + database_t *db = database_create(args->output, INDEX_DATABASE); + database_open(db); - if (args->incremental != NULL) { - save_incremental_index(args); + if (args->incremental != FALSE) { + database_incremental_scan_end(db); } - generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path); - - store_destroy(ScanCtx.index.store); - store_destroy(ScanCtx.index.meta_store); + database_generate_stats(db, args->treemap_threshold); + database_close(db, TRUE); } void sist2_index(index_args_t *args) { - char file_path[PATH_MAX]; - IndexCtx.es_url = args->es_url; IndexCtx.es_index = args->es_index; IndexCtx.es_insecure_ssl = args->es_insecure_ssl; @@ -461,91 +372,69 @@ void sist2_index(index_args_t *args) { elastic_init(args->force_reset, args->es_mappings, args->es_settings); } - char descriptor_path[PATH_MAX]; - snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path); + database_t *db = database_create(args->index_path, INDEX_DATABASE); + database_open(db); + index_descriptor_t *desc = database_read_index_descriptor(db); + 
database_close(db, FALSE); - index_descriptor_t desc = read_index_descriptor(descriptor_path); + LOG_DEBUGF("main.c", "Index version %s", desc->version); - LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type) - - if (strcmp(desc.version, Version) != 0) { - LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", desc.version, Version) + if (desc->version_major != VersionMajor) { + LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", desc->version, Version); } - DIR *dir = opendir(args->index_path); - if (dir == NULL) { - LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno)) - } - - char path_tmp[PATH_MAX]; - snprintf(path_tmp, sizeof(path_tmp), "%stags", args->index_path); - IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG); - IndexCtx.tags = store_read_all(IndexCtx.tag_store); - - snprintf(path_tmp, sizeof(path_tmp), "%smeta", args->index_path); - IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META); - IndexCtx.meta = store_read_all(IndexCtx.meta_store); - - index_func f; - if (args->print) { - f = print_json; - } else { - f = index_json; - } - - IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, args->print == 0); + IndexCtx.pool = tpool_create(args->threads, args->print == FALSE); tpool_start(IndexCtx.pool); - READ_INDICES(file_path, args->index_path, { - read_index(file_path, desc.id, desc.type, f); - LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type); - }, {}, !args->incremental); + int cnt = 0; - // Only read the _delete index if we're sending data to ES - if (!args->print) { - snprintf(file_path, PATH_MAX, "%s_index_delete.list.zst", args->index_path); - if (0 == access(file_path, R_OK)) { - read_lines(file_path, (line_processor_t) { - .data = NULL, - .func = delete_document - }); - LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type) + db = database_create(args->index_path, INDEX_DATABASE); + 
database_open(db); + database_iterator_t *iterator = database_create_document_iterator(db); + database_document_iter_foreach(json, iterator) { + const char *doc_id = cJSON_GetObjectItem(json, "_id")->valuestring; + if (args->print) { + print_json(json, doc_id); + } else { + index_json(json, doc_id); + cnt +=1; } } - closedir(dir); + free(iterator); + database_close(db, FALSE); + + // Only read the _delete index if we're sending data to ES + if (!args->print) { + // TODO: (delete_list iterator) + } tpool_wait(IndexCtx.pool); - tpool_destroy(IndexCtx.pool); if (IndexCtx.needs_es_connection) { - finish_indexer(args->script, args->async_script, desc.id); + finish_indexer(args->script, args->async_script, desc->id); } - - store_destroy(IndexCtx.tag_store); - store_destroy(IndexCtx.meta_store); - g_hash_table_remove_all(IndexCtx.tags); - g_hash_table_destroy(IndexCtx.tags); + free(desc); } void sist2_exec_script(exec_args_t *args) { - LogCtx.verbose = TRUE; - char descriptor_path[PATH_MAX]; - snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path); - index_descriptor_t desc = read_index_descriptor(descriptor_path); - IndexCtx.es_url = args->es_url; IndexCtx.es_index = args->es_index; IndexCtx.es_insecure_ssl = args->es_insecure_ssl; IndexCtx.needs_es_connection = TRUE; - LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type) + database_t *db = database_create(args->index_path, INDEX_DATABASE); + database_open(db); - execute_update_script(args->script, args->async_script, desc.id); + index_descriptor_t *desc = database_read_index_descriptor(db); + LOG_DEBUGF("main.c", "Index version %s", desc->version); + + execute_update_script(args->script, args->async_script, desc->id); free(args->script); + database_close(db, FALSE); } void sist2_web(web_args_t *args) { @@ -569,23 +458,17 @@ void sist2_web(web_args_t *args) { for (int i = 0; i < args->index_count; i++) { char *abs_path = abspath(args->indices[i]); - if (abs_path == NULL) { - 
return; - } - char path_tmp[PATH_MAX]; - - snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path); - WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN); - - snprintf(path_tmp, PATH_MAX, "%stags", abs_path); - mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR); - WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG); - - snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path); - WebCtx.indices[i].desc = read_index_descriptor(path_tmp); strcpy(WebCtx.indices[i].path, abs_path); - LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name) + + WebCtx.indices[i].db = database_create(abs_path, INDEX_DATABASE); + database_open(WebCtx.indices[i].db); + + index_descriptor_t *desc = database_read_index_descriptor(WebCtx.indices[i].db); + WebCtx.indices[i].desc = *desc; + free(desc); + + LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name); free(abs_path); } @@ -600,7 +483,7 @@ void sist2_web(web_args_t *args) { * Negative number -> Raise error * Specified a valid number -> Continue as normal */ -int set_to_negative_if_value_is_zero(struct argparse *self, const struct argparse_option *option) { +int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct argparse_option *option) { int specified_value = *(int *) option->value; if (specified_value == 0) { @@ -613,6 +496,7 @@ int set_to_negative_if_value_is_zero(struct argparse *self, const struct argpars } } +#include int main(int argc, const char *argv[]) { // sigsegv_handler = signal(SIGSEGV, sig_handler); @@ -645,8 +529,8 @@ int main(int argc, const char *argv[]) { OPT_GROUP("Scan options"), OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), OPT_INTEGER('q', "thumbnail-quality", &scan_args->tn_quality, - "Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2", - set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality), + "Thumbnail quality, on a scale of 2 to 31, 2 being the best. 
DEFAULT=2", + set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality), OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size, "Thumbnail size, in pixels. DEFAULT=500", set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size), @@ -656,7 +540,8 @@ int main(int argc, const char *argv[]) { OPT_INTEGER(0, "content-size", &scan_args->content_size, "Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768", set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size), - OPT_STRING(0, "incremental", &scan_args->incremental, + OPT_BOOLEAN(0, "incremental", &scan_args->incremental, + // TODO: Update help string "Reuse an existing index and only scan modified files."), OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"), OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."), @@ -692,7 +577,8 @@ int main(int argc, const char *argv[]) { OPT_GROUP("Index options"), OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"), - OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."), + OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, + "Do not verify SSL connections to Elasticsearch."), OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. 
DEFAULT=sist2"), OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."), OPT_BOOLEAN(0, "incremental-index", &index_args->incremental, @@ -701,20 +587,22 @@ int main(int argc, const char *argv[]) { OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."), OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."), OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."), - OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"), + OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 70"), OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. " "(You must use this option the first time you use the index command)"), OPT_GROUP("Web options"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), - OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."), + OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, + "Do not verify SSL connections to Elasticsearch."), OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"), OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. 
DEFAULT=localhost:4090"), OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"), OPT_STRING(0, "auth0-audience", &web_args->auth0_audience, "API audience/identifier"), OPT_STRING(0, "auth0-domain", &web_args->auth0_domain, "Application domain"), OPT_STRING(0, "auth0-client-id", &web_args->auth0_client_id, "Application client ID"), - OPT_STRING(0, "auth0-public-key-file", &web_args->auth0_public_key_path, "Path to Auth0 public key file extracted from /pem"), + OPT_STRING(0, "auth0-public-key-file", &web_args->auth0_public_key_path, + "Path to Auth0 public key file extracted from /pem"), OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"), OPT_STRING(0, "tagline", &web_args->tagline, "Tagline in navbar"), OPT_BOOLEAN(0, "dev", &web_args->dev, "Serve html & js files from disk (for development)"), @@ -722,7 +610,8 @@ int main(int argc, const char *argv[]) { OPT_GROUP("Exec-script options"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), - OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."), + OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, + "Do not verify SSL connections to Elasticsearch."), OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. 
DEFAULT=sist2"), OPT_STRING(0, "script-file", &common_script_path, "Path to user script."), OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."), @@ -800,7 +689,7 @@ int main(int argc, const char *argv[]) { } else { argparse_usage(&argparse); - LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0]) + LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0]); } printf("\n"); diff --git a/src/mempool/mempool.c b/src/mempool/mempool.c deleted file mode 100644 index d8dd0a8..0000000 --- a/src/mempool/mempool.c +++ /dev/null @@ -1,757 +0,0 @@ -#include "mempool.h" -#include - -#define NCX_SLAB_PAGE_MASK 3 -#define NCX_SLAB_PAGE 0 -#define NCX_SLAB_BIG 1 -#define NCX_SLAB_EXACT 2 -#define NCX_SLAB_SMALL 3 - -#define NCX_SLAB_PAGE_FREE 0 -#define NCX_SLAB_PAGE_BUSY 0xffffffffffffffff -#define NCX_SLAB_PAGE_START 0x8000000000000000 - -#define NCX_SLAB_SHIFT_MASK 0x000000000000000f -#define NCX_SLAB_MAP_MASK 0xffffffff00000000 -#define NCX_SLAB_MAP_SHIFT 32 - -#define NCX_SLAB_BUSY 0xffffffffffffffff - - -static ncx_slab_page_t *ncx_slab_alloc_pages(ncx_slab_pool_t *pool, ncx_uint_t pages); - -static void ncx_slab_free_pages(ncx_slab_pool_t *pool, ncx_slab_page_t *page, ncx_uint_t pages); - -static bool ncx_slab_empty(ncx_slab_pool_t *pool, ncx_slab_page_t *page); - -static ncx_uint_t ncx_slab_max_size; -static ncx_uint_t ncx_slab_exact_size; -static ncx_uint_t ncx_slab_exact_shift; -static ncx_uint_t ncx_pagesize; -static ncx_uint_t ncx_pagesize_shift; -static ncx_uint_t ncx_real_pages; - -void ncx_slab_init(ncx_slab_pool_t *pool) { - u_char *p; - size_t size; - ncx_uint_t i, n, pages; - ncx_slab_page_t *slots; - - /*pagesize*/ - ncx_pagesize = getpagesize(); - for (n = ncx_pagesize, ncx_pagesize_shift = 0; - n >>= 1; ncx_pagesize_shift++) { /* void */ } - - /* STUB */ - if (ncx_slab_max_size == 0) { - ncx_slab_max_size = ncx_pagesize / 2; - ncx_slab_exact_size = ncx_pagesize / (8 * sizeof(uintptr_t)); - for (n = ncx_slab_exact_size; 
n >>= 1; ncx_slab_exact_shift++) { - /* void */ - } - } - - pool->min_size = 1 << pool->min_shift; - - p = (u_char *) pool + sizeof(ncx_slab_pool_t); - slots = (ncx_slab_page_t *) p; - - n = ncx_pagesize_shift - pool->min_shift; - for (i = 0; i < n; i++) { - slots[i].slab = 0; - slots[i].next = &slots[i]; - slots[i].prev = 0; - } - - p += n * sizeof(ncx_slab_page_t); - - size = pool->end - p; - - pages = (ncx_uint_t) (size / (ncx_pagesize + sizeof(ncx_slab_page_t))); - - ncx_memzero(p, pages * sizeof(ncx_slab_page_t)); - - pool->pages = (ncx_slab_page_t *) p; - - pool->free.prev = 0; - pool->free.next = (ncx_slab_page_t *) p; - - pool->pages->slab = pages; - pool->pages->next = &pool->free; - pool->pages->prev = (uintptr_t) &pool->free; - - pool->start = (u_char *) - ncx_align_ptr((uintptr_t) p + pages * sizeof(ncx_slab_page_t), - ncx_pagesize); - - ncx_real_pages = (pool->end - pool->start) / ncx_pagesize; - pool->pages->slab = ncx_real_pages; -} - - -void *ncx_slab_alloc(ncx_slab_pool_t *pool, size_t size) { - size_t s; - uintptr_t p, n, m, mask, *bitmap; - ncx_uint_t i, slot, shift, map; - ncx_slab_page_t *page, *prev, *slots; - - if (size >= ncx_slab_max_size) { - - page = ncx_slab_alloc_pages(pool, (size >> ncx_pagesize_shift) - + ((size % ncx_pagesize) ? 
1 : 0)); - if (page) { - p = (page - pool->pages) << ncx_pagesize_shift; - p += (uintptr_t) pool->start; - - } else { - p = 0; - } - - goto done; - } - - if (size > pool->min_size) { - shift = 1; - for (s = size - 1; s >>= 1; shift++) { /* void */ } - slot = shift - pool->min_shift; - - } else { - shift = pool->min_shift; - slot = 0; - } - - slots = (ncx_slab_page_t *) ((u_char *) pool + sizeof(ncx_slab_pool_t)); - page = slots[slot].next; - - if (page->next != page) { - - if (shift < ncx_slab_exact_shift) { - - do { - p = (page - pool->pages) << ncx_pagesize_shift; - bitmap = (uintptr_t *) (pool->start + p); - - map = (1 << (ncx_pagesize_shift - shift)) - / (sizeof(uintptr_t) * 8); - - for (n = 0; n < map; n++) { - - if (bitmap[n] != NCX_SLAB_BUSY) { - - for (m = 1, i = 0; m; m <<= 1, i++) { - if ((bitmap[n] & m)) { - continue; - } - - bitmap[n] |= m; - - i = ((n * sizeof(uintptr_t) * 8) << shift) - + (i << shift); - - if (bitmap[n] == NCX_SLAB_BUSY) { - for (n = n + 1; n < map; n++) { - if (bitmap[n] != NCX_SLAB_BUSY) { - p = (uintptr_t) bitmap + i; - - goto done; - } - } - - prev = (ncx_slab_page_t *) - (page->prev & ~NCX_SLAB_PAGE_MASK); - prev->next = page->next; - page->next->prev = page->prev; - - page->next = NULL; - page->prev = NCX_SLAB_SMALL; - } - - p = (uintptr_t) bitmap + i; - - goto done; - } - } - } - - page = page->next; - - } while (page); - - } else if (shift == ncx_slab_exact_shift) { - - do { - if (page->slab != NCX_SLAB_BUSY) { - - for (m = 1, i = 0; m; m <<= 1, i++) { - if ((page->slab & m)) { - continue; - } - - page->slab |= m; - - if (page->slab == NCX_SLAB_BUSY) { - prev = (ncx_slab_page_t *) - (page->prev & ~NCX_SLAB_PAGE_MASK); - prev->next = page->next; - page->next->prev = page->prev; - - page->next = NULL; - page->prev = NCX_SLAB_EXACT; - } - - p = (page - pool->pages) << ncx_pagesize_shift; - p += i << shift; - p += (uintptr_t) pool->start; - - goto done; - } - } - - page = page->next; - - } while (page); - - } else { /* shift > 
ncx_slab_exact_shift */ - - n = ncx_pagesize_shift - (page->slab & NCX_SLAB_SHIFT_MASK); - n = 1 << n; - n = ((uintptr_t) 1 << n) - 1; - mask = n << NCX_SLAB_MAP_SHIFT; - - do { - if ((page->slab & NCX_SLAB_MAP_MASK) != mask) { - - for (m = (uintptr_t) 1 << NCX_SLAB_MAP_SHIFT, i = 0; - m & mask; - m <<= 1, i++) { - if ((page->slab & m)) { - continue; - } - - page->slab |= m; - - if ((page->slab & NCX_SLAB_MAP_MASK) == mask) { - prev = (ncx_slab_page_t *) - (page->prev & ~NCX_SLAB_PAGE_MASK); - prev->next = page->next; - page->next->prev = page->prev; - - page->next = NULL; - page->prev = NCX_SLAB_BIG; - } - - p = (page - pool->pages) << ncx_pagesize_shift; - p += i << shift; - p += (uintptr_t) pool->start; - - goto done; - } - } - - page = page->next; - - } while (page); - } - } - - page = ncx_slab_alloc_pages(pool, 1); - - if (page) { - if (shift < ncx_slab_exact_shift) { - p = (page - pool->pages) << ncx_pagesize_shift; - bitmap = (uintptr_t *) (pool->start + p); - - s = 1 << shift; - n = (1 << (ncx_pagesize_shift - shift)) / 8 / s; - - if (n == 0) { - n = 1; - } - - bitmap[0] = (2 << n) - 1; - - map = (1 << (ncx_pagesize_shift - shift)) / (sizeof(uintptr_t) * 8); - - for (i = 1; i < map; i++) { - bitmap[i] = 0; - } - - page->slab = shift; - page->next = &slots[slot]; - page->prev = (uintptr_t) &slots[slot] | NCX_SLAB_SMALL; - - slots[slot].next = page; - - p = ((page - pool->pages) << ncx_pagesize_shift) + s * n; - p += (uintptr_t) pool->start; - - goto done; - - } else if (shift == ncx_slab_exact_shift) { - - page->slab = 1; - page->next = &slots[slot]; - page->prev = (uintptr_t) &slots[slot] | NCX_SLAB_EXACT; - - slots[slot].next = page; - - p = (page - pool->pages) << ncx_pagesize_shift; - p += (uintptr_t) pool->start; - - goto done; - - } else { /* shift > ncx_slab_exact_shift */ - - page->slab = ((uintptr_t) 1 << NCX_SLAB_MAP_SHIFT) | shift; - page->next = &slots[slot]; - page->prev = (uintptr_t) &slots[slot] | NCX_SLAB_BIG; - - slots[slot].next = page; - - 
p = (page - pool->pages) << ncx_pagesize_shift; - p += (uintptr_t) pool->start; - - goto done; - } - } - - p = 0; - - done: - - return (void *) p; -} - - -void ncx_slab_free(ncx_slab_pool_t *pool, void *p) { - size_t size; - uintptr_t slab, m, *bitmap; - ncx_uint_t n, type, slot, shift, map; - ncx_slab_page_t *slots, *page; - - if ((u_char *) p < pool->start || (u_char *) p > pool->end) { -// error("ncx_slab_free(): outside of pool"); - goto fail; - } - - n = ((u_char *) p - pool->start) >> ncx_pagesize_shift; - page = &pool->pages[n]; - slab = page->slab; - type = page->prev & NCX_SLAB_PAGE_MASK; - - switch (type) { - - case NCX_SLAB_SMALL: - - shift = slab & NCX_SLAB_SHIFT_MASK; - size = 1 << shift; - - if ((uintptr_t) p & (size - 1)) { - goto wrong_chunk; - } - - n = ((uintptr_t) p & (ncx_pagesize - 1)) >> shift; - m = (uintptr_t) 1 << (n & (sizeof(uintptr_t) * 8 - 1)); - n /= (sizeof(uintptr_t) * 8); - bitmap = (uintptr_t *) ((uintptr_t) p & ~(ncx_pagesize - 1)); - - if (bitmap[n] & m) { - - if (page->next == NULL) { - slots = (ncx_slab_page_t *) - ((u_char *) pool + sizeof(ncx_slab_pool_t)); - slot = shift - pool->min_shift; - - page->next = slots[slot].next; - slots[slot].next = page; - - page->prev = (uintptr_t) &slots[slot] | NCX_SLAB_SMALL; - page->next->prev = (uintptr_t) page | NCX_SLAB_SMALL; - } - - bitmap[n] &= ~m; - - n = (1 << (ncx_pagesize_shift - shift)) / 8 / (1 << shift); - - if (n == 0) { - n = 1; - } - - if (bitmap[0] & ~(((uintptr_t) 1 << n) - 1)) { - goto done; - } - - map = (1 << (ncx_pagesize_shift - shift)) / (sizeof(uintptr_t) * 8); - - for (n = 1; n < map; n++) { - if (bitmap[n]) { - goto done; - } - } - - ncx_slab_free_pages(pool, page, 1); - - goto done; - } - - goto chunk_already_free; - - case NCX_SLAB_EXACT: - - m = (uintptr_t) 1 << - (((uintptr_t) p & (ncx_pagesize - 1)) >> ncx_slab_exact_shift); - size = ncx_slab_exact_size; - - if ((uintptr_t) p & (size - 1)) { - goto wrong_chunk; - } - - if (slab & m) { - if (slab == 
NCX_SLAB_BUSY) { - slots = (ncx_slab_page_t *) - ((u_char *) pool + sizeof(ncx_slab_pool_t)); - slot = ncx_slab_exact_shift - pool->min_shift; - - page->next = slots[slot].next; - slots[slot].next = page; - - page->prev = (uintptr_t) &slots[slot] | NCX_SLAB_EXACT; - page->next->prev = (uintptr_t) page | NCX_SLAB_EXACT; - } - - page->slab &= ~m; - - if (page->slab) { - goto done; - } - - ncx_slab_free_pages(pool, page, 1); - - goto done; - } - - goto chunk_already_free; - - case NCX_SLAB_BIG: - - shift = slab & NCX_SLAB_SHIFT_MASK; - size = 1 << shift; - - if ((uintptr_t) p & (size - 1)) { - goto wrong_chunk; - } - - m = (uintptr_t) 1 << ((((uintptr_t) p & (ncx_pagesize - 1)) >> shift) - + NCX_SLAB_MAP_SHIFT); - - if (slab & m) { - - if (page->next == NULL) { - slots = (ncx_slab_page_t *) - ((u_char *) pool + sizeof(ncx_slab_pool_t)); - slot = shift - pool->min_shift; - - page->next = slots[slot].next; - slots[slot].next = page; - - page->prev = (uintptr_t) &slots[slot] | NCX_SLAB_BIG; - page->next->prev = (uintptr_t) page | NCX_SLAB_BIG; - } - - page->slab &= ~m; - - if (page->slab & NCX_SLAB_MAP_MASK) { - goto done; - } - - ncx_slab_free_pages(pool, page, 1); - - goto done; - } - - goto chunk_already_free; - - case NCX_SLAB_PAGE: - - if ((uintptr_t) p & (ncx_pagesize - 1)) { - goto wrong_chunk; - } - - if (slab == NCX_SLAB_PAGE_FREE) { -// alert("ncx_slab_free(): page is already free"); - goto fail; - } - - if (slab == NCX_SLAB_PAGE_BUSY) { -// alert("ncx_slab_free(): pointer to wrong page"); - goto fail; - } - - n = ((u_char *) p - pool->start) >> ncx_pagesize_shift; - size = slab & ~NCX_SLAB_PAGE_START; - - ncx_slab_free_pages(pool, &pool->pages[n], size); - - return; - } - - /* not reached */ - - return; - - done: - - return; - - wrong_chunk: - -// error("ncx_slab_free(): pointer to wrong chunk"); - - goto fail; - - chunk_already_free: - -// error("ncx_slab_free(): chunk is already free"); - - fail: - - return; -} - - -static ncx_slab_page_t 
*ncx_slab_alloc_pages(ncx_slab_pool_t *pool, ncx_uint_t pages) { - ncx_slab_page_t *page, *p; - - for (page = pool->free.next; page != &pool->free; page = page->next) { - - if (page->slab >= pages) { - - if (page->slab > pages) { - page[pages].slab = page->slab - pages; - page[pages].next = page->next; - page[pages].prev = page->prev; - - p = (ncx_slab_page_t *) page->prev; - p->next = &page[pages]; - page->next->prev = (uintptr_t) &page[pages]; - - } else { - p = (ncx_slab_page_t *) page->prev; - p->next = page->next; - page->next->prev = page->prev; - } - - page->slab = pages | NCX_SLAB_PAGE_START; - page->next = NULL; - page->prev = NCX_SLAB_PAGE; - - if (--pages == 0) { - return page; - } - - for (p = page + 1; pages; pages--) { - p->slab = NCX_SLAB_PAGE_BUSY; - p->next = NULL; - p->prev = NCX_SLAB_PAGE; - p++; - } - - return page; - } - } - -// error("ncx_slab_alloc() failed: no memory"); - - return NULL; -} - -static void ncx_slab_free_pages(ncx_slab_pool_t *pool, ncx_slab_page_t *page, ncx_uint_t pages) { - ncx_slab_page_t *prev; - - if (pages > 1) { - ncx_memzero(&page[1], (pages - 1) * sizeof(ncx_slab_page_t)); - } - - if (page->next) { - prev = (ncx_slab_page_t *) (page->prev & ~NCX_SLAB_PAGE_MASK); - prev->next = page->next; - page->next->prev = page->prev; - } - - page->slab = pages; - page->prev = (uintptr_t) &pool->free; - page->next = pool->free.next; - page->next->prev = (uintptr_t) page; - - pool->free.next = page; - -#ifdef PAGE_MERGE - if (pool->pages != page) { - prev = page - 1; - if (ncx_slab_empty(pool, prev)) { - for (; prev >= pool->pages; prev--) { - if (prev->slab != 0) - { - pool->free.next = page->next; - page->next->prev = (uintptr_t) &pool->free; - - prev->slab += pages; - ncx_memzero(page, sizeof(ncx_slab_page_t)); - - page = prev; - - break; - } - } - } - } - - if ((page - pool->pages + page->slab) < ncx_real_pages) { - next = page + page->slab; - if (ncx_slab_empty(pool, next)) - { - prev = (ncx_slab_page_t *) (next->prev); - 
prev->next = next->next; - next->next->prev = next->prev; - - page->slab += next->slab; - ncx_memzero(next, sizeof(ncx_slab_page_t)); - } - } - -#endif -} - -void ncx_slab_stat(ncx_slab_pool_t *pool, ncx_slab_stat_t *stat) { - uintptr_t m, n, mask, slab; - uintptr_t *bitmap; - ncx_uint_t i, j, map, type, obj_size; - ncx_slab_page_t *page; - - ncx_memzero(stat, sizeof(ncx_slab_stat_t)); - - page = pool->pages; - stat->pages = (pool->end - pool->start) / ncx_pagesize; - - for (i = 0; i < stat->pages; i++) { - slab = page->slab; - type = page->prev & NCX_SLAB_PAGE_MASK; - - switch (type) { - - case NCX_SLAB_SMALL: - - n = (page - pool->pages) << ncx_pagesize_shift; - bitmap = (uintptr_t *) (pool->start + n); - - obj_size = 1 << slab; - map = (1 << (ncx_pagesize_shift - slab)) - / (sizeof(uintptr_t) * 8); - - for (j = 0; j < map; j++) { - for (m = 1; m; m <<= 1) { - if ((bitmap[j] & m)) { - stat->used_size += obj_size; - stat->b_small += obj_size; - } - - } - } - - stat->p_small++; - - break; - - case NCX_SLAB_EXACT: - - if (slab == NCX_SLAB_BUSY) { - stat->used_size += sizeof(uintptr_t) * 8 * ncx_slab_exact_size; - stat->b_exact += sizeof(uintptr_t) * 8 * ncx_slab_exact_size; - } else { - for (m = 1; m; m <<= 1) { - if (slab & m) { - stat->used_size += ncx_slab_exact_size; - stat->b_exact += ncx_slab_exact_size; - } - } - } - - stat->p_exact++; - - break; - - case NCX_SLAB_BIG: - - j = ncx_pagesize_shift - (slab & NCX_SLAB_SHIFT_MASK); - j = 1 << j; - j = ((uintptr_t) 1 << j) - 1; - mask = j << NCX_SLAB_MAP_SHIFT; - obj_size = 1 << (slab & NCX_SLAB_SHIFT_MASK); - - for (m = (uintptr_t) 1 << NCX_SLAB_MAP_SHIFT; m & mask; m <<= 1) { - if ((page->slab & m)) { - stat->used_size += obj_size; - stat->b_big += obj_size; - } - } - - stat->p_big++; - - break; - - case NCX_SLAB_PAGE: - - if (page->prev == NCX_SLAB_PAGE) { - slab = slab & ~NCX_SLAB_PAGE_START; - stat->used_size += slab * ncx_pagesize; - stat->b_page += slab * ncx_pagesize; - stat->p_page += slab; - - i += (slab 
- 1); - - break; - } - - default: - - if (slab > stat->max_free_pages) { - stat->max_free_pages = page->slab; - } - - stat->free_page += slab; - - i += (slab - 1); - - break; - } - - page = pool->pages + i + 1; - } - - stat->pool_size = pool->end - pool->start; - stat->used_pct = stat->used_size * 100 / stat->pool_size; -} - -static bool ncx_slab_empty(ncx_slab_pool_t *pool, ncx_slab_page_t *page) { - ncx_slab_page_t *prev; - - if (page->slab == 0) { - return true; - } - - //page->prev == PAGE | SMALL | EXACT | BIG - if (page->next == NULL) { - return false; - } - - prev = (ncx_slab_page_t *) (page->prev & ~NCX_SLAB_PAGE_MASK); - while (prev >= pool->pages) { - prev = (ncx_slab_page_t *) (prev->prev & ~NCX_SLAB_PAGE_MASK); - } - - if (prev == &pool->free) { - return true; - } - - return false; -} \ No newline at end of file diff --git a/src/mempool/mempool.h b/src/mempool/mempool.h deleted file mode 100644 index e903158..0000000 --- a/src/mempool/mempool.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef SIST2_MEMPOOL_H -#define SIST2_MEMPOOL_H - -#include -#include -#include -#include -#include - -typedef unsigned char u_char; -typedef uintptr_t ncx_uint_t; - -#ifndef NCX_ALIGNMENT -#define NCX_ALIGNMENT sizeof(unsigned long) -#endif - -#define ncx_align(d, a) (((d) + (a - 1)) & ~(a - 1)) -#define ncx_align_ptr(p, a) (u_char *) (((uintptr_t) (p) + ((uintptr_t) a - 1)) & ~((uintptr_t) a - 1)) - -#define ncx_memzero(buf, n) (void) memset(buf, 0, n) -#define ncx_memset(buf, c, n) (void) memset(buf, c, n) - -typedef struct ncx_slab_page_s ncx_slab_page_t; - -struct ncx_slab_page_s { - uintptr_t slab; - ncx_slab_page_t *next; - uintptr_t prev; -}; - -typedef struct { - size_t min_size; - size_t min_shift; - - ncx_slab_page_t *pages; - ncx_slab_page_t free; - - u_char *start; - u_char *end; - - //ncx_shmtx_t mutex; - - void *addr; -} ncx_slab_pool_t; - -typedef struct { - size_t pool_size, used_size, used_pct; - size_t pages, free_page; - size_t p_small, p_exact, p_big, p_page; 
- size_t b_small, b_exact, b_big, b_page; - size_t max_free_pages; -} ncx_slab_stat_t; - -void ncx_slab_init(ncx_slab_pool_t *mempool); - -void *ncx_slab_alloc(ncx_slab_pool_t *mempool, size_t size); - -void ncx_slab_free(ncx_slab_pool_t *mempool, void *p); - -void ncx_slab_stat(ncx_slab_pool_t *mempool, ncx_slab_stat_t *stat); - -#endif //SIST2_MEMPOOL_H diff --git a/src/parsing/fs_util.h b/src/parsing/fs_util.h new file mode 100644 index 0000000..a3b257e --- /dev/null +++ b/src/parsing/fs_util.h @@ -0,0 +1,42 @@ +#ifndef SIST2_FS_UTIL_H +#define SIST2_FS_UTIL_H + +#include "src/sist.h" + +#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));}; + +static int fs_read(struct vfile *f, void *buf, size_t size) { + + if (f->fd == -1) { + SHA1_Init(&f->sha1_ctx); + + f->fd = open(f->filepath, O_RDONLY); + if (f->fd == -1) { + return -1; + } + } + + int ret = (int) read(f->fd, buf, size); + + if (ret != 0 && f->calculate_checksum) { + f->has_checksum = TRUE; + safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, ret); + } + + return ret; +} + +static void fs_close(struct vfile *f) { + if (f->fd != -1) { + SHA1_Final(f->sha1_digest, &f->sha1_ctx); + close(f->fd); + } +} + +static void fs_reset(struct vfile *f) { + if (f->fd != -1) { + lseek(f->fd, 0, SEEK_SET); + } +} + +#endif diff --git a/src/parsing/magic_util.c b/src/parsing/magic_util.c new file mode 100644 index 0000000..e5443a8 --- /dev/null +++ b/src/parsing/magic_util.c @@ -0,0 +1,32 @@ +#include "magic_util.h" +#include "src/log.h" +#include "mime.h" +#include +#include "src/magic_generated.c" + + +char *magic_buffer_embedded(void *buffer, size_t buffer_size) { + + magic_t magic = magic_open(MAGIC_MIME_TYPE); + + const char *magic_buffers[1] = {magic_database_buffer,}; + size_t sizes[1] = {sizeof(magic_database_buffer),}; + + // TODO: check if we can reuse the magic instance + int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1); + + if (load_ret != 0) { + LOG_FATALF("parse.c", 
"Could not load libmagic database: (%d)", load_ret); + } + + const char *magic_mime_str = magic_buffer(magic, buffer, buffer_size); + char *return_value = NULL; + + if (magic_mime_str != NULL) { + return_value = malloc(strlen(magic_mime_str) + 1); + strcpy(return_value, magic_mime_str); + } + + magic_close(magic); + return return_value; +} \ No newline at end of file diff --git a/src/parsing/magic_util.h b/src/parsing/magic_util.h new file mode 100644 index 0000000..8c40cb4 --- /dev/null +++ b/src/parsing/magic_util.h @@ -0,0 +1,8 @@ +#ifndef SIST2_MAGIC_UTIL_H +#define SIST2_MAGIC_UTIL_H + +#include + +char *magic_buffer_embedded(void *buffer, size_t buffer_size); + +#endif //SIST2_MAGIC_UTIL_H diff --git a/src/parsing/mime.c b/src/parsing/mime.c index 4a218df..49a26c0 100644 --- a/src/parsing/mime.c +++ b/src/parsing/mime.c @@ -1,22 +1,30 @@ #include "mime.h" +#include -unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) { - char lower[8]; - char *p = lower; +unsigned int mime_get_mime_by_ext(const char *ext) { + unsigned char lower[16]; + unsigned char *p = lower; int cnt = 0; while ((*ext) != '\0' && cnt + 1 < sizeof(lower)) { - *p++ = (char)tolower(*ext++); + *p++ = tolower(*ext++); cnt++; } *p = '\0'; - return (size_t) g_hash_table_lookup(ext_table, lower); + + unsigned long crc = crc32(0, lower, cnt); + + unsigned int mime = mime_extension_lookup(crc); + return mime; } -unsigned int mime_get_mime_by_string(GHashTable *mime_table, const char * str) { +unsigned int mime_get_mime_by_string(const char *str) { - const char * ptr = str; + const char *ptr = str; while (*ptr == ' ' || *ptr == '[') { ptr++; } - return (size_t) g_hash_table_lookup(mime_table, ptr); + + unsigned long crc = crc32(0, (unsigned char *) ptr, strlen(ptr)); + + return mime_name_lookup(crc); } diff --git a/src/parsing/mime.h b/src/parsing/mime.h index de7b1be..50b380a 100644 --- a/src/parsing/mime.h +++ b/src/parsing/mime.h @@ -51,14 +51,14 @@ enum major_mime { enum 
mime; -GHashTable *mime_get_mime_table(); +unsigned int mime_name_lookup(unsigned long mime_crc32); -GHashTable *mime_get_ext_table(); +unsigned int mime_extension_lookup(unsigned long extension_crc32); -char *mime_get_mime_text(unsigned int); +const char *mime_get_mime_text(unsigned int); -unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext); +unsigned int mime_get_mime_by_ext(const char *ext); -unsigned int mime_get_mime_by_string(GHashTable *mime_table, const char * str); +unsigned int mime_get_mime_by_string(const char *str); #endif diff --git a/src/parsing/mime_generated.c b/src/parsing/mime_generated.c index 3b8fed8..6eedeae 100644 --- a/src/parsing/mime_generated.c +++ b/src/parsing/mime_generated.c @@ -1,460 +1,458 @@ // **Generated by mime.py** #ifndef MIME_GENERATED_C #define MIME_GENERATED_C -#include - #include enum mime { - application_CDFV2=655361, - application_CDFV2_corrupt=655362, - application_arj=655363, - application_base64=655364, - application_binhex=655365, - application_book=655366, - application_clariscad=655367, - application_commonground=655368, - application_csv=655369, - application_dicom=655370, - application_drafting=655371, - application_epub_zip=655372 | 0x40000000, - application_freeloader=655373, - application_futuresplash=655374, - application_groupwise=655375, - application_gzip=655376 | 0x08000000, - application_hta=655377, - application_i_deas=655378, - application_iges=655379, - application_inf=655380, - application_java=655381, - application_java_archive=655382, - application_javascript=655383, - application_json=655384, - application_marc=655385, - application_mbedlet=655386, - application_mime=655387, - application_mspowerpoint=655388, - application_msword=655389, - application_ndjson=655390, - application_netmc=655391, - application_octet_stream=655392, - application_oda=655393, - application_ogg=655394, - application_pdf=655395 | 0x40000000, - application_pgp_keys=655396, - 
application_pgp_signature=655397, - application_pkcs7_signature=655398, - application_pkix_cert=655399, - application_postscript=655400, - application_pro_eng=655401, - application_ringing_tones=655402, - application_smil=655403, - application_solids=655404, - application_sounder=655405, - application_step=655406, - application_streamingmedia=655407, - application_vda=655408, - application_vnd_amazon_mobi8_ebook=655409 | 0x02000000, - application_vnd_coffeescript=655410, - application_vnd_fdf=655411, - application_vnd_font_fontforge_sfd=655412, - application_vnd_hp_hpgl=655413, - application_vnd_iccprofile=655414, - application_vnd_lotus_1_2_3=655415, - application_vnd_ms_cab_compressed=655416, - application_vnd_ms_excel=655417, - application_vnd_ms_fontobject=655418, - application_vnd_ms_opentype=655419 | 0x20000000, - application_vnd_ms_outlook=655420, - application_vnd_ms_pki_certstore=655421, - application_vnd_ms_pki_pko=655422, - application_vnd_ms_pki_seccat=655423, - application_vnd_ms_powerpoint=655424, - application_vnd_ms_project=655425, - application_vnd_oasis_opendocument_base=655426, - application_vnd_oasis_opendocument_formula=655427, - application_vnd_oasis_opendocument_graphics=655428, - application_vnd_oasis_opendocument_presentation=655429, - application_vnd_oasis_opendocument_spreadsheet=655430, - application_vnd_oasis_opendocument_text=655431, - application_vnd_openxmlformats_officedocument_presentationml_presentation=655432 | 0x04000000, - application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655433 | 0x04000000, - application_vnd_openxmlformats_officedocument_wordprocessingml_document=655434 | 0x04000000, - application_vnd_symbian_install=655435, - application_vnd_tcpdump_pcap=655436, - application_vnd_wap_wmlc=655437, - application_vnd_wap_wmlscriptc=655438, - application_vnd_xara=655439, - application_vocaltec_media_desc=655440, - application_vocaltec_media_file=655441, - application_warc=655442, - application_winhelp=655443, - 
application_wordperfect=655444, - application_x_123=655445, - application_x_7z_compressed=655446 | 0x10000000, - application_x_aim=655447, - application_x_apple_diskimage=655448, - application_x_arc=655449 | 0x10000000, - application_x_archive=655450, - application_x_atari_7800_rom=655451, - application_x_authorware_bin=655452, - application_x_authorware_map=655453, - application_x_authorware_seg=655454, - application_x_avira_qua=655455, - application_x_bcpio=655456, - application_x_bittorrent=655457, - application_x_bsh=655458, - application_x_bytecode_python=655459, - application_x_bzip=655460, - application_x_bzip2=655461 | 0x08000000, - application_x_cbr=655462, - application_x_cbz=655463, - application_x_cdlink=655464, - application_x_chat=655465, - application_x_chrome_extension=655466, - application_x_cocoa=655467, - application_x_conference=655468, - application_x_coredump=655469, - application_x_cpio=655470, - application_x_dbf=655471, - application_x_dbt=655472, - application_x_debian_package=655473, - application_x_deepv=655474, - application_x_director=655475, - application_x_dmp=655476, - application_x_dosdriver=655477, - application_x_dosexec=655478, - application_x_dvi=655479, - application_x_elc=655480, - application_x_empty=1, - application_x_envoy=655481, - application_x_esrehber=655482, - application_x_excel=655483, - application_x_executable=655484, - application_x_font_gdos=655485, - application_x_font_pf2=655486, - application_x_font_pfm=655487, - application_x_font_sfn=655488, - application_x_font_ttf=655489 | 0x20000000, - application_x_fptapplication_x_dbt=655490, - application_x_freelance=655491, - application_x_gamecube_rom=655492, - application_x_gdbm=655493, - application_x_gettext_translation=655494, - application_x_git=655495, - application_x_gsp=655496, - application_x_gss=655497, - application_x_gtar=655498, - application_x_gzip=655499, - application_x_hdf=655500, - application_x_helpfile=655501, - application_x_httpd_imap=655502, - 
application_x_ima=655503, - application_x_innosetup=655504, - application_x_internett_signup=655505, - application_x_inventor=655506, - application_x_ip2=655507, - application_x_java_applet=655508, - application_x_java_commerce=655509, - application_x_java_image=655510, - application_x_java_jmod=655511, - application_x_java_keystore=655512, - application_x_kdelnk=655513, - application_x_koan=655514, - application_x_latex=655515, - application_x_livescreen=655516, - application_x_lotus=655517, - application_x_lz4=655518 | 0x08000000, - application_x_lz4_json=655519, - application_x_lzh=655520, - application_x_lzh_compressed=655521, - application_x_lzip=655522 | 0x08000000, - application_x_lzma=655523 | 0x08000000, - application_x_lzop=655524 | 0x08000000, - application_x_lzx=655525, - application_x_mach_binary=655526, - application_x_mach_executable=655527, - application_x_magic_cap_package_1_0=655528, - application_x_mathcad=655529, - application_x_maxis_dbpf=655530, - application_x_meme=655531, - application_x_midi=655532, - application_x_mif=655533, - application_x_mix_transfer=655534, - application_x_mobipocket_ebook=655535 | 0x02000000, - application_x_ms_compress_szdd=655536, - application_x_ms_pdb=655537, - application_x_ms_reader=655538, - application_x_msaccess=655539, - application_x_n64_rom=655540, - application_x_navi_animation=655541, - application_x_navidoc=655542, - application_x_navimap=655543, - application_x_navistyle=655544, - application_x_nes_rom=655545, - application_x_netcdf=655546, - application_x_newton_compatible_pkg=655547, - application_x_nintendo_ds_rom=655548, - application_x_object=655549, - application_x_omc=655550, - application_x_omcdatamaker=655551, - application_x_omcregerator=655552, - application_x_pagemaker=655553, - application_x_pcl=655554, - application_x_pgp_keyring=655555, - application_x_pixclscript=655556, - application_x_pkcs7_certreqresp=655557, - application_x_pkcs7_signature=655558, - application_x_project=655559, - 
application_x_qpro=655560, - application_x_rar=655561 | 0x10000000, - application_x_rpm=655562, - application_x_sdp=655563, - application_x_sea=655564, - application_x_seelogo=655565, - application_x_setupscript=655566, - application_x_shar=655567, - application_x_sharedlib=655568, - application_x_shockwave_flash=655569, - application_x_snappy_framed=655570, - application_x_sprite=655571, - application_x_sqlite3=655572, - application_x_stargallery_thm=655573, - application_x_stuffit=655574, - application_x_sv4cpio=655575, - application_x_sv4crc=655576, - application_x_tar=655577 | 0x10000000, - application_x_tbook=655578, - application_x_terminfo=655579, - application_x_terminfo2=655580, - application_x_tex_tfm=655581, - application_x_texinfo=655582, - application_x_ustar=655583, - application_x_visio=655584, - application_x_vnd_audioexplosion_mzz=655585, - application_x_vnd_ls_xpix=655586, - application_x_vrml=655587, - application_x_wais_source=655588, - application_x_wine_extension_ini=655589, - application_x_wintalk=655590, - application_x_world=655591, - application_x_wri=655592, - application_x_x509_ca_cert=655593, - application_x_xz=655594 | 0x08000000, - application_x_zip=655595, - application_x_zstd=655596 | 0x08000000, - application_x_zstd_dictionary=655597, - application_xml=655598, - application_zip=655599 | 0x10000000, - application_zlib=655600, - audio_basic=458993 | 0x80000000, - audio_it=458994, - audio_make=458995, - audio_mid=458996, - audio_midi=458997, - audio_mp4=458998, - audio_mpeg=458999, - audio_ogg=459000, - audio_s3m=459001, - audio_tsp_audio=459002, - audio_tsplayer=459003, - audio_vnd_qcelp=459004, - audio_voxware=459005, - audio_x_aiff=459006, - audio_x_flac=459007, - audio_x_gsm=459008, - audio_x_hx_aac_adts=459009, - audio_x_jam=459010, - audio_x_liveaudio=459011, - audio_x_m4a=459012, - audio_x_midi=459013, - audio_x_mod=459014, - audio_x_mp4a_latm=459015, - audio_x_mpeg_3=459016, - audio_x_mpequrl=459017, - audio_x_nspaudio=459018, 
- audio_x_pn_realaudio=459019, - audio_x_psid=459020, - audio_x_realaudio=459021, - audio_x_s3m=459022, - audio_x_twinvq=459023, - audio_x_twinvq_plugin=459024, - audio_x_voc=459025, - audio_x_wav=459026, - audio_x_xbox_executable=459027 | 0x80000000, - audio_x_xbox360_executable=459028 | 0x80000000, - audio_xm=459029, - font_otf=327958 | 0x20000000, - font_sfnt=327959 | 0x20000000, - font_woff=327960 | 0x20000000, - font_woff2=327961 | 0x20000000, - image_bmp=524570, - image_cmu_raster=524571, - image_fif=524572, - image_florian=524573, - image_g3fax=524574, - image_gif=524575, - image_heic=524576, - image_ief=524577, - image_jpeg=524578, - image_jutvision=524579, - image_naplps=524580, - image_pict=524581, - image_png=524582, - image_svg=524583 | 0x80000000, - image_svg_xml=524584 | 0x80000000, - image_tiff=524585, - image_vnd_adobe_photoshop=524586 | 0x80000000, - image_vnd_djvu=524587 | 0x80000000, - image_vnd_fpx=524588, - image_vnd_microsoft_icon=524589, - image_vnd_rn_realflash=524590, - image_vnd_rn_realpix=524591, - image_vnd_wap_wbmp=524592, - image_vnd_xiff=524593, - image_webp=524594, - image_wmf=524595, - image_x_3ds=524596, - image_x_adobe_dng=524597 | 0x00800000, - image_x_award_bioslogo=524598, - image_x_canon_cr2=524599 | 0x00800000, - image_x_canon_crw=524600 | 0x00800000, - image_x_cmu_raster=524601, - image_x_cur=524602, - image_x_dcraw=524603 | 0x00800000, - image_x_dwg=524604, - image_x_eps=524605, - image_x_epson_erf=524606 | 0x00800000, - image_x_exr=524607, - image_x_fuji_raf=524608 | 0x00800000, - image_x_gem=524609, - image_x_icns=524610, - image_x_icon=524611 | 0x80000000, - image_x_jg=524612, - image_x_jps=524613, - image_x_kodak_dcr=524614 | 0x00800000, - image_x_kodak_k25=524615 | 0x00800000, - image_x_kodak_kdc=524616 | 0x00800000, - image_x_minolta_mrw=524617 | 0x00800000, - image_x_ms_bmp=524618, - image_x_niff=524619, - image_x_nikon_nef=524620 | 0x00800000, - image_x_olympus_orf=524621 | 0x00800000, - image_x_panasonic_raw=524622 
| 0x00800000, - image_x_pcx=524623, - image_x_pentax_pef=524624 | 0x00800000, - image_x_pict=524625, - image_x_portable_bitmap=524626, - image_x_portable_graymap=524627, - image_x_portable_pixmap=524628, - image_x_quicktime=524629, - image_x_rgb=524630, - image_x_sigma_x3f=524631 | 0x00800000, - image_x_sony_arw=524632 | 0x00800000, - image_x_sony_sr2=524633 | 0x00800000, - image_x_sony_srf=524634 | 0x00800000, - image_x_tga=524635, - image_x_tiff=524636, - image_x_win_bitmap=524637, - image_x_xcf=524638 | 0x80000000, - image_x_xpixmap=524639 | 0x80000000, - image_x_xwindowdump=524640, - message_news=196961, - message_rfc822=196962, - model_vnd_dwf=65891, - model_vnd_gdl=65892, - model_vnd_gs_gdl=65893, - model_vrml=65894, - model_x_pov=65895, - sist2_sidecar=2, - text_PGP=590184, - text_asp=590185, - text_css=590186, - text_html=590187 | 0x01000000, - text_javascript=590188, - text_mcf=590189, - text_pascal=590190, - text_plain=590191, - text_richtext=590192, - text_rtf=590193, - text_scriplet=590194, - text_tab_separated_values=590195, - text_troff=590196, - text_uri_list=590197, - text_vnd_abc=590198, - text_vnd_fmi_flexstor=590199, - text_vnd_wap_wml=590200, - text_vnd_wap_wmlscript=590201, - text_webviewhtml=590202, - text_x_Algol68=590203, - text_x_asm=590204, - text_x_audiosoft_intra=590205, - text_x_awk=590206, - text_x_bcpl=590207, - text_x_c=590208, - text_x_c__=590209, - text_x_component=590210, - text_x_diff=590211, - text_x_fortran=590212, - text_x_java=590213, - text_x_la_asf=590214, - text_x_lisp=590215, - text_x_m=590216, - text_x_m4=590217, - text_x_makefile=590218, - text_x_ms_regedit=590219, - text_x_msdos_batch=590220, - text_x_objective_c=590221, - text_x_pascal=590222, - text_x_perl=590223, - text_x_php=590224, - text_x_po=590225, - text_x_python=590226, - text_x_ruby=590227, - text_x_sass=590228, - text_x_scss=590229, - text_x_server_parsed_html=590230, - text_x_setext=590231, - text_x_sgml=590232 | 0x01000000, - text_x_shellscript=590233, - 
text_x_speech=590234, - text_x_tcl=590235, - text_x_tex=590236, - text_x_uil=590237, - text_x_uuencode=590238, - text_x_vcalendar=590239, - text_x_vcard=590240, - text_xml=590241 | 0x01000000, - video_MP2T=393634, - video_animaflex=393635, - video_avi=393636, - video_avs_video=393637, - video_mp4=393638, - video_mpeg=393639, - video_quicktime=393640, - video_vdo=393641, - video_vivo=393642, - video_vnd_rn_realvideo=393643, - video_vosaic=393644, - video_webm=393645, - video_x_amt_demorun=393646, - video_x_amt_showrun=393647, - video_x_atomic3d_feature=393648, - video_x_dl=393649, - video_x_dv=393650, - video_x_fli=393651, - video_x_flv=393652, - video_x_isvideo=393653, - video_x_jng=393654 | 0x80000000, - video_x_m4v=393655, - video_x_matroska=393656, - video_x_mng=393657, - video_x_motion_jpeg=393658, - video_x_ms_asf=393659, - video_x_msvideo=393660, - video_x_qtc=393661, - video_x_sgi_movie=393662, - x_epoc_x_sisx_app=721343, +application_CDFV2=655361, +application_CDFV2_corrupt=655362, +application_arj=655363, +application_base64=655364, +application_binhex=655365, +application_book=655366, +application_clariscad=655367, +application_commonground=655368, +application_csv=655369, +application_dicom=655370, +application_drafting=655371, +application_epub_zip=655372 | 0x40000000, +application_freeloader=655373, +application_futuresplash=655374, +application_groupwise=655375, +application_gzip=655376 | 0x08000000, +application_hta=655377, +application_i_deas=655378, +application_iges=655379, +application_inf=655380, +application_java=655381, +application_java_archive=655382, +application_javascript=655383, +application_json=655384, +application_marc=655385, +application_mbedlet=655386, +application_mime=655387, +application_mspowerpoint=655388, +application_msword=655389, +application_ndjson=655390, +application_netmc=655391, +application_octet_stream=655392, +application_oda=655393, +application_ogg=655394, +application_pdf=655395 | 0x40000000, 
+application_pgp_keys=655396, +application_pgp_signature=655397, +application_pkcs7_signature=655398, +application_pkix_cert=655399, +application_postscript=655400, +application_pro_eng=655401, +application_ringing_tones=655402, +application_smil=655403, +application_solids=655404, +application_sounder=655405, +application_step=655406, +application_streamingmedia=655407, +application_vda=655408, +application_vnd_amazon_mobi8_ebook=655409 | 0x02000000, +application_vnd_coffeescript=655410, +application_vnd_fdf=655411, +application_vnd_font_fontforge_sfd=655412, +application_vnd_hp_hpgl=655413, +application_vnd_iccprofile=655414, +application_vnd_lotus_1_2_3=655415, +application_vnd_ms_cab_compressed=655416, +application_vnd_ms_excel=655417, +application_vnd_ms_fontobject=655418, +application_vnd_ms_opentype=655419 | 0x20000000, +application_vnd_ms_outlook=655420, +application_vnd_ms_pki_certstore=655421, +application_vnd_ms_pki_pko=655422, +application_vnd_ms_pki_seccat=655423, +application_vnd_ms_powerpoint=655424, +application_vnd_ms_project=655425, +application_vnd_oasis_opendocument_base=655426, +application_vnd_oasis_opendocument_formula=655427, +application_vnd_oasis_opendocument_graphics=655428, +application_vnd_oasis_opendocument_presentation=655429, +application_vnd_oasis_opendocument_spreadsheet=655430, +application_vnd_oasis_opendocument_text=655431, +application_vnd_openxmlformats_officedocument_presentationml_presentation=655432 | 0x04000000, +application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655433 | 0x04000000, +application_vnd_openxmlformats_officedocument_wordprocessingml_document=655434 | 0x04000000, +application_vnd_symbian_install=655435, +application_vnd_tcpdump_pcap=655436, +application_vnd_wap_wmlc=655437, +application_vnd_wap_wmlscriptc=655438, +application_vnd_xara=655439, +application_vocaltec_media_desc=655440, +application_vocaltec_media_file=655441, +application_warc=655442, +application_winhelp=655443, 
+application_wordperfect=655444, +application_x_123=655445, +application_x_7z_compressed=655446 | 0x10000000, +application_x_aim=655447, +application_x_apple_diskimage=655448, +application_x_arc=655449 | 0x10000000, +application_x_archive=655450, +application_x_atari_7800_rom=655451, +application_x_authorware_bin=655452, +application_x_authorware_map=655453, +application_x_authorware_seg=655454, +application_x_avira_qua=655455, +application_x_bcpio=655456, +application_x_bittorrent=655457, +application_x_bsh=655458, +application_x_bytecode_python=655459, +application_x_bzip=655460, +application_x_bzip2=655461 | 0x08000000, +application_x_cbr=655462, +application_x_cbz=655463, +application_x_cdlink=655464, +application_x_chat=655465, +application_x_chrome_extension=655466, +application_x_cocoa=655467, +application_x_conference=655468, +application_x_coredump=655469, +application_x_cpio=655470, +application_x_dbf=655471, +application_x_dbt=655472, +application_x_debian_package=655473, +application_x_deepv=655474, +application_x_director=655475, +application_x_dmp=655476, +application_x_dosdriver=655477, +application_x_dosexec=655478, +application_x_dvi=655479, +application_x_elc=655480, +application_x_empty=1, +application_x_envoy=655481, +application_x_esrehber=655482, +application_x_excel=655483, +application_x_executable=655484, +application_x_font_gdos=655485, +application_x_font_pf2=655486, +application_x_font_pfm=655487, +application_x_font_sfn=655488, +application_x_font_ttf=655489 | 0x20000000, +application_x_fptapplication_x_dbt=655490, +application_x_freelance=655491, +application_x_gamecube_rom=655492, +application_x_gdbm=655493, +application_x_gettext_translation=655494, +application_x_git=655495, +application_x_gsp=655496, +application_x_gss=655497, +application_x_gtar=655498, +application_x_gzip=655499, +application_x_hdf=655500, +application_x_helpfile=655501, +application_x_httpd_imap=655502, +application_x_ima=655503, +application_x_innosetup=655504, 
+application_x_internett_signup=655505, +application_x_inventor=655506, +application_x_ip2=655507, +application_x_java_applet=655508, +application_x_java_commerce=655509, +application_x_java_image=655510, +application_x_java_jmod=655511, +application_x_java_keystore=655512, +application_x_kdelnk=655513, +application_x_koan=655514, +application_x_latex=655515, +application_x_livescreen=655516, +application_x_lotus=655517, +application_x_lz4=655518 | 0x08000000, +application_x_lz4_json=655519, +application_x_lzh=655520, +application_x_lzh_compressed=655521, +application_x_lzip=655522 | 0x08000000, +application_x_lzma=655523 | 0x08000000, +application_x_lzop=655524 | 0x08000000, +application_x_lzx=655525, +application_x_mach_binary=655526, +application_x_mach_executable=655527, +application_x_magic_cap_package_1_0=655528, +application_x_mathcad=655529, +application_x_maxis_dbpf=655530, +application_x_meme=655531, +application_x_midi=655532, +application_x_mif=655533, +application_x_mix_transfer=655534, +application_x_mobipocket_ebook=655535 | 0x02000000, +application_x_ms_compress_szdd=655536, +application_x_ms_pdb=655537, +application_x_ms_reader=655538, +application_x_msaccess=655539, +application_x_n64_rom=655540, +application_x_navi_animation=655541, +application_x_navidoc=655542, +application_x_navimap=655543, +application_x_navistyle=655544, +application_x_nes_rom=655545, +application_x_netcdf=655546, +application_x_newton_compatible_pkg=655547, +application_x_nintendo_ds_rom=655548, +application_x_object=655549, +application_x_omc=655550, +application_x_omcdatamaker=655551, +application_x_omcregerator=655552, +application_x_pagemaker=655553, +application_x_pcl=655554, +application_x_pgp_keyring=655555, +application_x_pixclscript=655556, +application_x_pkcs7_certreqresp=655557, +application_x_pkcs7_signature=655558, +application_x_project=655559, +application_x_qpro=655560, +application_x_rar=655561 | 0x10000000, +application_x_rpm=655562, 
+application_x_sdp=655563, +application_x_sea=655564, +application_x_seelogo=655565, +application_x_setupscript=655566, +application_x_shar=655567, +application_x_sharedlib=655568, +application_x_shockwave_flash=655569, +application_x_snappy_framed=655570, +application_x_sprite=655571, +application_x_sqlite3=655572, +application_x_stargallery_thm=655573, +application_x_stuffit=655574, +application_x_sv4cpio=655575, +application_x_sv4crc=655576, +application_x_tar=655577 | 0x10000000, +application_x_tbook=655578, +application_x_terminfo=655579, +application_x_terminfo2=655580, +application_x_tex_tfm=655581, +application_x_texinfo=655582, +application_x_ustar=655583, +application_x_visio=655584, +application_x_vnd_audioexplosion_mzz=655585, +application_x_vnd_ls_xpix=655586, +application_x_vrml=655587, +application_x_wais_source=655588, +application_x_wine_extension_ini=655589, +application_x_wintalk=655590, +application_x_world=655591, +application_x_wri=655592, +application_x_x509_ca_cert=655593, +application_x_xz=655594 | 0x08000000, +application_x_zip=655595, +application_x_zstd=655596 | 0x08000000, +application_x_zstd_dictionary=655597, +application_xml=655598, +application_zip=655599 | 0x10000000, +application_zlib=655600, +audio_basic=458993 | 0x80000000, +audio_it=458994, +audio_make=458995, +audio_mid=458996, +audio_midi=458997, +audio_mp4=458998, +audio_mpeg=458999, +audio_ogg=459000, +audio_s3m=459001, +audio_tsp_audio=459002, +audio_tsplayer=459003, +audio_vnd_qcelp=459004, +audio_voxware=459005, +audio_x_aiff=459006, +audio_x_flac=459007, +audio_x_gsm=459008, +audio_x_hx_aac_adts=459009, +audio_x_jam=459010, +audio_x_liveaudio=459011, +audio_x_m4a=459012, +audio_x_midi=459013, +audio_x_mod=459014, +audio_x_mp4a_latm=459015, +audio_x_mpeg_3=459016, +audio_x_mpequrl=459017, +audio_x_nspaudio=459018, +audio_x_pn_realaudio=459019, +audio_x_psid=459020, +audio_x_realaudio=459021, +audio_x_s3m=459022, +audio_x_twinvq=459023, +audio_x_twinvq_plugin=459024, 
+audio_x_voc=459025, +audio_x_wav=459026, +audio_x_xbox_executable=459027 | 0x80000000, +audio_x_xbox360_executable=459028 | 0x80000000, +audio_xm=459029, +font_otf=327958 | 0x20000000, +font_sfnt=327959 | 0x20000000, +font_woff=327960 | 0x20000000, +font_woff2=327961 | 0x20000000, +image_bmp=524570, +image_cmu_raster=524571, +image_fif=524572, +image_florian=524573, +image_g3fax=524574, +image_gif=524575, +image_heic=524576, +image_ief=524577, +image_jpeg=524578, +image_jutvision=524579, +image_naplps=524580, +image_pict=524581, +image_png=524582, +image_svg=524583 | 0x80000000, +image_svg_xml=524584 | 0x80000000, +image_tiff=524585, +image_vnd_adobe_photoshop=524586 | 0x80000000, +image_vnd_djvu=524587 | 0x80000000, +image_vnd_fpx=524588, +image_vnd_microsoft_icon=524589, +image_vnd_rn_realflash=524590, +image_vnd_rn_realpix=524591, +image_vnd_wap_wbmp=524592, +image_vnd_xiff=524593, +image_webp=524594, +image_wmf=524595, +image_x_3ds=524596, +image_x_adobe_dng=524597 | 0x00800000, +image_x_award_bioslogo=524598, +image_x_canon_cr2=524599 | 0x00800000, +image_x_canon_crw=524600 | 0x00800000, +image_x_cmu_raster=524601, +image_x_cur=524602, +image_x_dcraw=524603 | 0x00800000, +image_x_dwg=524604, +image_x_eps=524605, +image_x_epson_erf=524606 | 0x00800000, +image_x_exr=524607, +image_x_fuji_raf=524608 | 0x00800000, +image_x_gem=524609, +image_x_icns=524610, +image_x_icon=524611 | 0x80000000, +image_x_jg=524612, +image_x_jps=524613, +image_x_kodak_dcr=524614 | 0x00800000, +image_x_kodak_k25=524615 | 0x00800000, +image_x_kodak_kdc=524616 | 0x00800000, +image_x_minolta_mrw=524617 | 0x00800000, +image_x_ms_bmp=524618, +image_x_niff=524619, +image_x_nikon_nef=524620 | 0x00800000, +image_x_olympus_orf=524621 | 0x00800000, +image_x_panasonic_raw=524622 | 0x00800000, +image_x_pcx=524623, +image_x_pentax_pef=524624 | 0x00800000, +image_x_pict=524625, +image_x_portable_bitmap=524626, +image_x_portable_graymap=524627, +image_x_portable_pixmap=524628, 
+image_x_quicktime=524629, +image_x_rgb=524630, +image_x_sigma_x3f=524631 | 0x00800000, +image_x_sony_arw=524632 | 0x00800000, +image_x_sony_sr2=524633 | 0x00800000, +image_x_sony_srf=524634 | 0x00800000, +image_x_tga=524635, +image_x_tiff=524636, +image_x_win_bitmap=524637, +image_x_xcf=524638 | 0x80000000, +image_x_xpixmap=524639 | 0x80000000, +image_x_xwindowdump=524640, +message_news=196961, +message_rfc822=196962, +model_vnd_dwf=65891, +model_vnd_gdl=65892, +model_vnd_gs_gdl=65893, +model_vrml=65894, +model_x_pov=65895, +sist2_sidecar=2, +text_PGP=590184, +text_asp=590185, +text_css=590186, +text_html=590187 | 0x01000000, +text_javascript=590188, +text_mcf=590189, +text_pascal=590190, +text_plain=590191, +text_richtext=590192, +text_rtf=590193, +text_scriplet=590194, +text_tab_separated_values=590195, +text_troff=590196, +text_uri_list=590197, +text_vnd_abc=590198, +text_vnd_fmi_flexstor=590199, +text_vnd_wap_wml=590200, +text_vnd_wap_wmlscript=590201, +text_webviewhtml=590202, +text_x_Algol68=590203, +text_x_asm=590204, +text_x_audiosoft_intra=590205, +text_x_awk=590206, +text_x_bcpl=590207, +text_x_c=590208, +text_x_c__=590209, +text_x_component=590210, +text_x_diff=590211, +text_x_fortran=590212, +text_x_java=590213, +text_x_la_asf=590214, +text_x_lisp=590215, +text_x_m=590216, +text_x_m4=590217, +text_x_makefile=590218, +text_x_ms_regedit=590219, +text_x_msdos_batch=590220, +text_x_objective_c=590221, +text_x_pascal=590222, +text_x_perl=590223, +text_x_php=590224, +text_x_po=590225, +text_x_python=590226, +text_x_ruby=590227, +text_x_sass=590228, +text_x_scss=590229, +text_x_server_parsed_html=590230, +text_x_setext=590231, +text_x_sgml=590232 | 0x01000000, +text_x_shellscript=590233, +text_x_speech=590234, +text_x_tcl=590235, +text_x_tex=590236, +text_x_uil=590237, +text_x_uuencode=590238, +text_x_vcalendar=590239, +text_x_vcard=590240, +text_xml=590241 | 0x01000000, +video_MP2T=393634, +video_animaflex=393635, +video_avi=393636, +video_avs_video=393637, 
+video_mp4=393638, +video_mpeg=393639, +video_quicktime=393640, +video_vdo=393641, +video_vivo=393642, +video_vnd_rn_realvideo=393643, +video_vosaic=393644, +video_webm=393645, +video_x_amt_demorun=393646, +video_x_amt_showrun=393647, +video_x_atomic3d_feature=393648, +video_x_dl=393649, +video_x_dv=393650, +video_x_fli=393651, +video_x_flv=393652, +video_x_isvideo=393653, +video_x_jng=393654 | 0x80000000, +video_x_m4v=393655, +video_x_matroska=393656, +video_x_mng=393657, +video_x_motion_jpeg=393658, +video_x_ms_asf=393659, +video_x_msvideo=393660, +video_x_qtc=393661, +video_x_sgi_movie=393662, +x_epoc_x_sisx_app=721343, }; char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) { case application_arj: return "application/arj"; @@ -907,1001 +905,837 @@ case image_x_sony_srf: return "image/x-sony-srf"; case image_x_epson_erf: return "image/x-epson-erf"; case sist2_sidecar: return "sist2/sidecar"; default: return NULL;}} -GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal); -g_hash_table_insert(ext_table, "arj", (gpointer)application_arj); -g_hash_table_insert(ext_table, "mme", (gpointer)application_base64); -g_hash_table_insert(ext_table, "hqx", (gpointer)application_binhex); -g_hash_table_insert(ext_table, "boo", (gpointer)application_book); -g_hash_table_insert(ext_table, "book", (gpointer)application_book); -g_hash_table_insert(ext_table, "sdv", (gpointer)application_CDFV2); -g_hash_table_insert(ext_table, "ccad", (gpointer)application_clariscad); -g_hash_table_insert(ext_table, "dp", (gpointer)application_commonground); -g_hash_table_insert(ext_table, "dcm", (gpointer)application_dicom); -g_hash_table_insert(ext_table, "drw", (gpointer)application_drafting); -g_hash_table_insert(ext_table, "epub", (gpointer)application_epub_zip); -g_hash_table_insert(ext_table, "frl", (gpointer)application_freeloader); -g_hash_table_insert(ext_table, "spl", (gpointer)application_futuresplash); 
-g_hash_table_insert(ext_table, "vew", (gpointer)application_groupwise); -g_hash_table_insert(ext_table, "gz", (gpointer)application_gzip); -g_hash_table_insert(ext_table, "tgz", (gpointer)application_gzip); -g_hash_table_insert(ext_table, "hta", (gpointer)application_hta); -g_hash_table_insert(ext_table, "unv", (gpointer)application_i_deas); -g_hash_table_insert(ext_table, "iges", (gpointer)application_iges); -g_hash_table_insert(ext_table, "igs", (gpointer)application_iges); -g_hash_table_insert(ext_table, "inf", (gpointer)application_inf); -g_hash_table_insert(ext_table, "jar", (gpointer)application_java_archive); -g_hash_table_insert(ext_table, "class", (gpointer)application_java); -g_hash_table_insert(ext_table, "json", (gpointer)application_json); -g_hash_table_insert(ext_table, "jsonl", (gpointer)application_ndjson); -g_hash_table_insert(ext_table, "ndjson", (gpointer)application_ndjson); -g_hash_table_insert(ext_table, "mrc", (gpointer)application_marc); -g_hash_table_insert(ext_table, "mbd", (gpointer)application_mbedlet); -g_hash_table_insert(ext_table, "aps", (gpointer)application_mime); -g_hash_table_insert(ext_table, "ppz", (gpointer)application_mspowerpoint); -g_hash_table_insert(ext_table, "doc", (gpointer)application_msword); -g_hash_table_insert(ext_table, "dot", (gpointer)application_msword); -g_hash_table_insert(ext_table, "w6w", (gpointer)application_msword); -g_hash_table_insert(ext_table, "wiz", (gpointer)application_msword); -g_hash_table_insert(ext_table, "word", (gpointer)application_msword); -g_hash_table_insert(ext_table, "mcp", (gpointer)application_netmc); -g_hash_table_insert(ext_table, "bin", (gpointer)application_octet_stream); -g_hash_table_insert(ext_table, "dump", (gpointer)application_octet_stream); -g_hash_table_insert(ext_table, "gpg", (gpointer)application_octet_stream); -g_hash_table_insert(ext_table, "oda", (gpointer)application_oda); -g_hash_table_insert(ext_table, "ogv", (gpointer)application_ogg); 
-g_hash_table_insert(ext_table, "pdf", (gpointer)application_pdf); -g_hash_table_insert(ext_table, "pgp", (gpointer)application_pgp_signature); -g_hash_table_insert(ext_table, "p7s", (gpointer)application_pkcs7_signature); -g_hash_table_insert(ext_table, "cer", (gpointer)application_pkix_cert); -g_hash_table_insert(ext_table, "crt", (gpointer)application_pkix_cert); -g_hash_table_insert(ext_table, "ai", (gpointer)application_postscript); -g_hash_table_insert(ext_table, "ps", (gpointer)application_postscript); -g_hash_table_insert(ext_table, "part", (gpointer)application_pro_eng); -g_hash_table_insert(ext_table, "prt", (gpointer)application_pro_eng); -g_hash_table_insert(ext_table, "rng", (gpointer)application_ringing_tones); -g_hash_table_insert(ext_table, "smi", (gpointer)application_smil); -g_hash_table_insert(ext_table, "smil", (gpointer)application_smil); -g_hash_table_insert(ext_table, "sol", (gpointer)application_solids); -g_hash_table_insert(ext_table, "sdr", (gpointer)application_sounder); -g_hash_table_insert(ext_table, "step", (gpointer)application_step); -g_hash_table_insert(ext_table, "stp", (gpointer)application_step); -g_hash_table_insert(ext_table, "ssm", (gpointer)application_streamingmedia); -g_hash_table_insert(ext_table, "vda", (gpointer)application_vda); -g_hash_table_insert(ext_table, "fdf", (gpointer)application_vnd_fdf); -g_hash_table_insert(ext_table, "sfd", (gpointer)application_vnd_font_fontforge_sfd); -g_hash_table_insert(ext_table, "hgl", (gpointer)application_vnd_hp_hpgl); -g_hash_table_insert(ext_table, "hpg", (gpointer)application_vnd_hp_hpgl); -g_hash_table_insert(ext_table, "hpgl", (gpointer)application_vnd_hp_hpgl); -g_hash_table_insert(ext_table, "icm", (gpointer)application_vnd_iccprofile); -g_hash_table_insert(ext_table, "cab", (gpointer)application_vnd_ms_cab_compressed); -g_hash_table_insert(ext_table, "xlb", (gpointer)application_vnd_ms_excel); -g_hash_table_insert(ext_table, "xlc", (gpointer)application_vnd_ms_excel); 
-g_hash_table_insert(ext_table, "xll", (gpointer)application_vnd_ms_excel); -g_hash_table_insert(ext_table, "xlm", (gpointer)application_vnd_ms_excel); -g_hash_table_insert(ext_table, "xls", (gpointer)application_vnd_ms_excel); -g_hash_table_insert(ext_table, "xlw", (gpointer)application_vnd_ms_excel); -g_hash_table_insert(ext_table, "eot", (gpointer)application_vnd_ms_fontobject); -g_hash_table_insert(ext_table, "otf", (gpointer)application_vnd_ms_opentype); -g_hash_table_insert(ext_table, "sst", (gpointer)application_vnd_ms_pki_certstore); -g_hash_table_insert(ext_table, "pko", (gpointer)application_vnd_ms_pki_pko); -g_hash_table_insert(ext_table, "cat", (gpointer)application_vnd_ms_pki_seccat); -g_hash_table_insert(ext_table, "pot", (gpointer)application_vnd_ms_powerpoint); -g_hash_table_insert(ext_table, "ppa", (gpointer)application_vnd_ms_powerpoint); -g_hash_table_insert(ext_table, "pps", (gpointer)application_vnd_ms_powerpoint); -g_hash_table_insert(ext_table, "ppt", (gpointer)application_vnd_ms_powerpoint); -g_hash_table_insert(ext_table, "pwz", (gpointer)application_vnd_ms_powerpoint); -g_hash_table_insert(ext_table, "mpp", (gpointer)application_vnd_ms_project); -g_hash_table_insert(ext_table, "odb", (gpointer)application_vnd_oasis_opendocument_base); -g_hash_table_insert(ext_table, "odf", (gpointer)application_vnd_oasis_opendocument_formula); -g_hash_table_insert(ext_table, "odg", (gpointer)application_vnd_oasis_opendocument_graphics); -g_hash_table_insert(ext_table, "odp", (gpointer)application_vnd_oasis_opendocument_presentation); -g_hash_table_insert(ext_table, "ods", (gpointer)application_vnd_oasis_opendocument_spreadsheet); -g_hash_table_insert(ext_table, "odt", (gpointer)application_vnd_oasis_opendocument_text); -g_hash_table_insert(ext_table, "pptx", (gpointer)application_vnd_openxmlformats_officedocument_presentationml_presentation); -g_hash_table_insert(ext_table, "xlsx", 
(gpointer)application_vnd_openxmlformats_officedocument_spreadsheetml_sheet); -g_hash_table_insert(ext_table, "docx", (gpointer)application_vnd_openxmlformats_officedocument_wordprocessingml_document); -g_hash_table_insert(ext_table, "pcap", (gpointer)application_vnd_tcpdump_pcap); -g_hash_table_insert(ext_table, "wmlc", (gpointer)application_vnd_wap_wmlc); -g_hash_table_insert(ext_table, "wmlsc", (gpointer)application_vnd_wap_wmlscriptc); -g_hash_table_insert(ext_table, "web", (gpointer)application_vnd_xara); -g_hash_table_insert(ext_table, "vmd", (gpointer)application_vocaltec_media_desc); -g_hash_table_insert(ext_table, "vmf", (gpointer)application_vocaltec_media_file); -g_hash_table_insert(ext_table, "warc", (gpointer)application_warc); -g_hash_table_insert(ext_table, "hlp", (gpointer)application_winhelp); -g_hash_table_insert(ext_table, "wp", (gpointer)application_wordperfect); -g_hash_table_insert(ext_table, "wp5", (gpointer)application_wordperfect); -g_hash_table_insert(ext_table, "wp6", (gpointer)application_wordperfect); -g_hash_table_insert(ext_table, "wpd", (gpointer)application_wordperfect); -g_hash_table_insert(ext_table, "w60", (gpointer)application_wordperfect); -g_hash_table_insert(ext_table, "w61", (gpointer)application_wordperfect); -g_hash_table_insert(ext_table, "wk1", (gpointer)application_x_123); -g_hash_table_insert(ext_table, "7z", (gpointer)application_x_7z_compressed); -g_hash_table_insert(ext_table, "aim", (gpointer)application_x_aim); -g_hash_table_insert(ext_table, "a", (gpointer)application_x_archive); -g_hash_table_insert(ext_table, "a78", (gpointer)application_x_atari_7800_rom); -g_hash_table_insert(ext_table, "aab", (gpointer)application_x_authorware_bin); -g_hash_table_insert(ext_table, "aam", (gpointer)application_x_authorware_map); -g_hash_table_insert(ext_table, "aas", (gpointer)application_x_authorware_seg); -g_hash_table_insert(ext_table, "bcpio", (gpointer)application_x_bcpio); -g_hash_table_insert(ext_table, "torrent", 
(gpointer)application_x_bittorrent); -g_hash_table_insert(ext_table, "bsh", (gpointer)application_x_bsh); -g_hash_table_insert(ext_table, "pyc", (gpointer)application_x_bytecode_python); -g_hash_table_insert(ext_table, "boz", (gpointer)application_x_bzip2); -g_hash_table_insert(ext_table, "bz2", (gpointer)application_x_bzip2); -g_hash_table_insert(ext_table, "bz", (gpointer)application_x_bzip); -g_hash_table_insert(ext_table, "cbr", (gpointer)application_x_cbr); -g_hash_table_insert(ext_table, "cbz", (gpointer)application_x_cbz); -g_hash_table_insert(ext_table, "vcd", (gpointer)application_x_cdlink); -g_hash_table_insert(ext_table, "cha", (gpointer)application_x_chat); -g_hash_table_insert(ext_table, "chat", (gpointer)application_x_chat); -g_hash_table_insert(ext_table, "cco", (gpointer)application_x_cocoa); -g_hash_table_insert(ext_table, "nsc", (gpointer)application_x_conference); -g_hash_table_insert(ext_table, "cpio", (gpointer)application_x_cpio); -g_hash_table_insert(ext_table, "dbf", (gpointer)application_x_dbf); -g_hash_table_insert(ext_table, "deb", (gpointer)application_x_debian_package); -g_hash_table_insert(ext_table, "deepv", (gpointer)application_x_deepv); -g_hash_table_insert(ext_table, "dir", (gpointer)application_x_director); -g_hash_table_insert(ext_table, "dxr", (gpointer)application_x_director); -g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp); -g_hash_table_insert(ext_table, "dll", (gpointer)application_x_dosexec); -g_hash_table_insert(ext_table, "dvi", (gpointer)application_x_dvi); -g_hash_table_insert(ext_table, "elc", (gpointer)application_x_elc); -g_hash_table_insert(ext_table, "env", (gpointer)application_x_envoy); -g_hash_table_insert(ext_table, "evy", (gpointer)application_x_envoy); -g_hash_table_insert(ext_table, "es", (gpointer)application_x_esrehber); -g_hash_table_insert(ext_table, "xla", (gpointer)application_x_excel); -g_hash_table_insert(ext_table, "xld", (gpointer)application_x_excel); 
-g_hash_table_insert(ext_table, "xlk", (gpointer)application_x_excel); -g_hash_table_insert(ext_table, "xlt", (gpointer)application_x_excel); -g_hash_table_insert(ext_table, "xlv", (gpointer)application_x_excel); -g_hash_table_insert(ext_table, "exe", (gpointer)application_x_executable); -g_hash_table_insert(ext_table, "pf2", (gpointer)application_x_font_pf2); -g_hash_table_insert(ext_table, "pfm", (gpointer)application_x_font_pfm); -g_hash_table_insert(ext_table, "ttf", (gpointer)application_x_font_ttf); -g_hash_table_insert(ext_table, "ttc", (gpointer)application_x_font_ttf); -g_hash_table_insert(ext_table, "pre", (gpointer)application_x_freelance); -g_hash_table_insert(ext_table, "gsp", (gpointer)application_x_gsp); -g_hash_table_insert(ext_table, "gss", (gpointer)application_x_gss); -g_hash_table_insert(ext_table, "gtar", (gpointer)application_x_gtar); -g_hash_table_insert(ext_table, "gzip", (gpointer)application_x_gzip); -g_hash_table_insert(ext_table, "hdf", (gpointer)application_x_hdf); -g_hash_table_insert(ext_table, "help", (gpointer)application_x_helpfile); -g_hash_table_insert(ext_table, "imap", (gpointer)application_x_httpd_imap); -g_hash_table_insert(ext_table, "ima", (gpointer)application_x_ima); -g_hash_table_insert(ext_table, "ins", (gpointer)application_x_internett_signup); -g_hash_table_insert(ext_table, "iv", (gpointer)application_x_inventor); -g_hash_table_insert(ext_table, "ip", (gpointer)application_x_ip2); -g_hash_table_insert(ext_table, "jcm", (gpointer)application_x_java_commerce); -g_hash_table_insert(ext_table, "jmod", (gpointer)application_x_java_jmod); -g_hash_table_insert(ext_table, "skd", (gpointer)application_x_koan); -g_hash_table_insert(ext_table, "skm", (gpointer)application_x_koan); -g_hash_table_insert(ext_table, "skp", (gpointer)application_x_koan); -g_hash_table_insert(ext_table, "skt", (gpointer)application_x_koan); -g_hash_table_insert(ext_table, "latex", (gpointer)application_x_latex); -g_hash_table_insert(ext_table, "ltx", 
(gpointer)application_x_latex); -g_hash_table_insert(ext_table, "ivy", (gpointer)application_x_livescreen); -g_hash_table_insert(ext_table, "wq1", (gpointer)application_x_lotus); -g_hash_table_insert(ext_table, "jsonlz4", (gpointer)application_x_lz4_json); -g_hash_table_insert(ext_table, "lz4", (gpointer)application_x_lz4); -g_hash_table_insert(ext_table, "lzh", (gpointer)application_x_lzh); -g_hash_table_insert(ext_table, "lz", (gpointer)application_x_lzip); -g_hash_table_insert(ext_table, "lzma", (gpointer)application_x_lzma); -g_hash_table_insert(ext_table, "lzo", (gpointer)application_x_lzop); -g_hash_table_insert(ext_table, "lzx", (gpointer)application_x_lzx); -g_hash_table_insert(ext_table, "jnilib", (gpointer)application_x_mach_binary); -g_hash_table_insert(ext_table, "dylib", (gpointer)application_x_mach_binary); -g_hash_table_insert(ext_table, "mc$", (gpointer)application_x_magic_cap_package_1_0); -g_hash_table_insert(ext_table, "mcd", (gpointer)application_x_mathcad); -g_hash_table_insert(ext_table, "mm", (gpointer)application_x_meme); -g_hash_table_insert(ext_table, "midi", (gpointer)application_x_midi); -g_hash_table_insert(ext_table, "mif", (gpointer)application_x_mif); -g_hash_table_insert(ext_table, "nix", (gpointer)application_x_mix_transfer); -g_hash_table_insert(ext_table, "opf", (gpointer)application_xml); -g_hash_table_insert(ext_table, "mobi", (gpointer)application_x_mobipocket_ebook); -g_hash_table_insert(ext_table, "azw", (gpointer)application_vnd_amazon_mobi8_ebook); -g_hash_table_insert(ext_table, "azw3", (gpointer)application_vnd_amazon_mobi8_ebook); -g_hash_table_insert(ext_table, "accdb", (gpointer)application_x_msaccess); -g_hash_table_insert(ext_table, "fon", (gpointer)application_x_ms_compress_szdd); -g_hash_table_insert(ext_table, "pdb", (gpointer)application_x_ms_pdb); -g_hash_table_insert(ext_table, "lit", (gpointer)application_x_ms_reader); -g_hash_table_insert(ext_table, "z64", (gpointer)application_x_n64_rom); 
-g_hash_table_insert(ext_table, "ani", (gpointer)application_x_navi_animation); -g_hash_table_insert(ext_table, "nvd", (gpointer)application_x_navidoc); -g_hash_table_insert(ext_table, "map", (gpointer)application_x_navimap); -g_hash_table_insert(ext_table, "stl", (gpointer)application_x_navistyle); -g_hash_table_insert(ext_table, "nes", (gpointer)application_x_nes_rom); -g_hash_table_insert(ext_table, "cdf", (gpointer)application_x_netcdf); -g_hash_table_insert(ext_table, "nc", (gpointer)application_x_netcdf); -g_hash_table_insert(ext_table, "pkg", (gpointer)application_x_newton_compatible_pkg); -g_hash_table_insert(ext_table, "o", (gpointer)application_x_object); -g_hash_table_insert(ext_table, "omcd", (gpointer)application_x_omcdatamaker); -g_hash_table_insert(ext_table, "omc", (gpointer)application_x_omc); -g_hash_table_insert(ext_table, "omcr", (gpointer)application_x_omcregerator); -g_hash_table_insert(ext_table, "pm4", (gpointer)application_x_pagemaker); -g_hash_table_insert(ext_table, "pm5", (gpointer)application_x_pagemaker); -g_hash_table_insert(ext_table, "pcl", (gpointer)application_x_pcl); -g_hash_table_insert(ext_table, "plx", (gpointer)application_x_pixclscript); -g_hash_table_insert(ext_table, "p7r", (gpointer)application_x_pkcs7_certreqresp); -g_hash_table_insert(ext_table, "p7a", (gpointer)application_x_pkcs7_signature); -g_hash_table_insert(ext_table, "mpc", (gpointer)application_x_project); -g_hash_table_insert(ext_table, "mpt", (gpointer)application_x_project); -g_hash_table_insert(ext_table, "mpv", (gpointer)application_x_project); -g_hash_table_insert(ext_table, "mpx", (gpointer)application_x_project); -g_hash_table_insert(ext_table, "wb1", (gpointer)application_x_qpro); -g_hash_table_insert(ext_table, "rar", (gpointer)application_x_rar); -g_hash_table_insert(ext_table, "rpm", (gpointer)application_x_rpm); -g_hash_table_insert(ext_table, "sdp", (gpointer)application_x_sdp); -g_hash_table_insert(ext_table, "sea", (gpointer)application_x_sea); 
-g_hash_table_insert(ext_table, "sl", (gpointer)application_x_seelogo); -g_hash_table_insert(ext_table, "so", (gpointer)application_x_sharedlib); -g_hash_table_insert(ext_table, "shar", (gpointer)application_x_shar); -g_hash_table_insert(ext_table, "swf", (gpointer)application_x_shockwave_flash); -g_hash_table_insert(ext_table, "spr", (gpointer)application_x_sprite); -g_hash_table_insert(ext_table, "sprite", (gpointer)application_x_sprite); -g_hash_table_insert(ext_table, "sit", (gpointer)application_x_stuffit); -g_hash_table_insert(ext_table, "sv4cpio", (gpointer)application_x_sv4cpio); -g_hash_table_insert(ext_table, "sv4crc", (gpointer)application_x_sv4crc); -g_hash_table_insert(ext_table, "tar", (gpointer)application_x_tar); -g_hash_table_insert(ext_table, "sbk", (gpointer)application_x_tbook); -g_hash_table_insert(ext_table, "tbk", (gpointer)application_x_tbook); -g_hash_table_insert(ext_table, "texi", (gpointer)application_x_texinfo); -g_hash_table_insert(ext_table, "texinfo", (gpointer)application_x_texinfo); -g_hash_table_insert(ext_table, "tfm", (gpointer)application_x_tex_tfm); -g_hash_table_insert(ext_table, "ustar", (gpointer)application_x_ustar); -g_hash_table_insert(ext_table, "vsd", (gpointer)application_x_visio); -g_hash_table_insert(ext_table, "vst", (gpointer)application_x_visio); -g_hash_table_insert(ext_table, "vsw", (gpointer)application_x_visio); -g_hash_table_insert(ext_table, "mzz", (gpointer)application_x_vnd_audioexplosion_mzz); -g_hash_table_insert(ext_table, "xpix", (gpointer)application_x_vnd_ls_xpix); -g_hash_table_insert(ext_table, "vrml", (gpointer)application_x_vrml); -g_hash_table_insert(ext_table, "src", (gpointer)application_x_wais_source); -g_hash_table_insert(ext_table, "wsrc", (gpointer)application_x_wais_source); -g_hash_table_insert(ext_table, "wtk", (gpointer)application_x_wintalk); -g_hash_table_insert(ext_table, "svr", (gpointer)application_x_world); -g_hash_table_insert(ext_table, "wri", (gpointer)application_x_wri); 
-g_hash_table_insert(ext_table, "der", (gpointer)application_x_x509_ca_cert); -g_hash_table_insert(ext_table, "xz", (gpointer)application_x_xz); -g_hash_table_insert(ext_table, "zst", (gpointer)application_x_zstd); -g_hash_table_insert(ext_table, "zip", (gpointer)application_zip); -g_hash_table_insert(ext_table, "z", (gpointer)application_zlib); -g_hash_table_insert(ext_table, "au", (gpointer)audio_basic); -g_hash_table_insert(ext_table, "it", (gpointer)audio_it); -g_hash_table_insert(ext_table, "funk", (gpointer)audio_make); -g_hash_table_insert(ext_table, "my", (gpointer)audio_make); -g_hash_table_insert(ext_table, "pfunk", (gpointer)audio_make); -g_hash_table_insert(ext_table, "kar", (gpointer)audio_midi); -g_hash_table_insert(ext_table, "rmi", (gpointer)audio_mid); -g_hash_table_insert(ext_table, "m4b", (gpointer)audio_mp4); -g_hash_table_insert(ext_table, "m2a", (gpointer)audio_mpeg); -g_hash_table_insert(ext_table, "mpa", (gpointer)audio_mpeg); -g_hash_table_insert(ext_table, "ogg", (gpointer)audio_ogg); -g_hash_table_insert(ext_table, "s3m", (gpointer)audio_s3m); -g_hash_table_insert(ext_table, "tsi", (gpointer)audio_tsp_audio); -g_hash_table_insert(ext_table, "tsp", (gpointer)audio_tsplayer); -g_hash_table_insert(ext_table, "qcp", (gpointer)audio_vnd_qcelp); -g_hash_table_insert(ext_table, "vox", (gpointer)audio_voxware); -g_hash_table_insert(ext_table, "aiff", (gpointer)audio_x_aiff); -g_hash_table_insert(ext_table, "aif", (gpointer)audio_x_aiff); -g_hash_table_insert(ext_table, "flac", (gpointer)audio_x_flac); -g_hash_table_insert(ext_table, "gsd", (gpointer)audio_x_gsm); -g_hash_table_insert(ext_table, "gsm", (gpointer)audio_x_gsm); -g_hash_table_insert(ext_table, "jam", (gpointer)audio_x_jam); -g_hash_table_insert(ext_table, "lam", (gpointer)audio_x_liveaudio); -g_hash_table_insert(ext_table, "m4a", (gpointer)audio_x_m4a); -g_hash_table_insert(ext_table, "mid", (gpointer)audio_x_midi); -g_hash_table_insert(ext_table, "mp3", (gpointer)audio_x_mpeg_3); 
-g_hash_table_insert(ext_table, "xm", (gpointer)audio_xm); -g_hash_table_insert(ext_table, "lma", (gpointer)audio_x_nspaudio); -g_hash_table_insert(ext_table, "ram", (gpointer)audio_x_pn_realaudio); -g_hash_table_insert(ext_table, "rm", (gpointer)audio_x_pn_realaudio); -g_hash_table_insert(ext_table, "rmm", (gpointer)audio_x_pn_realaudio); -g_hash_table_insert(ext_table, "rmp", (gpointer)audio_x_pn_realaudio); -g_hash_table_insert(ext_table, "sid", (gpointer)audio_x_psid); -g_hash_table_insert(ext_table, "ra", (gpointer)audio_x_realaudio); -g_hash_table_insert(ext_table, "vqe", (gpointer)audio_x_twinvq_plugin); -g_hash_table_insert(ext_table, "vql", (gpointer)audio_x_twinvq_plugin); -g_hash_table_insert(ext_table, "vqf", (gpointer)audio_x_twinvq); -g_hash_table_insert(ext_table, "voc", (gpointer)audio_x_voc); -g_hash_table_insert(ext_table, "wav", (gpointer)audio_x_wav); -g_hash_table_insert(ext_table, "xex", (gpointer)audio_x_xbox360_executable); -g_hash_table_insert(ext_table, "xbe", (gpointer)audio_x_xbox_executable); -g_hash_table_insert(ext_table, "woff2", (gpointer)font_woff2); -g_hash_table_insert(ext_table, "woff", (gpointer)font_woff); -g_hash_table_insert(ext_table, "rast", (gpointer)image_cmu_raster); -g_hash_table_insert(ext_table, "fif", (gpointer)image_fif); -g_hash_table_insert(ext_table, "flo", (gpointer)image_florian); -g_hash_table_insert(ext_table, "turbot", (gpointer)image_florian); -g_hash_table_insert(ext_table, "g3", (gpointer)image_g3fax); -g_hash_table_insert(ext_table, "gif", (gpointer)image_gif); -g_hash_table_insert(ext_table, "heic", (gpointer)image_heic); -g_hash_table_insert(ext_table, "ief", (gpointer)image_ief); -g_hash_table_insert(ext_table, "iefs", (gpointer)image_ief); -g_hash_table_insert(ext_table, "jfif", (gpointer)image_jpeg); -g_hash_table_insert(ext_table, "jfif-tbnl", (gpointer)image_jpeg); -g_hash_table_insert(ext_table, "jpe", (gpointer)image_jpeg); -g_hash_table_insert(ext_table, "jpeg", (gpointer)image_jpeg); 
-g_hash_table_insert(ext_table, "jpg", (gpointer)image_jpeg); -g_hash_table_insert(ext_table, "jut", (gpointer)image_jutvision); -g_hash_table_insert(ext_table, "nap", (gpointer)image_naplps); -g_hash_table_insert(ext_table, "naplps", (gpointer)image_naplps); -g_hash_table_insert(ext_table, "pic", (gpointer)image_pict); -g_hash_table_insert(ext_table, "pict", (gpointer)image_pict); -g_hash_table_insert(ext_table, "png", (gpointer)image_png); -g_hash_table_insert(ext_table, "x-png", (gpointer)image_png); -g_hash_table_insert(ext_table, "svg", (gpointer)image_svg); -g_hash_table_insert(ext_table, "psd", (gpointer)image_vnd_adobe_photoshop); -g_hash_table_insert(ext_table, "djvu", (gpointer)image_vnd_djvu); -g_hash_table_insert(ext_table, "fpx", (gpointer)image_vnd_fpx); -g_hash_table_insert(ext_table, "rf", (gpointer)image_vnd_rn_realflash); -g_hash_table_insert(ext_table, "rp", (gpointer)image_vnd_rn_realpix); -g_hash_table_insert(ext_table, "wbmp", (gpointer)image_vnd_wap_wbmp); -g_hash_table_insert(ext_table, "xif", (gpointer)image_vnd_xiff); -g_hash_table_insert(ext_table, "webp", (gpointer)image_webp); -g_hash_table_insert(ext_table, "3ds", (gpointer)image_x_3ds); -g_hash_table_insert(ext_table, "ras", (gpointer)image_x_cmu_raster); -g_hash_table_insert(ext_table, "tga", (gpointer)image_x_cur); -g_hash_table_insert(ext_table, "dwg", (gpointer)image_x_dwg); -g_hash_table_insert(ext_table, "dxf", (gpointer)image_x_dwg); -g_hash_table_insert(ext_table, "svf", (gpointer)image_x_dwg); -g_hash_table_insert(ext_table, "exr", (gpointer)image_x_exr); -g_hash_table_insert(ext_table, "ico", (gpointer)image_x_icon); -g_hash_table_insert(ext_table, "art", (gpointer)image_x_jg); -g_hash_table_insert(ext_table, "jps", (gpointer)image_x_jps); -g_hash_table_insert(ext_table, "bm", (gpointer)image_x_ms_bmp); -g_hash_table_insert(ext_table, "bmp", (gpointer)image_x_ms_bmp); -g_hash_table_insert(ext_table, "nif", (gpointer)image_x_niff); -g_hash_table_insert(ext_table, "niff", 
(gpointer)image_x_niff); -g_hash_table_insert(ext_table, "pcx", (gpointer)image_x_pcx); -g_hash_table_insert(ext_table, "pct", (gpointer)image_x_pict); -g_hash_table_insert(ext_table, "pbm", (gpointer)image_x_portable_bitmap); -g_hash_table_insert(ext_table, "pgm", (gpointer)image_x_portable_graymap); -g_hash_table_insert(ext_table, "ppm", (gpointer)image_x_portable_pixmap); -g_hash_table_insert(ext_table, "qif", (gpointer)image_x_quicktime); -g_hash_table_insert(ext_table, "qti", (gpointer)image_x_quicktime); -g_hash_table_insert(ext_table, "qtif", (gpointer)image_x_quicktime); -g_hash_table_insert(ext_table, "rgb", (gpointer)image_x_rgb); -g_hash_table_insert(ext_table, "tif", (gpointer)image_x_tiff); -g_hash_table_insert(ext_table, "tiff", (gpointer)image_x_tiff); -g_hash_table_insert(ext_table, "xcf", (gpointer)image_x_xcf); -g_hash_table_insert(ext_table, "xpm", (gpointer)image_x_xpixmap); -g_hash_table_insert(ext_table, "xwd", (gpointer)image_x_xwindowdump); -g_hash_table_insert(ext_table, "mht", (gpointer)message_rfc822); -g_hash_table_insert(ext_table, "mhtml", (gpointer)message_rfc822); -g_hash_table_insert(ext_table, "mime", (gpointer)message_rfc822); -g_hash_table_insert(ext_table, "dwf", (gpointer)model_vnd_dwf); -g_hash_table_insert(ext_table, "gdl", (gpointer)model_vnd_gdl); -g_hash_table_insert(ext_table, "gdsl", (gpointer)model_vnd_gs_gdl); -g_hash_table_insert(ext_table, "wrz", (gpointer)model_vrml); -g_hash_table_insert(ext_table, "pov", (gpointer)model_x_pov); -g_hash_table_insert(ext_table, "asp", (gpointer)text_asp); -g_hash_table_insert(ext_table, "css", (gpointer)text_css); -g_hash_table_insert(ext_table, "acgi", (gpointer)text_html); -g_hash_table_insert(ext_table, "htm", (gpointer)text_html); -g_hash_table_insert(ext_table, "html", (gpointer)text_html); -g_hash_table_insert(ext_table, "htmls", (gpointer)text_html); -g_hash_table_insert(ext_table, "htx", (gpointer)text_html); -g_hash_table_insert(ext_table, "shtml", (gpointer)text_html); 
-g_hash_table_insert(ext_table, "js", (gpointer)text_javascript); -g_hash_table_insert(ext_table, "mcf", (gpointer)text_mcf); -g_hash_table_insert(ext_table, "pas", (gpointer)text_pascal); -g_hash_table_insert(ext_table, "com", (gpointer)text_plain); -g_hash_table_insert(ext_table, "cmd", (gpointer)text_plain); -g_hash_table_insert(ext_table, "conf", (gpointer)text_plain); -g_hash_table_insert(ext_table, "def", (gpointer)text_plain); -g_hash_table_insert(ext_table, "g", (gpointer)text_plain); -g_hash_table_insert(ext_table, "idc", (gpointer)text_plain); -g_hash_table_insert(ext_table, "list", (gpointer)text_plain); -g_hash_table_insert(ext_table, "lst", (gpointer)text_plain); -g_hash_table_insert(ext_table, "mar", (gpointer)text_plain); -g_hash_table_insert(ext_table, "sdml", (gpointer)text_plain); -g_hash_table_insert(ext_table, "text", (gpointer)text_plain); -g_hash_table_insert(ext_table, "txt", (gpointer)text_plain); -g_hash_table_insert(ext_table, "md", (gpointer)text_plain); -g_hash_table_insert(ext_table, "groovy", (gpointer)text_plain); -g_hash_table_insert(ext_table, "license", (gpointer)text_plain); -g_hash_table_insert(ext_table, "properties", (gpointer)text_plain); -g_hash_table_insert(ext_table, "desktop", (gpointer)text_plain); -g_hash_table_insert(ext_table, "ini", (gpointer)text_plain); -g_hash_table_insert(ext_table, "rst", (gpointer)text_plain); -g_hash_table_insert(ext_table, "cmake", (gpointer)text_plain); -g_hash_table_insert(ext_table, "ipynb", (gpointer)text_plain); -g_hash_table_insert(ext_table, "readme", (gpointer)text_plain); -g_hash_table_insert(ext_table, "less", (gpointer)text_plain); -g_hash_table_insert(ext_table, "lo", (gpointer)text_plain); -g_hash_table_insert(ext_table, "go", (gpointer)text_plain); -g_hash_table_insert(ext_table, "yml", (gpointer)text_plain); -g_hash_table_insert(ext_table, "d", (gpointer)text_plain); -g_hash_table_insert(ext_table, "cs", (gpointer)text_plain); -g_hash_table_insert(ext_table, "hpp", 
(gpointer)text_plain); -g_hash_table_insert(ext_table, "srt", (gpointer)text_plain); -g_hash_table_insert(ext_table, "nfo", (gpointer)text_plain); -g_hash_table_insert(ext_table, "sfv", (gpointer)text_plain); -g_hash_table_insert(ext_table, "m3u", (gpointer)text_plain); -g_hash_table_insert(ext_table, "csv", (gpointer)text_plain); -g_hash_table_insert(ext_table, "eml", (gpointer)text_plain); -g_hash_table_insert(ext_table, "make", (gpointer)text_plain); -g_hash_table_insert(ext_table, "log", (gpointer)text_plain); -g_hash_table_insert(ext_table, "markdown", (gpointer)text_plain); -g_hash_table_insert(ext_table, "yaml", (gpointer)text_plain); -g_hash_table_insert(ext_table, "coffee", (gpointer)application_vnd_coffeescript); -g_hash_table_insert(ext_table, "rt", (gpointer)text_richtext); -g_hash_table_insert(ext_table, "rtf", (gpointer)text_richtext); -g_hash_table_insert(ext_table, "rtx", (gpointer)text_richtext); -g_hash_table_insert(ext_table, "wsc", (gpointer)text_scriplet); -g_hash_table_insert(ext_table, "tsv", (gpointer)text_tab_separated_values); -g_hash_table_insert(ext_table, "man", (gpointer)text_troff); -g_hash_table_insert(ext_table, "me", (gpointer)text_troff); -g_hash_table_insert(ext_table, "ms", (gpointer)text_troff); -g_hash_table_insert(ext_table, "roff", (gpointer)text_troff); -g_hash_table_insert(ext_table, "t", (gpointer)text_troff); -g_hash_table_insert(ext_table, "tr", (gpointer)text_troff); -g_hash_table_insert(ext_table, "uji", (gpointer)text_uri_list); -g_hash_table_insert(ext_table, "unis", (gpointer)text_uri_list); -g_hash_table_insert(ext_table, "uri", (gpointer)text_uri_list); -g_hash_table_insert(ext_table, "uris", (gpointer)text_uri_list); -g_hash_table_insert(ext_table, "abc", (gpointer)text_vnd_abc); -g_hash_table_insert(ext_table, "flx", (gpointer)text_vnd_fmi_flexstor); -g_hash_table_insert(ext_table, "wmls", (gpointer)text_vnd_wap_wmlscript); -g_hash_table_insert(ext_table, "wml", (gpointer)text_vnd_wap_wml); 
-g_hash_table_insert(ext_table, "htt", (gpointer)text_webviewhtml); -g_hash_table_insert(ext_table, "asm", (gpointer)text_x_asm); -g_hash_table_insert(ext_table, "s", (gpointer)text_x_asm); -g_hash_table_insert(ext_table, "aip", (gpointer)text_x_audiosoft_intra); -g_hash_table_insert(ext_table, "awk", (gpointer)text_x_awk); -g_hash_table_insert(ext_table, "c", (gpointer)text_x_c); -g_hash_table_insert(ext_table, "cc", (gpointer)text_x_c); -g_hash_table_insert(ext_table, "h", (gpointer)text_x_c); -g_hash_table_insert(ext_table, "cpp", (gpointer)text_x_c__); -g_hash_table_insert(ext_table, "cxx", (gpointer)text_x_c__); -g_hash_table_insert(ext_table, "c++", (gpointer)text_x_c__); -g_hash_table_insert(ext_table, "htc", (gpointer)text_x_component); -g_hash_table_insert(ext_table, "f", (gpointer)text_x_fortran); -g_hash_table_insert(ext_table, "f77", (gpointer)text_x_fortran); -g_hash_table_insert(ext_table, "f90", (gpointer)text_x_fortran); -g_hash_table_insert(ext_table, "for", (gpointer)text_x_fortran); -g_hash_table_insert(ext_table, "jav", (gpointer)text_x_java); -g_hash_table_insert(ext_table, "java", (gpointer)text_x_java); -g_hash_table_insert(ext_table, "lsx", (gpointer)text_x_la_asf); -g_hash_table_insert(ext_table, "el", (gpointer)text_x_lisp); -g_hash_table_insert(ext_table, "m4", (gpointer)text_x_m4); -g_hash_table_insert(ext_table, "ac", (gpointer)text_x_m4); -g_hash_table_insert(ext_table, "am", (gpointer)text_x_makefile); -g_hash_table_insert(ext_table, "mak", (gpointer)text_x_makefile); -g_hash_table_insert(ext_table, "xml", (gpointer)text_xml); -g_hash_table_insert(ext_table, "pom", (gpointer)text_xml); -g_hash_table_insert(ext_table, "iml", (gpointer)text_xml); -g_hash_table_insert(ext_table, "plist", (gpointer)text_xml); -g_hash_table_insert(ext_table, "m", (gpointer)text_x_m); -g_hash_table_insert(ext_table, "bat", (gpointer)text_x_msdos_batch); -g_hash_table_insert(ext_table, "reg", (gpointer)text_x_ms_regedit); -g_hash_table_insert(ext_table, "p", 
(gpointer)text_x_pascal); -g_hash_table_insert(ext_table, "pl", (gpointer)text_x_perl); -g_hash_table_insert(ext_table, "php", (gpointer)text_x_php); -g_hash_table_insert(ext_table, "po", (gpointer)text_x_po); -g_hash_table_insert(ext_table, "py", (gpointer)text_x_python); -g_hash_table_insert(ext_table, "rb", (gpointer)text_x_ruby); -g_hash_table_insert(ext_table, "sass", (gpointer)text_x_sass); -g_hash_table_insert(ext_table, "scss", (gpointer)text_x_scss); -g_hash_table_insert(ext_table, "ssi", (gpointer)text_x_server_parsed_html); -g_hash_table_insert(ext_table, "etx", (gpointer)text_x_setext); -g_hash_table_insert(ext_table, "sgm", (gpointer)text_x_sgml); -g_hash_table_insert(ext_table, "sgml", (gpointer)text_x_sgml); -g_hash_table_insert(ext_table, "sh", (gpointer)text_x_shellscript); -g_hash_table_insert(ext_table, "talk", (gpointer)text_x_speech); -g_hash_table_insert(ext_table, "tex", (gpointer)text_x_tex); -g_hash_table_insert(ext_table, "uil", (gpointer)text_x_uil); -g_hash_table_insert(ext_table, "uue", (gpointer)text_x_uuencode); -g_hash_table_insert(ext_table, "vcs", (gpointer)text_x_vcalendar); -g_hash_table_insert(ext_table, "vcf", (gpointer)text_x_vcard); -g_hash_table_insert(ext_table, "afl", (gpointer)video_animaflex); -g_hash_table_insert(ext_table, "avi", (gpointer)video_avi); -g_hash_table_insert(ext_table, "avs", (gpointer)video_avs_video); -g_hash_table_insert(ext_table, "mp4", (gpointer)video_mp4); -g_hash_table_insert(ext_table, "m1v", (gpointer)video_mpeg); -g_hash_table_insert(ext_table, "m2v", (gpointer)video_mpeg); -g_hash_table_insert(ext_table, "mpe", (gpointer)video_mpeg); -g_hash_table_insert(ext_table, "mpeg", (gpointer)video_mpeg); -g_hash_table_insert(ext_table, "mpg", (gpointer)video_mpeg); -g_hash_table_insert(ext_table, "moov", (gpointer)video_quicktime); -g_hash_table_insert(ext_table, "mov", (gpointer)video_quicktime); -g_hash_table_insert(ext_table, "qt", (gpointer)video_quicktime); -g_hash_table_insert(ext_table, "vdo", 
(gpointer)video_vdo); -g_hash_table_insert(ext_table, "viv", (gpointer)video_vivo); -g_hash_table_insert(ext_table, "vivo", (gpointer)video_vivo); -g_hash_table_insert(ext_table, "rv", (gpointer)video_vnd_rn_realvideo); -g_hash_table_insert(ext_table, "vos", (gpointer)video_vosaic); -g_hash_table_insert(ext_table, "webm", (gpointer)video_webm); -g_hash_table_insert(ext_table, "xdr", (gpointer)video_x_amt_demorun); -g_hash_table_insert(ext_table, "xsr", (gpointer)video_x_amt_showrun); -g_hash_table_insert(ext_table, "fmf", (gpointer)video_x_atomic3d_feature); -g_hash_table_insert(ext_table, "dl", (gpointer)video_x_dl); -g_hash_table_insert(ext_table, "dif", (gpointer)video_x_dv); -g_hash_table_insert(ext_table, "dv", (gpointer)video_x_dv); -g_hash_table_insert(ext_table, "fli", (gpointer)video_x_fli); -g_hash_table_insert(ext_table, "flv", (gpointer)video_x_flv); -g_hash_table_insert(ext_table, "isu", (gpointer)video_x_isvideo); -g_hash_table_insert(ext_table, "jng", (gpointer)video_x_jng); -g_hash_table_insert(ext_table, "m4v", (gpointer)video_x_m4v); -g_hash_table_insert(ext_table, "mkv", (gpointer)video_x_matroska); -g_hash_table_insert(ext_table, "mng", (gpointer)video_x_mng); -g_hash_table_insert(ext_table, "mjpg", (gpointer)video_x_motion_jpeg); -g_hash_table_insert(ext_table, "asf", (gpointer)video_x_ms_asf); -g_hash_table_insert(ext_table, "asx", (gpointer)video_x_ms_asf); -g_hash_table_insert(ext_table, "wmv", (gpointer)video_x_ms_asf); -g_hash_table_insert(ext_table, "divx", (gpointer)video_x_msvideo); -g_hash_table_insert(ext_table, "qtc", (gpointer)video_x_qtc); -g_hash_table_insert(ext_table, "movie", (gpointer)video_x_sgi_movie); -g_hash_table_insert(ext_table, "mv", (gpointer)video_x_sgi_movie); -g_hash_table_insert(ext_table, "msg", (gpointer)application_vnd_ms_outlook); -g_hash_table_insert(ext_table, "orf", (gpointer)image_x_olympus_orf); -g_hash_table_insert(ext_table, "nef", (gpointer)image_x_nikon_nef); -g_hash_table_insert(ext_table, "raf", 
(gpointer)image_x_fuji_raf); -g_hash_table_insert(ext_table, "rw2", (gpointer)image_x_panasonic_raw); -g_hash_table_insert(ext_table, "raw", (gpointer)image_x_panasonic_raw); -g_hash_table_insert(ext_table, "dng", (gpointer)image_x_adobe_dng); -g_hash_table_insert(ext_table, "cr2", (gpointer)image_x_canon_cr2); -g_hash_table_insert(ext_table, "crw", (gpointer)image_x_canon_crw); -g_hash_table_insert(ext_table, "dcr", (gpointer)image_x_kodak_dcr); -g_hash_table_insert(ext_table, "k25", (gpointer)image_x_kodak_k25); -g_hash_table_insert(ext_table, "kdc", (gpointer)image_x_kodak_kdc); -g_hash_table_insert(ext_table, "mrw", (gpointer)image_x_minolta_mrw); -g_hash_table_insert(ext_table, "pef", (gpointer)image_x_pentax_pef); -g_hash_table_insert(ext_table, "xf3", (gpointer)image_x_sigma_x3f); -g_hash_table_insert(ext_table, "arw", (gpointer)image_x_sony_arw); -g_hash_table_insert(ext_table, "sr2", (gpointer)image_x_sony_sr2); -g_hash_table_insert(ext_table, "srf", (gpointer)image_x_sony_srf); -g_hash_table_insert(ext_table, "erf", (gpointer)image_x_epson_erf); -g_hash_table_insert(ext_table, "s2meta", (gpointer)sist2_sidecar); -return ext_table;} -GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal); -g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj); -g_hash_table_insert(mime_table, "application/base64", (gpointer)application_base64); -g_hash_table_insert(mime_table, "application/binhex", (gpointer)application_binhex); -g_hash_table_insert(mime_table, "application/book", (gpointer)application_book); -g_hash_table_insert(mime_table, "application/CDFV2-corrupt", (gpointer)application_CDFV2_corrupt); -g_hash_table_insert(mime_table, "application/CDFV2", (gpointer)application_CDFV2); -g_hash_table_insert(mime_table, "application/clariscad", (gpointer)application_clariscad); -g_hash_table_insert(mime_table, "application/commonground", (gpointer)application_commonground); 
-g_hash_table_insert(mime_table, "application/csv", (gpointer)application_csv); -g_hash_table_insert(mime_table, "application/dicom", (gpointer)application_dicom); -g_hash_table_insert(mime_table, "application/drafting", (gpointer)application_drafting); -g_hash_table_insert(mime_table, "application/epub+zip", (gpointer)application_epub_zip); -g_hash_table_insert(mime_table, "application/freeloader", (gpointer)application_freeloader); -g_hash_table_insert(mime_table, "application/futuresplash", (gpointer)application_futuresplash); -g_hash_table_insert(mime_table, "application/groupwise", (gpointer)application_groupwise); -g_hash_table_insert(mime_table, "application/gzip", (gpointer)application_gzip); -g_hash_table_insert(mime_table, "application/hta", (gpointer)application_hta); -g_hash_table_insert(mime_table, "application/i-deas", (gpointer)application_i_deas); -g_hash_table_insert(mime_table, "application/iges", (gpointer)application_iges); -g_hash_table_insert(mime_table, "application/inf", (gpointer)application_inf); -g_hash_table_insert(mime_table, "application/java-archive", (gpointer)application_java_archive); -g_hash_table_insert(mime_table, "application/java", (gpointer)application_java); -g_hash_table_insert(mime_table, "application/javascript", (gpointer)application_javascript); -g_hash_table_insert(mime_table, "application/json", (gpointer)application_json); -g_hash_table_insert(mime_table, "application/ndjson", (gpointer)application_ndjson); -g_hash_table_insert(mime_table, "application/marc", (gpointer)application_marc); -g_hash_table_insert(mime_table, "application/mbedlet", (gpointer)application_mbedlet); -g_hash_table_insert(mime_table, "application/mime", (gpointer)application_mime); -g_hash_table_insert(mime_table, "application/mspowerpoint", (gpointer)application_mspowerpoint); -g_hash_table_insert(mime_table, "application/msword", (gpointer)application_msword); -g_hash_table_insert(mime_table, "application/netmc", (gpointer)application_netmc); 
-g_hash_table_insert(mime_table, "application/octet-stream", (gpointer)application_octet_stream); -g_hash_table_insert(mime_table, "application/oda", (gpointer)application_oda); -g_hash_table_insert(mime_table, "application/ogg", (gpointer)application_ogg); -g_hash_table_insert(mime_table, "application/pdf", (gpointer)application_pdf); -g_hash_table_insert(mime_table, "application/pgp-keys", (gpointer)application_pgp_keys); -g_hash_table_insert(mime_table, "application/pgp-signature", (gpointer)application_pgp_signature); -g_hash_table_insert(mime_table, "application/pkcs7-signature", (gpointer)application_pkcs7_signature); -g_hash_table_insert(mime_table, "application/pkix-cert", (gpointer)application_pkix_cert); -g_hash_table_insert(mime_table, "application/postscript", (gpointer)application_postscript); -g_hash_table_insert(mime_table, "application/pro_eng", (gpointer)application_pro_eng); -g_hash_table_insert(mime_table, "application/ringing-tones", (gpointer)application_ringing_tones); -g_hash_table_insert(mime_table, "application/smil", (gpointer)application_smil); -g_hash_table_insert(mime_table, "application/solids", (gpointer)application_solids); -g_hash_table_insert(mime_table, "application/sounder", (gpointer)application_sounder); -g_hash_table_insert(mime_table, "application/step", (gpointer)application_step); -g_hash_table_insert(mime_table, "application/streamingmedia", (gpointer)application_streamingmedia); -g_hash_table_insert(mime_table, "application/vda", (gpointer)application_vda); -g_hash_table_insert(mime_table, "application/vnd.fdf", (gpointer)application_vnd_fdf); -g_hash_table_insert(mime_table, "application/vnd.font-fontforge-sfd", (gpointer)application_vnd_font_fontforge_sfd); -g_hash_table_insert(mime_table, "application/vnd.hp-hpgl", (gpointer)application_vnd_hp_hpgl); -g_hash_table_insert(mime_table, "application/vnd.iccprofile", (gpointer)application_vnd_iccprofile); -g_hash_table_insert(mime_table, "application/vnd.lotus-1-2-3", 
(gpointer)application_vnd_lotus_1_2_3); -g_hash_table_insert(mime_table, "application/vnd.ms-cab-compressed", (gpointer)application_vnd_ms_cab_compressed); -g_hash_table_insert(mime_table, "application/vnd.ms-excel", (gpointer)application_vnd_ms_excel); -g_hash_table_insert(mime_table, "application/vnd.ms-fontobject", (gpointer)application_vnd_ms_fontobject); -g_hash_table_insert(mime_table, "application/vnd.ms-opentype", (gpointer)application_vnd_ms_opentype); -g_hash_table_insert(mime_table, "application/vnd.ms-pki.certstore", (gpointer)application_vnd_ms_pki_certstore); -g_hash_table_insert(mime_table, "application/vnd.ms-pki.pko", (gpointer)application_vnd_ms_pki_pko); -g_hash_table_insert(mime_table, "application/vnd.ms-pki.seccat", (gpointer)application_vnd_ms_pki_seccat); -g_hash_table_insert(mime_table, "application/vnd.ms-powerpoint", (gpointer)application_vnd_ms_powerpoint); -g_hash_table_insert(mime_table, "application/vnd.ms-project", (gpointer)application_vnd_ms_project); -g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.base", (gpointer)application_vnd_oasis_opendocument_base); -g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.formula", (gpointer)application_vnd_oasis_opendocument_formula); -g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.graphics", (gpointer)application_vnd_oasis_opendocument_graphics); -g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.presentation", (gpointer)application_vnd_oasis_opendocument_presentation); -g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.spreadsheet", (gpointer)application_vnd_oasis_opendocument_spreadsheet); -g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.text", (gpointer)application_vnd_oasis_opendocument_text); -g_hash_table_insert(mime_table, "application/vnd.openxmlformats-officedocument.presentationml.presentation", 
(gpointer)application_vnd_openxmlformats_officedocument_presentationml_presentation); -g_hash_table_insert(mime_table, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", (gpointer)application_vnd_openxmlformats_officedocument_spreadsheetml_sheet); -g_hash_table_insert(mime_table, "application/vnd.openxmlformats-officedocument.wordprocessingml.document", (gpointer)application_vnd_openxmlformats_officedocument_wordprocessingml_document); -g_hash_table_insert(mime_table, "application/vnd.symbian.install", (gpointer)application_vnd_symbian_install); -g_hash_table_insert(mime_table, "application/vnd.tcpdump.pcap", (gpointer)application_vnd_tcpdump_pcap); -g_hash_table_insert(mime_table, "application/vnd.wap.wmlc", (gpointer)application_vnd_wap_wmlc); -g_hash_table_insert(mime_table, "application/vnd.wap.wmlscriptc", (gpointer)application_vnd_wap_wmlscriptc); -g_hash_table_insert(mime_table, "application/vnd.xara", (gpointer)application_vnd_xara); -g_hash_table_insert(mime_table, "application/vocaltec-media-desc", (gpointer)application_vocaltec_media_desc); -g_hash_table_insert(mime_table, "application/vocaltec-media-file", (gpointer)application_vocaltec_media_file); -g_hash_table_insert(mime_table, "application/warc", (gpointer)application_warc); -g_hash_table_insert(mime_table, "application/winhelp", (gpointer)application_winhelp); -g_hash_table_insert(mime_table, "application/wordperfect", (gpointer)application_wordperfect); -g_hash_table_insert(mime_table, "application/x-123", (gpointer)application_x_123); -g_hash_table_insert(mime_table, "application/x-7z-compressed", (gpointer)application_x_7z_compressed); -g_hash_table_insert(mime_table, "application/x-aim", (gpointer)application_x_aim); -g_hash_table_insert(mime_table, "application/x-apple-diskimage", (gpointer)application_x_apple_diskimage); -g_hash_table_insert(mime_table, "application/x-arc", (gpointer)application_x_arc); -g_hash_table_insert(mime_table, "application/x-archive", 
(gpointer)application_x_archive); -g_hash_table_insert(mime_table, "application/x-atari-7800-rom", (gpointer)application_x_atari_7800_rom); -g_hash_table_insert(mime_table, "application/x-authorware-bin", (gpointer)application_x_authorware_bin); -g_hash_table_insert(mime_table, "application/x-authorware-map", (gpointer)application_x_authorware_map); -g_hash_table_insert(mime_table, "application/x-authorware-seg", (gpointer)application_x_authorware_seg); -g_hash_table_insert(mime_table, "application/x-avira-qua", (gpointer)application_x_avira_qua); -g_hash_table_insert(mime_table, "application/x-bcpio", (gpointer)application_x_bcpio); -g_hash_table_insert(mime_table, "application/x-bittorrent", (gpointer)application_x_bittorrent); -g_hash_table_insert(mime_table, "application/x-bsh", (gpointer)application_x_bsh); -g_hash_table_insert(mime_table, "application/x-bytecode.python", (gpointer)application_x_bytecode_python); -g_hash_table_insert(mime_table, "application/x-bzip2", (gpointer)application_x_bzip2); -g_hash_table_insert(mime_table, "application/x-bzip", (gpointer)application_x_bzip); -g_hash_table_insert(mime_table, "application/x-cbr", (gpointer)application_x_cbr); -g_hash_table_insert(mime_table, "application/x-cbz", (gpointer)application_x_cbz); -g_hash_table_insert(mime_table, "application/x-cdlink", (gpointer)application_x_cdlink); -g_hash_table_insert(mime_table, "application/x-chat", (gpointer)application_x_chat); -g_hash_table_insert(mime_table, "application/x-chrome-extension", (gpointer)application_x_chrome_extension); -g_hash_table_insert(mime_table, "application/x-cocoa", (gpointer)application_x_cocoa); -g_hash_table_insert(mime_table, "application/x-conference", (gpointer)application_x_conference); -g_hash_table_insert(mime_table, "application/x-coredump", (gpointer)application_x_coredump); -g_hash_table_insert(mime_table, "application/x-cpio", (gpointer)application_x_cpio); -g_hash_table_insert(mime_table, "application/x-dbf", 
(gpointer)application_x_dbf); -g_hash_table_insert(mime_table, "application/x-dbt", (gpointer)application_x_dbt); -g_hash_table_insert(mime_table, "application/x-debian-package", (gpointer)application_x_debian_package); -g_hash_table_insert(mime_table, "application/x-deepv", (gpointer)application_x_deepv); -g_hash_table_insert(mime_table, "application/x-director", (gpointer)application_x_director); -g_hash_table_insert(mime_table, "application/x-dmp", (gpointer)application_x_dmp); -g_hash_table_insert(mime_table, "application/x-dosdriver", (gpointer)application_x_dosdriver); -g_hash_table_insert(mime_table, "application/x-dosexec", (gpointer)application_x_dosexec); -g_hash_table_insert(mime_table, "application/x-dvi", (gpointer)application_x_dvi); -g_hash_table_insert(mime_table, "application/x-elc", (gpointer)application_x_elc); -g_hash_table_insert(mime_table, "application/x-empty", (gpointer)application_x_empty); -g_hash_table_insert(mime_table, "application/x-envoy", (gpointer)application_x_envoy); -g_hash_table_insert(mime_table, "application/x-esrehber", (gpointer)application_x_esrehber); -g_hash_table_insert(mime_table, "application/x-excel", (gpointer)application_x_excel); -g_hash_table_insert(mime_table, "application/x-executable", (gpointer)application_x_executable); -g_hash_table_insert(mime_table, "application/x-font-gdos", (gpointer)application_x_font_gdos); -g_hash_table_insert(mime_table, "application/x-font-pf2", (gpointer)application_x_font_pf2); -g_hash_table_insert(mime_table, "application/x-font-pfm", (gpointer)application_x_font_pfm); -g_hash_table_insert(mime_table, "application/x-font-sfn", (gpointer)application_x_font_sfn); -g_hash_table_insert(mime_table, "application/x-font-ttf", (gpointer)application_x_font_ttf); -g_hash_table_insert(mime_table, "application/x-fptapplication/x-dbt", (gpointer)application_x_fptapplication_x_dbt); -g_hash_table_insert(mime_table, "application/x-freelance", (gpointer)application_x_freelance); 
-g_hash_table_insert(mime_table, "application/x-gamecube-rom", (gpointer)application_x_gamecube_rom); -g_hash_table_insert(mime_table, "application/x-gdbm", (gpointer)application_x_gdbm); -g_hash_table_insert(mime_table, "application/x-gettext-translation", (gpointer)application_x_gettext_translation); -g_hash_table_insert(mime_table, "application/x-git", (gpointer)application_x_git); -g_hash_table_insert(mime_table, "application/x-gsp", (gpointer)application_x_gsp); -g_hash_table_insert(mime_table, "application/x-gss", (gpointer)application_x_gss); -g_hash_table_insert(mime_table, "application/x-gtar", (gpointer)application_x_gtar); -g_hash_table_insert(mime_table, "application/x-gzip", (gpointer)application_x_gzip); -g_hash_table_insert(mime_table, "application/x-hdf", (gpointer)application_x_hdf); -g_hash_table_insert(mime_table, "application/x-helpfile", (gpointer)application_x_helpfile); -g_hash_table_insert(mime_table, "application/x-httpd-imap", (gpointer)application_x_httpd_imap); -g_hash_table_insert(mime_table, "application/x-ima", (gpointer)application_x_ima); -g_hash_table_insert(mime_table, "application/x-innosetup", (gpointer)application_x_innosetup); -g_hash_table_insert(mime_table, "application/x-internett-signup", (gpointer)application_x_internett_signup); -g_hash_table_insert(mime_table, "application/x-inventor", (gpointer)application_x_inventor); -g_hash_table_insert(mime_table, "application/x-ip2", (gpointer)application_x_ip2); -g_hash_table_insert(mime_table, "application/x-java-applet", (gpointer)application_x_java_applet); -g_hash_table_insert(mime_table, "application/x-java-commerce", (gpointer)application_x_java_commerce); -g_hash_table_insert(mime_table, "application/x-java-image", (gpointer)application_x_java_image); -g_hash_table_insert(mime_table, "application/x-java-jmod", (gpointer)application_x_java_jmod); -g_hash_table_insert(mime_table, "application/x-java-keystore", (gpointer)application_x_java_keystore); 
-g_hash_table_insert(mime_table, "application/x-kdelnk", (gpointer)application_x_kdelnk); -g_hash_table_insert(mime_table, "application/x-koan", (gpointer)application_x_koan); -g_hash_table_insert(mime_table, "application/x-latex", (gpointer)application_x_latex); -g_hash_table_insert(mime_table, "application/x-livescreen", (gpointer)application_x_livescreen); -g_hash_table_insert(mime_table, "application/x-lotus", (gpointer)application_x_lotus); -g_hash_table_insert(mime_table, "application/x-lz4+json", (gpointer)application_x_lz4_json); -g_hash_table_insert(mime_table, "application/x-lz4", (gpointer)application_x_lz4); -g_hash_table_insert(mime_table, "application/x-lzh-compressed", (gpointer)application_x_lzh_compressed); -g_hash_table_insert(mime_table, "application/x-lzh", (gpointer)application_x_lzh); -g_hash_table_insert(mime_table, "application/x-lzip", (gpointer)application_x_lzip); -g_hash_table_insert(mime_table, "application/x-lzma", (gpointer)application_x_lzma); -g_hash_table_insert(mime_table, "application/x-lzop", (gpointer)application_x_lzop); -g_hash_table_insert(mime_table, "application/x-lzx", (gpointer)application_x_lzx); -g_hash_table_insert(mime_table, "application/x-mach-binary", (gpointer)application_x_mach_binary); -g_hash_table_insert(mime_table, "application/x-mach-executable", (gpointer)application_x_mach_executable); -g_hash_table_insert(mime_table, "application/x-magic-cap-package-1.0", (gpointer)application_x_magic_cap_package_1_0); -g_hash_table_insert(mime_table, "application/x-mathcad", (gpointer)application_x_mathcad); -g_hash_table_insert(mime_table, "application/x-maxis-dbpf", (gpointer)application_x_maxis_dbpf); -g_hash_table_insert(mime_table, "application/x-meme", (gpointer)application_x_meme); -g_hash_table_insert(mime_table, "application/x-midi", (gpointer)application_x_midi); -g_hash_table_insert(mime_table, "application/x-mif", (gpointer)application_x_mif); -g_hash_table_insert(mime_table, "application/x-mix-transfer", 
(gpointer)application_x_mix_transfer); -g_hash_table_insert(mime_table, "application/xml", (gpointer)application_xml); -g_hash_table_insert(mime_table, "application/x-mobipocket-ebook", (gpointer)application_x_mobipocket_ebook); -g_hash_table_insert(mime_table, "application/vnd.amazon.mobi8-ebook", (gpointer)application_vnd_amazon_mobi8_ebook); -g_hash_table_insert(mime_table, "application/x-msaccess", (gpointer)application_x_msaccess); -g_hash_table_insert(mime_table, "application/x-ms-compress-szdd", (gpointer)application_x_ms_compress_szdd); -g_hash_table_insert(mime_table, "application/x-ms-pdb", (gpointer)application_x_ms_pdb); -g_hash_table_insert(mime_table, "application/x-ms-reader", (gpointer)application_x_ms_reader); -g_hash_table_insert(mime_table, "application/x-n64-rom", (gpointer)application_x_n64_rom); -g_hash_table_insert(mime_table, "application/x-navi-animation", (gpointer)application_x_navi_animation); -g_hash_table_insert(mime_table, "application/x-navidoc", (gpointer)application_x_navidoc); -g_hash_table_insert(mime_table, "application/x-navimap", (gpointer)application_x_navimap); -g_hash_table_insert(mime_table, "application/x-navistyle", (gpointer)application_x_navistyle); -g_hash_table_insert(mime_table, "application/x-nes-rom", (gpointer)application_x_nes_rom); -g_hash_table_insert(mime_table, "application/x-netcdf", (gpointer)application_x_netcdf); -g_hash_table_insert(mime_table, "application/x-newton-compatible-pkg", (gpointer)application_x_newton_compatible_pkg); -g_hash_table_insert(mime_table, "application/x-nintendo-ds-rom", (gpointer)application_x_nintendo_ds_rom); -g_hash_table_insert(mime_table, "application/x-object", (gpointer)application_x_object); -g_hash_table_insert(mime_table, "application/x-omcdatamaker", (gpointer)application_x_omcdatamaker); -g_hash_table_insert(mime_table, "application/x-omc", (gpointer)application_x_omc); -g_hash_table_insert(mime_table, "application/x-omcregerator", 
(gpointer)application_x_omcregerator); -g_hash_table_insert(mime_table, "application/x-pagemaker", (gpointer)application_x_pagemaker); -g_hash_table_insert(mime_table, "application/x-pcl", (gpointer)application_x_pcl); -g_hash_table_insert(mime_table, "application/x-pgp-keyring", (gpointer)application_x_pgp_keyring); -g_hash_table_insert(mime_table, "application/x-pixclscript", (gpointer)application_x_pixclscript); -g_hash_table_insert(mime_table, "application/x-pkcs7-certreqresp", (gpointer)application_x_pkcs7_certreqresp); -g_hash_table_insert(mime_table, "application/x-pkcs7-signature", (gpointer)application_x_pkcs7_signature); -g_hash_table_insert(mime_table, "application/x-project", (gpointer)application_x_project); -g_hash_table_insert(mime_table, "application/x-qpro", (gpointer)application_x_qpro); -g_hash_table_insert(mime_table, "application/x-rar", (gpointer)application_x_rar); -g_hash_table_insert(mime_table, "application/x-rpm", (gpointer)application_x_rpm); -g_hash_table_insert(mime_table, "application/x-sdp", (gpointer)application_x_sdp); -g_hash_table_insert(mime_table, "application/x-sea", (gpointer)application_x_sea); -g_hash_table_insert(mime_table, "application/x-seelogo", (gpointer)application_x_seelogo); -g_hash_table_insert(mime_table, "application/x-setupscript", (gpointer)application_x_setupscript); -g_hash_table_insert(mime_table, "application/x-sharedlib", (gpointer)application_x_sharedlib); -g_hash_table_insert(mime_table, "application/x-shar", (gpointer)application_x_shar); -g_hash_table_insert(mime_table, "application/x-shockwave-flash", (gpointer)application_x_shockwave_flash); -g_hash_table_insert(mime_table, "application/x-snappy-framed", (gpointer)application_x_snappy_framed); -g_hash_table_insert(mime_table, "application/x-sprite", (gpointer)application_x_sprite); -g_hash_table_insert(mime_table, "application/x-sqlite3", (gpointer)application_x_sqlite3); -g_hash_table_insert(mime_table, "application/x-stargallery-thm", 
(gpointer)application_x_stargallery_thm); -g_hash_table_insert(mime_table, "application/x-stuffit", (gpointer)application_x_stuffit); -g_hash_table_insert(mime_table, "application/x-sv4cpio", (gpointer)application_x_sv4cpio); -g_hash_table_insert(mime_table, "application/x-sv4crc", (gpointer)application_x_sv4crc); -g_hash_table_insert(mime_table, "application/x-tar", (gpointer)application_x_tar); -g_hash_table_insert(mime_table, "application/x-tbook", (gpointer)application_x_tbook); -g_hash_table_insert(mime_table, "application/x-terminfo", (gpointer)application_x_terminfo); -g_hash_table_insert(mime_table, "application/x-terminfo2", (gpointer)application_x_terminfo2); -g_hash_table_insert(mime_table, "application/x-texinfo", (gpointer)application_x_texinfo); -g_hash_table_insert(mime_table, "application/x-tex-tfm", (gpointer)application_x_tex_tfm); -g_hash_table_insert(mime_table, "application/x-ustar", (gpointer)application_x_ustar); -g_hash_table_insert(mime_table, "application/x-visio", (gpointer)application_x_visio); -g_hash_table_insert(mime_table, "application/x-vnd.audioexplosion.mzz", (gpointer)application_x_vnd_audioexplosion_mzz); -g_hash_table_insert(mime_table, "application/x-vnd.ls-xpix", (gpointer)application_x_vnd_ls_xpix); -g_hash_table_insert(mime_table, "application/x-vrml", (gpointer)application_x_vrml); -g_hash_table_insert(mime_table, "application/x-wais-source", (gpointer)application_x_wais_source); -g_hash_table_insert(mime_table, "application/x-wine-extension-ini", (gpointer)application_x_wine_extension_ini); -g_hash_table_insert(mime_table, "application/x-wintalk", (gpointer)application_x_wintalk); -g_hash_table_insert(mime_table, "application/x-world", (gpointer)application_x_world); -g_hash_table_insert(mime_table, "application/x-wri", (gpointer)application_x_wri); -g_hash_table_insert(mime_table, "application/x-x509-ca-cert", (gpointer)application_x_x509_ca_cert); -g_hash_table_insert(mime_table, "application/x-xz", 
(gpointer)application_x_xz); -g_hash_table_insert(mime_table, "application/x-zip", (gpointer)application_x_zip); -g_hash_table_insert(mime_table, "application/x-zstd", (gpointer)application_x_zstd); -g_hash_table_insert(mime_table, "application/zip", (gpointer)application_zip); -g_hash_table_insert(mime_table, "application/zlib", (gpointer)application_zlib); -g_hash_table_insert(mime_table, "audio/basic", (gpointer)audio_basic); -g_hash_table_insert(mime_table, "audio/it", (gpointer)audio_it); -g_hash_table_insert(mime_table, "audio/make", (gpointer)audio_make); -g_hash_table_insert(mime_table, "audio/midi", (gpointer)audio_midi); -g_hash_table_insert(mime_table, "audio/mid", (gpointer)audio_mid); -g_hash_table_insert(mime_table, "audio/mp4", (gpointer)audio_mp4); -g_hash_table_insert(mime_table, "audio/mpeg", (gpointer)audio_mpeg); -g_hash_table_insert(mime_table, "audio/ogg", (gpointer)audio_ogg); -g_hash_table_insert(mime_table, "audio/s3m", (gpointer)audio_s3m); -g_hash_table_insert(mime_table, "audio/tsp-audio", (gpointer)audio_tsp_audio); -g_hash_table_insert(mime_table, "audio/tsplayer", (gpointer)audio_tsplayer); -g_hash_table_insert(mime_table, "audio/vnd.qcelp", (gpointer)audio_vnd_qcelp); -g_hash_table_insert(mime_table, "audio/voxware", (gpointer)audio_voxware); -g_hash_table_insert(mime_table, "audio/x-aiff", (gpointer)audio_x_aiff); -g_hash_table_insert(mime_table, "audio/x-flac", (gpointer)audio_x_flac); -g_hash_table_insert(mime_table, "audio/x-gsm", (gpointer)audio_x_gsm); -g_hash_table_insert(mime_table, "audio/x-hx-aac-adts", (gpointer)audio_x_hx_aac_adts); -g_hash_table_insert(mime_table, "audio/x-jam", (gpointer)audio_x_jam); -g_hash_table_insert(mime_table, "audio/x-liveaudio", (gpointer)audio_x_liveaudio); -g_hash_table_insert(mime_table, "audio/x-m4a", (gpointer)audio_x_m4a); -g_hash_table_insert(mime_table, "audio/x-midi", (gpointer)audio_x_midi); -g_hash_table_insert(mime_table, "audio/x-mod", (gpointer)audio_x_mod); 
-g_hash_table_insert(mime_table, "audio/x-mp4a-latm", (gpointer)audio_x_mp4a_latm); -g_hash_table_insert(mime_table, "audio/x-mpeg-3", (gpointer)audio_x_mpeg_3); -g_hash_table_insert(mime_table, "audio/x-mpequrl", (gpointer)audio_x_mpequrl); -g_hash_table_insert(mime_table, "audio/xm", (gpointer)audio_xm); -g_hash_table_insert(mime_table, "audio/x-nspaudio", (gpointer)audio_x_nspaudio); -g_hash_table_insert(mime_table, "audio/x-pn-realaudio", (gpointer)audio_x_pn_realaudio); -g_hash_table_insert(mime_table, "audio/x-psid", (gpointer)audio_x_psid); -g_hash_table_insert(mime_table, "audio/x-realaudio", (gpointer)audio_x_realaudio); -g_hash_table_insert(mime_table, "audio/x-s3m", (gpointer)audio_x_s3m); -g_hash_table_insert(mime_table, "audio/x-twinvq-plugin", (gpointer)audio_x_twinvq_plugin); -g_hash_table_insert(mime_table, "audio/x-twinvq", (gpointer)audio_x_twinvq); -g_hash_table_insert(mime_table, "audio/x-voc", (gpointer)audio_x_voc); -g_hash_table_insert(mime_table, "audio/x-wav", (gpointer)audio_x_wav); -g_hash_table_insert(mime_table, "audio/x-xbox360-executable", (gpointer)audio_x_xbox360_executable); -g_hash_table_insert(mime_table, "audio/x-xbox-executable", (gpointer)audio_x_xbox_executable); -g_hash_table_insert(mime_table, "font/otf", (gpointer)font_otf); -g_hash_table_insert(mime_table, "font/sfnt", (gpointer)font_sfnt); -g_hash_table_insert(mime_table, "font/woff2", (gpointer)font_woff2); -g_hash_table_insert(mime_table, "font/woff", (gpointer)font_woff); -g_hash_table_insert(mime_table, "image/bmp", (gpointer)image_bmp); -g_hash_table_insert(mime_table, "image/cmu-raster", (gpointer)image_cmu_raster); -g_hash_table_insert(mime_table, "image/fif", (gpointer)image_fif); -g_hash_table_insert(mime_table, "image/florian", (gpointer)image_florian); -g_hash_table_insert(mime_table, "image/g3fax", (gpointer)image_g3fax); -g_hash_table_insert(mime_table, "image/gif", (gpointer)image_gif); -g_hash_table_insert(mime_table, "image/heic", (gpointer)image_heic); 
-g_hash_table_insert(mime_table, "image/ief", (gpointer)image_ief); -g_hash_table_insert(mime_table, "image/jpeg", (gpointer)image_jpeg); -g_hash_table_insert(mime_table, "image/jutvision", (gpointer)image_jutvision); -g_hash_table_insert(mime_table, "image/naplps", (gpointer)image_naplps); -g_hash_table_insert(mime_table, "image/pict", (gpointer)image_pict); -g_hash_table_insert(mime_table, "image/png", (gpointer)image_png); -g_hash_table_insert(mime_table, "image/svg", (gpointer)image_svg); -g_hash_table_insert(mime_table, "image/svg+xml", (gpointer)image_svg_xml); -g_hash_table_insert(mime_table, "image/tiff", (gpointer)image_tiff); -g_hash_table_insert(mime_table, "image/vnd.adobe.photoshop", (gpointer)image_vnd_adobe_photoshop); -g_hash_table_insert(mime_table, "image/vnd.djvu", (gpointer)image_vnd_djvu); -g_hash_table_insert(mime_table, "image/vnd.fpx", (gpointer)image_vnd_fpx); -g_hash_table_insert(mime_table, "image/vnd.microsoft.icon", (gpointer)image_vnd_microsoft_icon); -g_hash_table_insert(mime_table, "image/vnd.rn-realflash", (gpointer)image_vnd_rn_realflash); -g_hash_table_insert(mime_table, "image/vnd.rn-realpix", (gpointer)image_vnd_rn_realpix); -g_hash_table_insert(mime_table, "image/vnd.wap.wbmp", (gpointer)image_vnd_wap_wbmp); -g_hash_table_insert(mime_table, "image/vnd.xiff", (gpointer)image_vnd_xiff); -g_hash_table_insert(mime_table, "image/webp", (gpointer)image_webp); -g_hash_table_insert(mime_table, "image/wmf", (gpointer)image_wmf); -g_hash_table_insert(mime_table, "image/x-3ds", (gpointer)image_x_3ds); -g_hash_table_insert(mime_table, "image/x-award-bioslogo", (gpointer)image_x_award_bioslogo); -g_hash_table_insert(mime_table, "image/x-cmu-raster", (gpointer)image_x_cmu_raster); -g_hash_table_insert(mime_table, "image/x-cur", (gpointer)image_x_cur); -g_hash_table_insert(mime_table, "image/x-dwg", (gpointer)image_x_dwg); -g_hash_table_insert(mime_table, "image/x-eps", (gpointer)image_x_eps); -g_hash_table_insert(mime_table, "image/x-exr", 
(gpointer)image_x_exr); -g_hash_table_insert(mime_table, "image/x-gem", (gpointer)image_x_gem); -g_hash_table_insert(mime_table, "image/x-icns", (gpointer)image_x_icns); -g_hash_table_insert(mime_table, "image/x-icon", (gpointer)image_x_icon); -g_hash_table_insert(mime_table, "image/x-jg", (gpointer)image_x_jg); -g_hash_table_insert(mime_table, "image/x-jps", (gpointer)image_x_jps); -g_hash_table_insert(mime_table, "image/x-ms-bmp", (gpointer)image_x_ms_bmp); -g_hash_table_insert(mime_table, "image/x-niff", (gpointer)image_x_niff); -g_hash_table_insert(mime_table, "image/x-pcx", (gpointer)image_x_pcx); -g_hash_table_insert(mime_table, "image/x-pict", (gpointer)image_x_pict); -g_hash_table_insert(mime_table, "image/x-portable-bitmap", (gpointer)image_x_portable_bitmap); -g_hash_table_insert(mime_table, "image/x-portable-graymap", (gpointer)image_x_portable_graymap); -g_hash_table_insert(mime_table, "image/x-portable-pixmap", (gpointer)image_x_portable_pixmap); -g_hash_table_insert(mime_table, "image/x-quicktime", (gpointer)image_x_quicktime); -g_hash_table_insert(mime_table, "image/x-rgb", (gpointer)image_x_rgb); -g_hash_table_insert(mime_table, "image/x-tga", (gpointer)image_x_tga); -g_hash_table_insert(mime_table, "image/x-tiff", (gpointer)image_x_tiff); -g_hash_table_insert(mime_table, "image/x-win-bitmap", (gpointer)image_x_win_bitmap); -g_hash_table_insert(mime_table, "image/x-xcf", (gpointer)image_x_xcf); -g_hash_table_insert(mime_table, "image/x-xpixmap", (gpointer)image_x_xpixmap); -g_hash_table_insert(mime_table, "image/x-xwindowdump", (gpointer)image_x_xwindowdump); -g_hash_table_insert(mime_table, "message/news", (gpointer)message_news); -g_hash_table_insert(mime_table, "message/rfc822", (gpointer)message_rfc822); -g_hash_table_insert(mime_table, "model/vnd.dwf", (gpointer)model_vnd_dwf); -g_hash_table_insert(mime_table, "model/vnd.gdl", (gpointer)model_vnd_gdl); -g_hash_table_insert(mime_table, "model/vnd.gs.gdl", (gpointer)model_vnd_gs_gdl); 
-g_hash_table_insert(mime_table, "model/vrml", (gpointer)model_vrml); -g_hash_table_insert(mime_table, "model/x-pov", (gpointer)model_x_pov); -g_hash_table_insert(mime_table, "text/asp", (gpointer)text_asp); -g_hash_table_insert(mime_table, "text/css", (gpointer)text_css); -g_hash_table_insert(mime_table, "text/html", (gpointer)text_html); -g_hash_table_insert(mime_table, "text/javascript", (gpointer)text_javascript); -g_hash_table_insert(mime_table, "text/mcf", (gpointer)text_mcf); -g_hash_table_insert(mime_table, "text/pascal", (gpointer)text_pascal); -g_hash_table_insert(mime_table, "text/PGP", (gpointer)text_PGP); -g_hash_table_insert(mime_table, "text/plain", (gpointer)text_plain); -g_hash_table_insert(mime_table, "application/vnd.coffeescript", (gpointer)application_vnd_coffeescript); -g_hash_table_insert(mime_table, "text/richtext", (gpointer)text_richtext); -g_hash_table_insert(mime_table, "text/rtf", (gpointer)text_rtf); -g_hash_table_insert(mime_table, "text/scriplet", (gpointer)text_scriplet); -g_hash_table_insert(mime_table, "text/tab-separated-values", (gpointer)text_tab_separated_values); -g_hash_table_insert(mime_table, "text/troff", (gpointer)text_troff); -g_hash_table_insert(mime_table, "text/uri-list", (gpointer)text_uri_list); -g_hash_table_insert(mime_table, "text/vnd.abc", (gpointer)text_vnd_abc); -g_hash_table_insert(mime_table, "text/vnd.fmi.flexstor", (gpointer)text_vnd_fmi_flexstor); -g_hash_table_insert(mime_table, "text/vnd.wap.wmlscript", (gpointer)text_vnd_wap_wmlscript); -g_hash_table_insert(mime_table, "text/vnd.wap.wml", (gpointer)text_vnd_wap_wml); -g_hash_table_insert(mime_table, "text/webviewhtml", (gpointer)text_webviewhtml); -g_hash_table_insert(mime_table, "text/x-Algol68", (gpointer)text_x_Algol68); -g_hash_table_insert(mime_table, "text/x-asm", (gpointer)text_x_asm); -g_hash_table_insert(mime_table, "text/x-audiosoft-intra", (gpointer)text_x_audiosoft_intra); -g_hash_table_insert(mime_table, "text/x-awk", 
(gpointer)text_x_awk); -g_hash_table_insert(mime_table, "text/x-bcpl", (gpointer)text_x_bcpl); -g_hash_table_insert(mime_table, "text/x-c", (gpointer)text_x_c); -g_hash_table_insert(mime_table, "text/x-c++", (gpointer)text_x_c__); -g_hash_table_insert(mime_table, "text/x-component", (gpointer)text_x_component); -g_hash_table_insert(mime_table, "text/x-diff", (gpointer)text_x_diff); -g_hash_table_insert(mime_table, "text/x-fortran", (gpointer)text_x_fortran); -g_hash_table_insert(mime_table, "text/x-java", (gpointer)text_x_java); -g_hash_table_insert(mime_table, "text/x-la-asf", (gpointer)text_x_la_asf); -g_hash_table_insert(mime_table, "text/x-lisp", (gpointer)text_x_lisp); -g_hash_table_insert(mime_table, "text/x-m4", (gpointer)text_x_m4); -g_hash_table_insert(mime_table, "text/x-makefile", (gpointer)text_x_makefile); -g_hash_table_insert(mime_table, "text/xml", (gpointer)text_xml); -g_hash_table_insert(mime_table, "text/x-m", (gpointer)text_x_m); -g_hash_table_insert(mime_table, "text/x-msdos-batch", (gpointer)text_x_msdos_batch); -g_hash_table_insert(mime_table, "text/x-ms-regedit", (gpointer)text_x_ms_regedit); -g_hash_table_insert(mime_table, "text/x-objective-c", (gpointer)text_x_objective_c); -g_hash_table_insert(mime_table, "text/x-pascal", (gpointer)text_x_pascal); -g_hash_table_insert(mime_table, "text/x-perl", (gpointer)text_x_perl); -g_hash_table_insert(mime_table, "text/x-php", (gpointer)text_x_php); -g_hash_table_insert(mime_table, "text/x-po", (gpointer)text_x_po); -g_hash_table_insert(mime_table, "text/x-python", (gpointer)text_x_python); -g_hash_table_insert(mime_table, "text/x-ruby", (gpointer)text_x_ruby); -g_hash_table_insert(mime_table, "text/x-sass", (gpointer)text_x_sass); -g_hash_table_insert(mime_table, "text/x-scss", (gpointer)text_x_scss); -g_hash_table_insert(mime_table, "text/x-server-parsed-html", (gpointer)text_x_server_parsed_html); -g_hash_table_insert(mime_table, "text/x-setext", (gpointer)text_x_setext); 
-g_hash_table_insert(mime_table, "text/x-sgml", (gpointer)text_x_sgml); -g_hash_table_insert(mime_table, "text/x-shellscript", (gpointer)text_x_shellscript); -g_hash_table_insert(mime_table, "text/x-speech", (gpointer)text_x_speech); -g_hash_table_insert(mime_table, "text/x-tcl", (gpointer)text_x_tcl); -g_hash_table_insert(mime_table, "text/x-tex", (gpointer)text_x_tex); -g_hash_table_insert(mime_table, "text/x-uil", (gpointer)text_x_uil); -g_hash_table_insert(mime_table, "text/x-uuencode", (gpointer)text_x_uuencode); -g_hash_table_insert(mime_table, "text/x-vcalendar", (gpointer)text_x_vcalendar); -g_hash_table_insert(mime_table, "text/x-vcard", (gpointer)text_x_vcard); -g_hash_table_insert(mime_table, "video/animaflex", (gpointer)video_animaflex); -g_hash_table_insert(mime_table, "video/avi", (gpointer)video_avi); -g_hash_table_insert(mime_table, "video/avs-video", (gpointer)video_avs_video); -g_hash_table_insert(mime_table, "video/MP2T", (gpointer)video_MP2T); -g_hash_table_insert(mime_table, "video/mp4", (gpointer)video_mp4); -g_hash_table_insert(mime_table, "video/mpeg", (gpointer)video_mpeg); -g_hash_table_insert(mime_table, "video/quicktime", (gpointer)video_quicktime); -g_hash_table_insert(mime_table, "video/vdo", (gpointer)video_vdo); -g_hash_table_insert(mime_table, "video/vivo", (gpointer)video_vivo); -g_hash_table_insert(mime_table, "video/vnd.rn-realvideo", (gpointer)video_vnd_rn_realvideo); -g_hash_table_insert(mime_table, "video/vosaic", (gpointer)video_vosaic); -g_hash_table_insert(mime_table, "video/webm", (gpointer)video_webm); -g_hash_table_insert(mime_table, "video/x-amt-demorun", (gpointer)video_x_amt_demorun); -g_hash_table_insert(mime_table, "video/x-amt-showrun", (gpointer)video_x_amt_showrun); -g_hash_table_insert(mime_table, "video/x-atomic3d-feature", (gpointer)video_x_atomic3d_feature); -g_hash_table_insert(mime_table, "video/x-dl", (gpointer)video_x_dl); -g_hash_table_insert(mime_table, "video/x-dv", (gpointer)video_x_dv); 
-g_hash_table_insert(mime_table, "video/x-fli", (gpointer)video_x_fli); -g_hash_table_insert(mime_table, "video/x-flv", (gpointer)video_x_flv); -g_hash_table_insert(mime_table, "video/x-isvideo", (gpointer)video_x_isvideo); -g_hash_table_insert(mime_table, "video/x-jng", (gpointer)video_x_jng); -g_hash_table_insert(mime_table, "video/x-m4v", (gpointer)video_x_m4v); -g_hash_table_insert(mime_table, "video/x-matroska", (gpointer)video_x_matroska); -g_hash_table_insert(mime_table, "video/x-mng", (gpointer)video_x_mng); -g_hash_table_insert(mime_table, "video/x-motion-jpeg", (gpointer)video_x_motion_jpeg); -g_hash_table_insert(mime_table, "video/x-ms-asf", (gpointer)video_x_ms_asf); -g_hash_table_insert(mime_table, "video/x-msvideo", (gpointer)video_x_msvideo); -g_hash_table_insert(mime_table, "video/x-qtc", (gpointer)video_x_qtc); -g_hash_table_insert(mime_table, "video/x-sgi-movie", (gpointer)video_x_sgi_movie); -g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app); -g_hash_table_insert(mime_table, "application/x-zstd-dictionary", (gpointer)application_x_zstd_dictionary); -g_hash_table_insert(mime_table, "application/vnd.ms-outlook", (gpointer)application_vnd_ms_outlook); -g_hash_table_insert(mime_table, "image/x-olympus-orf", (gpointer)image_x_olympus_orf); -g_hash_table_insert(mime_table, "image/x-nikon-nef", (gpointer)image_x_nikon_nef); -g_hash_table_insert(mime_table, "image/x-fuji-raf", (gpointer)image_x_fuji_raf); -g_hash_table_insert(mime_table, "image/x-panasonic-raw", (gpointer)image_x_panasonic_raw); -g_hash_table_insert(mime_table, "image/x-adobe-dng", (gpointer)image_x_adobe_dng); -g_hash_table_insert(mime_table, "image/x-canon-cr2", (gpointer)image_x_canon_cr2); -g_hash_table_insert(mime_table, "image/x-canon-crw", (gpointer)image_x_canon_crw); -g_hash_table_insert(mime_table, "image/x-dcraw", (gpointer)image_x_dcraw); -g_hash_table_insert(mime_table, "image/x-kodak-dcr", (gpointer)image_x_kodak_dcr); 
-g_hash_table_insert(mime_table, "image/x-kodak-k25", (gpointer)image_x_kodak_k25); -g_hash_table_insert(mime_table, "image/x-kodak-kdc", (gpointer)image_x_kodak_kdc); -g_hash_table_insert(mime_table, "image/x-minolta-mrw", (gpointer)image_x_minolta_mrw); -g_hash_table_insert(mime_table, "image/x-pentax-pef", (gpointer)image_x_pentax_pef); -g_hash_table_insert(mime_table, "image/x-sigma-x3f", (gpointer)image_x_sigma_x3f); -g_hash_table_insert(mime_table, "image/x-sony-arw", (gpointer)image_x_sony_arw); -g_hash_table_insert(mime_table, "image/x-sony-sr2", (gpointer)image_x_sony_sr2); -g_hash_table_insert(mime_table, "image/x-sony-srf", (gpointer)image_x_sony_srf); -g_hash_table_insert(mime_table, "image/x-epson-erf", (gpointer)image_x_epson_erf); -g_hash_table_insert(mime_table, "sist2/sidecar", (gpointer)sist2_sidecar); -return mime_table;} +unsigned int mime_extension_lookup(unsigned long extension_crc32) {switch (extension_crc32) { +case 104524599:return application_arj; +case 1388642652:return application_base64; +case 3514823219:return application_binhex; +case 2340081149:case 3420824369:return application_book; +case 285308337:return application_CDFV2; +case 1954483503:return application_clariscad; +case 273534304:return application_commonground; +case 3444350831:return application_dicom; +case 1677149445:return application_drafting; +case 2293841338:return application_epub_zip; +case 3927332999:return application_freeloader; +case 3268238238:return application_futuresplash; +case 2044114573:return application_groupwise; +case 3686102973:case 919960796:return application_gzip; +case 3361975990:return application_hta; +case 4016300425:return application_i_deas; +case 4218684756:case 1542976603:return application_iges; +case 3890785273:return application_inf; +case 2028789010:return application_java_archive; +case 3981121951:return application_java; +case 1795630405:return application_json; +case 2430300356:case 2781733981:return application_ndjson; +case 
1996270327:return application_marc; +case 2723818757:return application_mbedlet; +case 1348936053:return application_mime; +case 878661782:return application_mspowerpoint; +case 2252471652:case 93485219:case 3917664813:case 2853236491:case 3287381265:return application_msword; +case 2711341113:return application_netmc; +case 2854705901:case 1198289189:case 1311803834:case 2547932707:case 3876093456:return application_octet_stream; +case 2280565346:return application_oda; +case 789609574:return application_ogg; +case 250665868:return application_pdf; +case 3507043614:return application_pgp_signature; +case 4072354288:return application_pkcs7_signature; +case 324084633:case 4289790522:return application_pkix_cert; +case 156340709:case 2817149839:return application_postscript; +case 1225748678:case 3788578579:return application_pro_eng; +case 2157762558:return application_ringing_tones; +case 1305199373:case 3908728192:return application_smil; +case 261575936:return application_solids; +case 376222120:return application_sounder; +case 1136262716:case 2996880085:return application_step; +case 2665953483:return application_streamingmedia; +case 2484679325:return application_vda; +case 375063630:return application_vnd_fdf; +case 3498993275:return application_vnd_font_fontforge_sfd; +case 3611136921:case 1164758663:case 3852449971:return application_vnd_hp_hpgl; +case 3314814012:return application_vnd_iccprofile; +case 1787492089:return application_vnd_ms_cab_compressed; +case 3478635557:case 3092313267:case 686811426:case 1609095604:case 2783438039:case 2726961358:return application_vnd_ms_excel; +case 72356500:return application_vnd_ms_fontobject; +case 1397239184:return application_vnd_ms_opentype; +case 4203515915:return application_vnd_ms_pki_certstore; +case 4038361063:return application_vnd_ms_pki_pko; +case 2656977832:return application_vnd_ms_pki_seccat; +case 515732239:case 3191511418:case 1300490290:case 3555163537:case 2065614417:return 
application_vnd_ms_powerpoint; +case 3228862891:return application_vnd_ms_project; +case 518519768:return application_vnd_oasis_opendocument_base; +case 428490689:return application_vnd_oasis_opendocument_formula; +case 1854738263:return application_vnd_oasis_opendocument_graphics; +case 3982396048:return application_vnd_oasis_opendocument_presentation; +case 1951914794:return application_vnd_oasis_opendocument_spreadsheet; +case 3929230985:return application_vnd_oasis_opendocument_text; +case 185029164:return application_vnd_openxmlformats_officedocument_presentationml_presentation; +case 2496574992:return application_vnd_openxmlformats_officedocument_spreadsheetml_sheet; +case 3330667071:return application_vnd_openxmlformats_officedocument_wordprocessingml_document; +case 738795811:return application_vnd_tcpdump_pcap; +case 2320501507:return application_vnd_wap_wmlc; +case 3589852303:return application_vnd_wap_wmlscriptc; +case 365508689:return application_vnd_xara; +case 900824411:return application_vocaltec_media_desc; +case 3686734967:return application_vocaltec_media_file; +case 1460391352:return application_warc; +case 549983773:return application_winhelp; +case 1906478322:case 3614644754:case 1316637608:case 3407838064:case 20579870:case 1983723144:return application_wordperfect; +case 2032670097:return application_x_123; +case 1628318441:return application_x_7z_compressed; +case 828945678:return application_x_aim; +case 3904355907:return application_x_archive; +case 240214191:return application_x_atari_7800_rom; +case 1762534039:return application_x_authorware_bin; +case 4189142790:return application_x_authorware_map; +case 62784101:return application_x_authorware_seg; +case 830492586:return application_x_bcpio; +case 3704076214:return application_x_bittorrent; +case 4083746051:return application_x_bsh; +case 2180399903:return application_x_bytecode_python; +case 3869714710:case 2836727456:return application_x_bzip2; +case 2797765624:return 
application_x_bzip; +case 1544598878:return application_x_cbr; +case 1389051244:return application_x_cbz; +case 2872201429:return application_x_cdlink; +case 574692362:case 1704850090:return application_x_chat; +case 638445766:return application_x_cocoa; +case 1839277551:return application_x_conference; +case 890520454:return application_x_cpio; +case 1132820390:return application_x_dbf; +case 195634552:return application_x_debian_package; +case 2116912533:return application_x_deepv; +case 3131800080:case 3916585216:return application_x_director; +case 818535992:return application_x_dmp; +case 1037284150:return application_x_dosexec; +case 4254383458:return application_x_dvi; +case 2897134736:return application_x_elc; +case 4081402617:case 3789670193:return application_x_envoy; +case 2422189467:return application_x_esrehber; +case 1449039263:case 640966928:case 3062594689:case 998470004:case 3582791768:return application_x_excel; +case 1801697008:return application_x_executable; +case 1354771683:return application_x_font_pf2; +case 2870239366:return application_x_font_pfm; +case 1127964929:case 860984718:return application_x_font_ttf; +case 2338406369:return application_x_freelance; +case 3872284606:return application_x_gsp; +case 2143751684:return application_x_gss; +case 379624156:return application_x_gtar; +case 988372210:return application_x_gzip; +case 482683204:return application_x_hdf; +case 143088812:return application_x_helpfile; +case 193336380:return application_x_httpd_imap; +case 1386311065:return application_x_ima; +case 2318749458:return application_x_internett_signup; +case 1283462680:return application_x_inventor; +case 2783163181:return application_x_ip2; +case 3352474213:return application_x_java_commerce; +case 2640986657:return application_x_java_jmod; +case 1696602166:case 486318226:case 2147142731:case 2023163986:return application_x_koan; +case 215374679:case 2869014186:return application_x_latex; +case 3908779605:return 
application_x_livescreen; +case 3372609354:return application_x_lotus; +case 1244562587:return application_x_lz4_json; +case 1256754847:return application_x_lz4; +case 674599744:return application_x_lzh; +case 943828598:return application_x_lzip; +case 3811851781:return application_x_lzma; +case 3058763491:return application_x_lzop; +case 897745700:return application_x_lzx; +case 785325796:case 3941149528:return application_x_mach_binary; +case 3449642452:return application_x_magic_cap_package_1_0; +case 3141606468:return application_x_mathcad; +case 2726894320:return application_x_meme; +case 2599515954:return application_x_midi; +case 2946536930:return application_x_mif; +case 1474893528:return application_x_mix_transfer; +case 925165716:return application_xml; +case 3357977606:return application_x_mobipocket_ebook; +case 2917469670:case 605433051:return application_vnd_amazon_mobi8_ebook; +case 2740923043:return application_x_msaccess; +case 4218705335:return application_x_ms_compress_szdd; +case 161292181:return application_x_ms_pdb; +case 1574669981:return application_x_ms_reader; +case 244273492:return application_x_n64_rom; +case 2034558928:return application_x_navi_animation; +case 2394047497:return application_x_navidoc; +case 2477632187:return application_x_navimap; +case 2795609754:return application_x_navistyle; +case 1821278300:return application_x_nes_rom; +case 277923493:case 1847354420:return application_x_netcdf; +case 4268750805:return application_x_newton_compatible_pkg; +case 252678980:return application_x_object; +case 100793631:return application_x_omcdatamaker; +case 3089264647:return application_x_omc; +case 4074063950:return application_x_omcregerator; +case 1515671581:case 760242315:return application_x_pagemaker; +case 2707727445:return application_x_pcl; +case 1009151207:return application_x_pixclscript; +case 2243690854:return application_x_pkcs7_certreqresp; +case 16924856:return application_x_pkcs7_signature; +case 1154140277:case 
3340324274:case 689384606:case 3467575705:return application_x_project; +case 2833952472:return application_x_qpro; +case 1792618458:return application_x_rar; +case 3020423487:return application_x_rpm; +case 4167225476:return application_x_sdp; +case 2345255223:return application_x_sea; +case 30348729:return application_x_seelogo; +case 2563130371:return application_x_sharedlib; +case 3706126336:return application_x_shar; +case 1834558535:return application_x_shockwave_flash; +case 952264445:case 891129758:return application_x_sprite; +case 1252092624:return application_x_stuffit; +case 3714480584:return application_x_sv4cpio; +case 3062692538:return application_x_sv4crc; +case 1851020136:return application_x_tar; +case 610123502:case 554870891:return application_x_tbook; +case 1485687582:case 4263375082:return application_x_texinfo; +case 2887632986:return application_x_tex_tfm; +case 529472938:return application_x_ustar; +case 3790619268:case 4232532704:case 1699611482:return application_x_visio; +case 3662579775:return application_x_vnd_audioexplosion_mzz; +case 836703967:return application_x_vnd_ls_xpix; +case 2889000187:return application_x_vrml; +case 1615078541:case 1219082918:return application_x_wais_source; +case 1070375909:return application_x_wintalk; +case 1855504763:return application_x_world; +case 2274911567:return application_x_wri; +case 371078428:return application_x_x509_ca_cert; +case 384792867:return application_x_xz; +case 4116556676:return application_x_zstd; +case 1109235014:return application_zip; +case 1657960367:return application_zlib; +case 491834794:return audio_basic; +case 2727245620:return audio_it; +case 1278026625:case 3092500109:case 1385818959:return audio_make; +case 2033104677:return audio_midi; +case 1275701562:return audio_mid; +case 2815980258:return audio_mp4; +case 1753973982:case 2865015129:case 2901423164:return audio_mpeg; +case 1168137364:return audio_ogg; +case 1855879118:return audio_s3m; +case 2630216279:return 
audio_tsp_audio; +case 4172169879:return audio_tsplayer; +case 3030880813:return audio_vnd_qcelp; +case 327566230:return audio_voxware; +case 1924052889:case 2797235334:return audio_x_aiff; +case 3600363395:return audio_x_flac; +case 4229204931:case 2244490087:return audio_x_gsm; +case 4125413607:return audio_x_jam; +case 4050234453:return audio_x_liveaudio; +case 1053893464:return audio_x_m4a; +case 1101984974:return audio_x_midi; +case 799119745:return audio_x_mpeg_3; +case 2503802084:return audio_xm; +case 1416277874:return audio_x_nspaudio; +case 3889242671:case 1876104302:case 1264872739:case 677511674:return audio_x_pn_realaudio; +case 1461090996:return audio_x_psid; +case 1717917765:return audio_x_realaudio; +case 2764116112:case 3709708340:return audio_x_twinvq_plugin; +case 1036534058:return audio_x_twinvq; +case 2581826170:return audio_x_voc; +case 1803495720:return audio_x_wav; +case 3824649750:return audio_x_xbox360_executable; +case 3484449800:return audio_x_xbox_executable; +case 1505691300:return font_woff2; +case 55690088:return font_woff; +case 4196239628:return image_cmu_raster; +case 2750773763:return image_fif; +case 2807969506:case 791360735:return image_florian; +case 3568668297:return image_g3fax; +case 2721517620:return image_gif; +case 4048480567:return image_heic; +case 68973106:case 3554509561:return image_ief; +case 694905706:case 3496410035:case 2833677253:case 3933391666:case 1189616361:return image_jpeg; +case 3206655858:return image_jutvision; +case 2448102626:case 3954137820:return image_naplps; +case 3409203534:case 348455534:return image_pict; +case 2199389072:case 562164082:return image_png; +case 54863248:return image_svg; +case 3850190390:return image_vnd_adobe_photoshop; +case 2303371901:return image_vnd_djvu; +case 3271224952:return image_vnd_fpx; +case 4160867814:return image_vnd_rn_realflash; +case 215316663:return image_vnd_rn_realpix; +case 480853153:return image_vnd_wap_wbmp; +case 3041784953:return image_vnd_xiff; +case 
2659071723:return image_webp; +case 301010174:return image_x_3ds; +case 501096268:return image_x_cmu_raster; +case 3165697328:return image_x_cur; +case 53977380:case 4087909757:case 1950487814:return image_x_dwg; +case 3903861559:return image_x_exr; +case 731540752:return image_x_icon; +case 4231386708:return image_x_jg; +case 1546833556:return image_x_jps; +case 621872703:case 876913290:return image_x_ms_bmp; +case 2917542843:case 718120911:return image_x_niff; +case 3149819944:return image_x_pcx; +case 2986884099:return image_x_pict; +case 3480790402:return image_x_portable_bitmap; +case 2987348423:return image_x_portable_graymap; +case 3079460177:return image_x_portable_pixmap; +case 3130839030:case 3578773115:case 2983316714:return image_x_quicktime; +case 557094968:return image_x_rgb; +case 3159821597:case 359206964:return image_x_tiff; +case 1336024307:return image_x_xcf; +case 3114239209:return image_x_xpixmap; +case 2399316618:return image_x_xwindowdump; +case 1157813739:case 2663211800:case 1116045392:return message_rfc822; +case 1949340082:return model_vnd_dwf; +case 4148987751:return model_vnd_gdl; +case 2289923100:return model_vnd_gs_gdl; +case 52831377:return model_vrml; +case 4038267427:return model_x_pov; +case 3796050700:return text_asp; +case 2026809048:return text_css; +case 4137492127:case 3252019869:case 410646757:case 3413549060:case 2886207094:case 1097681659:return text_html; +case 398963028:return text_javascript; +case 1431272808:return text_mcf; +case 509266722:return text_pascal; +case 1689700070:case 794565824:case 351504808:case 214229345:case 30677878:case 1835907068:case 1154021400:case 3992351814:case 2107886487:case 2202503947:case 999008199:case 473390917:case 3679822420:case 1465078094:case 1466496025:case 2277716423:case 157353380:case 2002237032:case 4216257084:case 590894066:case 987584319:case 2268432115:case 3551958239:case 1436306077:case 3060306774:case 808890964:case 2564639436:case 3322219037:case 3334425408:case 
3818365258:case 1403162576:case 590812979:case 1800036834:case 144986711:case 621471808:case 449607278:case 2403297477:case 2529069283:case 3929123204:return text_plain; +case 1401235891:return application_vnd_coffeescript; +case 196656302:case 1203117491:case 3183026384:return text_richtext; +case 2119613712:return text_scriplet; +case 298706850:return text_tab_separated_values; +case 1772263384:case 2891092674:case 1485186963:case 881800026:case 2238339752:case 3028401693:return text_troff; +case 101132664:case 1121950192:case 2216472865:case 1474544612:return text_uri_list; +case 891568578:return text_vnd_abc; +case 613266213:return text_vnd_fmi_flexstor; +case 2548505447:return text_vnd_wap_wmlscript; +case 984129374:return text_vnd_wap_wml; +case 2780711517:return text_webviewhtml; +case 2168803285:case 453955339:return text_x_asm; +case 1382996439:return text_x_audiosoft_intra; +case 206188516:return text_x_awk; +case 112844655:case 3685882489:case 2439710439:return text_x_c; +case 3404375201:case 216573595:case 1864149058:return text_x_c__; +case 644730778:return text_x_component; +case 1993550816:case 2611047355:case 2605110166:case 4017424888:return text_x_fortran; +case 2139166987:case 2132469458:return text_x_java; +case 3829413997:return text_x_la_asf; +case 492281966:return text_x_lisp; +case 2956915616:case 3917773051:return text_x_m4; +case 238835196:case 432563031:return text_x_makefile; +case 838129763:case 2060901327:case 739250468:case 1518088406:return text_xml; +case 3775001192:return text_x_m; +case 2677811615:return text_x_msdos_batch; +case 1667763765:return text_x_ms_regedit; +case 2181537457:return text_x_pascal; +case 719472250:return text_x_perl; +case 1452351953:return text_x_php; +case 3018528704:return text_x_po; +case 1195352721:case 1629727233:return text_x_python; +case 4285270527:return text_x_ruby; +case 3703929802:return text_x_sass; +case 3745623972:return text_x_scss; +case 2576022738:return text_x_server_parsed_html; +case 
2765133093:return text_x_setext; +case 2957635486:case 2164340050:return text_x_sgml; +case 111333792:return text_x_shellscript; +case 2669991355:return text_x_speech; +case 3941433202:return text_x_tex; +case 1564511796:return text_x_uil; +case 3270208461:return text_x_uuencode; +case 685888786:return text_x_vcalendar; +case 1161572857:return text_x_vcard; +case 3254229335:return video_animaflex; +case 4217339785:return video_avi; +case 104695539:return video_avs_video; +case 2982480930:return video_mp4; +case 3228914394:case 3948413209:case 2913561920:case 2007364491:case 1135021164:return video_mpeg; +case 4034977726:case 3830260224:case 546697069:return video_quicktime; +case 1939939226:return video_vdo; +case 2724514071:case 2357097034:return video_vivo; +case 3853966722:return video_vnd_rn_realvideo; +case 2220136990:return video_vosaic; +case 4252523058:return video_webm; +case 439967305:return video_x_amt_demorun; +case 532339423:return video_x_amt_showrun; +case 3348741895:return video_x_atomic3d_feature; +case 72126767:return video_x_dl; +case 2691804781:case 4180568149:return video_x_dv; +case 1312658391:return video_x_fli; +case 3275071010:return video_x_flv; +case 2621081147:return video_x_isvideo; +case 2460558646:return video_x_jng; +case 3171067551:return video_x_m4v; +case 2149705476:return video_x_matroska; +case 2548446131:return video_x_mng; +case 1369518905:return video_x_motion_jpeg; +case 379033181:case 3969428286:case 3351930404:return video_x_ms_asf; +case 4122831001:return video_x_msvideo; +case 899307365:return video_x_qtc; +case 492761711:case 686609180:return video_x_sgi_movie; +case 1753898927:return application_vnd_ms_outlook; +case 85101078:return image_x_olympus_orf; +case 22242487:return image_x_nikon_nef; +case 1879309223:return image_x_fuji_raf; +case 10345373:case 447994709:return image_x_panasonic_raw; +case 2553743420:return image_x_adobe_dng; +case 1611589279:return image_x_canon_cr2; +case 1723356032:return 
image_x_canon_crw; +case 1078235802:return image_x_kodak_dcr; +case 28033:return image_x_kodak_k25; +case 1860789138:return image_x_kodak_kdc; +case 1814462090:return image_x_minolta_mrw; +case 401337037:return image_x_pentax_pef; +case 701779405:return image_x_sigma_x3f; +case 1698465774:return image_x_sony_arw; +case 2083014127:return image_x_sony_sr2; +case 271503362:return image_x_sony_srf; +case 142938048:return image_x_epson_erf; +case 287571459:return sist2_sidecar; +default: return 0;}} +unsigned int mime_name_lookup(unsigned long mime_crc32) {switch (mime_crc32) { +case 3812269631: return application_arj; +case 2479484568: return application_base64; +case 3891182180: return application_binhex; +case 3319475062: return application_book; +case 131831009: return application_CDFV2_corrupt; +case 1972415093: return application_CDFV2; +case 2361432233: return application_clariscad; +case 3013534691: return application_commonground; +case 3986958175: return application_csv; +case 2684316502: return application_dicom; +case 1539671880: return application_drafting; +case 749429103: return application_epub_zip; +case 663947845: return application_freeloader; +case 1404675439: return application_futuresplash; +case 366020206: return application_groupwise; +case 886559925: return application_gzip; +case 761519038: return application_hta; +case 3028409237: return application_i_deas; +case 4115303699: return application_iges; +case 48815857: return application_inf; +case 2879859070: return application_java_archive; +case 1898213013: return application_java; +case 223081448: return application_javascript; +case 1698278658: return application_json; +case 4187371699: return application_ndjson; +case 1720840708: return application_marc; +case 2657503299: return application_mbedlet; +case 1287325719: return application_mime; +case 2397876339: return application_mspowerpoint; +case 87314907: return application_msword; +case 1400780351: return application_netmc; +case 
3754511218: return application_octet_stream; +case 1659821930: return application_oda; +case 2694850972: return application_ogg; +case 3958419076: return application_pdf; +case 1866620368: return application_pgp_keys; +case 489766691: return application_pgp_signature; +case 4163635168: return application_pkcs7_signature; +case 75695703: return application_pkix_cert; +case 2281931862: return application_postscript; +case 2662888269: return application_pro_eng; +case 2510237148: return application_ringing_tones; +case 3871669703: return application_smil; +case 2233914511: return application_solids; +case 4248059749: return application_sounder; +case 1300723323: return application_step; +case 2336404640: return application_streamingmedia; +case 1897482133: return application_vda; +case 2726368314: return application_vnd_fdf; +case 3075632147: return application_vnd_font_fontforge_sfd; +case 2621280924: return application_vnd_hp_hpgl; +case 2741854226: return application_vnd_iccprofile; +case 1371686262: return application_vnd_lotus_1_2_3; +case 3936927446: return application_vnd_ms_cab_compressed; +case 4257532721: return application_vnd_ms_excel; +case 3556277566: return application_vnd_ms_fontobject; +case 2971868745: return application_vnd_ms_opentype; +case 458899730: return application_vnd_ms_pki_certstore; +case 1930126273: return application_vnd_ms_pki_pko; +case 2755865911: return application_vnd_ms_pki_seccat; +case 1964479319: return application_vnd_ms_powerpoint; +case 2494900263: return application_vnd_ms_project; +case 4144865272: return application_vnd_oasis_opendocument_base; +case 1537772039: return application_vnd_oasis_opendocument_formula; +case 3291349919: return application_vnd_oasis_opendocument_graphics; +case 2829854259: return application_vnd_oasis_opendocument_presentation; +case 2319019141: return application_vnd_oasis_opendocument_spreadsheet; +case 204654174: return application_vnd_oasis_opendocument_text; +case 817338285: return 
application_vnd_openxmlformats_officedocument_presentationml_presentation; +case 3437874751: return application_vnd_openxmlformats_officedocument_spreadsheetml_sheet; +case 2166600829: return application_vnd_openxmlformats_officedocument_wordprocessingml_document; +case 37472375: return application_vnd_symbian_install; +case 1262135101: return application_vnd_tcpdump_pcap; +case 1817019072: return application_vnd_wap_wmlc; +case 2753880093: return application_vnd_wap_wmlscriptc; +case 3065697271: return application_vnd_xara; +case 838649278: return application_vocaltec_media_desc; +case 3067502586: return application_vocaltec_media_file; +case 1496663551: return application_warc; +case 3547836790: return application_winhelp; +case 3853357533: return application_wordperfect; +case 3261561739: return application_x_123; +case 283325182: return application_x_7z_compressed; +case 2068294999: return application_x_aim; +case 4169136188: return application_x_apple_diskimage; +case 902384586: return application_x_arc; +case 4124671903: return application_x_archive; +case 1188732497: return application_x_atari_7800_rom; +case 496856974: return application_x_authorware_bin; +case 605528024: return application_x_authorware_map; +case 3574616417: return application_x_authorware_seg; +case 2858645924: return application_x_avira_qua; +case 1234533326: return application_x_bcpio; +case 1092178040: return application_x_bittorrent; +case 3108379994: return application_x_bsh; +case 153724964: return application_x_bytecode_python; +case 4188634997: return application_x_bzip2; +case 533359240: return application_x_bzip; +case 373271815: return application_x_cbr; +case 417604917: return application_x_cbz; +case 241171266: return application_x_cdlink; +case 2002793698: return application_x_chat; +case 529743617: return application_x_chrome_extension; +case 2443278958: return application_x_cocoa; +case 2145722326: return application_x_conference; +case 2927741547: return 
application_x_coredump; +case 669615566: return application_x_cpio; +case 162164735: return application_x_dbf; +case 4195557047: return application_x_dbt; +case 3783917932: return application_x_debian_package; +case 104378865: return application_x_deepv; +case 2277091176: return application_x_director; +case 2061944417: return application_x_dmp; +case 1831097311: return application_x_dosdriver; +case 525018050: return application_x_dosexec; +case 3082528059: return application_x_dvi; +case 3867270345: return application_x_elc; +case 282200480: return application_x_empty; +case 2950435174: return application_x_envoy; +case 3187201087: return application_x_esrehber; +case 1467053279: return application_x_excel; +case 947844639: return application_x_executable; +case 2396718311: return application_x_font_gdos; +case 1409586422: return application_x_font_pf2; +case 2949699731: return application_x_font_pfm; +case 882849648: return application_x_font_sfn; +case 1207949588: return application_x_font_ttf; +case 3696685787: return application_x_fptapplication_x_dbt; +case 3068589175: return application_x_freelance; +case 1117803295: return application_x_gamecube_rom; +case 3193872850: return application_x_gdbm; +case 3979978688: return application_x_gettext_translation; +case 463567141: return application_x_git; +case 2900445159: return application_x_gsp; +case 904402525: return application_x_gss; +case 73227412: return application_x_gtar; +case 672438970: return application_x_gzip; +case 1458188573: return application_x_hdf; +case 229946719: return application_x_helpfile; +case 646033581: return application_x_httpd_imap; +case 411989440: return application_x_ima; +case 3975252160: return application_x_innosetup; +case 1986465192: return application_x_internett_signup; +case 2967475964: return application_x_inventor; +case 360716690: return application_x_ip2; +case 1941969752: return application_x_java_applet; +case 2680192289: return application_x_java_commerce; +case 
2138341338: return application_x_java_image; +case 69714645: return application_x_java_jmod; +case 4223869: return application_x_java_keystore; +case 2580542542: return application_x_kdelnk; +case 1257894898: return application_x_koan; +case 1958965043: return application_x_latex; +case 3948850351: return application_x_livescreen; +case 1225577433: return application_x_lotus; +case 1357787287: return application_x_lz4_json; +case 13080262: return application_x_lz4; +case 841824197: return application_x_lzh_compressed; +case 1645906713: return application_x_lzh; +case 4279573947: return application_x_lzip; +case 4056545357: return application_x_lzma; +case 2840574525: return application_x_lzop; +case 2142083965: return application_x_lzx; +case 2343296583: return application_x_mach_binary; +case 3713471041: return application_x_mach_executable; +case 217874104: return application_x_magic_cap_package_1_0; +case 451420058: return application_x_mathcad; +case 3288238855: return application_x_maxis_dbpf; +case 1499643772: return application_x_meme; +case 2282533242: return application_x_midi; +case 3851390395: return application_x_mif; +case 3005113033: return application_x_mix_transfer; +case 3572804971: return application_xml; +case 3049871419: return application_x_mobipocket_ebook; +case 3026870622: return application_vnd_amazon_mobi8_ebook; +case 3721289434: return application_x_msaccess; +case 1771413382: return application_x_ms_compress_szdd; +case 1442391593: return application_x_ms_pdb; +case 1142990575: return application_x_ms_reader; +case 873042275: return application_x_n64_rom; +case 2786166244: return application_x_navi_animation; +case 3973357482: return application_x_navidoc; +case 4181248117: return application_x_navimap; +case 3312986187: return application_x_navistyle; +case 1421897571: return application_x_nes_rom; +case 2479206333: return application_x_netcdf; +case 3639323747: return application_x_newton_compatible_pkg; +case 2023658739: return 
application_x_nintendo_ds_rom; +case 3792313241: return application_x_object; +case 2690661493: return application_x_omcdatamaker; +case 4060964958: return application_x_omc; +case 963821989: return application_x_omcregerator; +case 3803521399: return application_x_pagemaker; +case 3947609100: return application_x_pcl; +case 1165863721: return application_x_pgp_keyring; +case 3070019447: return application_x_pixclscript; +case 4210405996: return application_x_pkcs7_certreqresp; +case 1106894812: return application_x_pkcs7_signature; +case 261550829: return application_x_project; +case 1959776576: return application_x_qpro; +case 553006979: return application_x_rar; +case 4263975270: return application_x_rpm; +case 2991442141: return application_x_sdp; +case 3253131630: return application_x_sea; +case 2900890640: return application_x_seelogo; +case 3384573178: return application_x_setupscript; +case 177730462: return application_x_sharedlib; +case 3457861192: return application_x_shar; +case 69425328: return application_x_shockwave_flash; +case 497520372: return application_x_snappy_framed; +case 2142912491: return application_x_sprite; +case 1325004050: return application_x_sqlite3; +case 3872402089: return application_x_stargallery_thm; +case 1879600421: return application_x_stuffit; +case 4249050059: return application_x_sv4cpio; +case 4230695631: return application_x_sv4crc; +case 612059953: return application_x_tar; +case 1489303899: return application_x_tbook; +case 3222972068: return application_x_terminfo; +case 3417228122: return application_x_terminfo2; +case 3728265961: return application_x_texinfo; +case 905009673: return application_x_tex_tfm; +case 1738156494: return application_x_ustar; +case 2970490033: return application_x_visio; +case 1138952844: return application_x_vnd_audioexplosion_mzz; +case 1304824724: return application_x_vnd_ls_xpix; +case 3201262259: return application_x_vrml; +case 3932648349: return application_x_wais_source; +case 
1235338088: return application_x_wine_extension_ini; +case 589270721: return application_x_wintalk; +case 1113728295: return application_x_world; +case 3451354390: return application_x_wri; +case 57439876: return application_x_x509_ca_cert; +case 3864158535: return application_x_xz; +case 137530655: return application_x_zip; +case 1610651158: return application_x_zstd; +case 2803753038: return application_zip; +case 2109142397: return application_zlib; +case 4190753873: return audio_basic; +case 1937202163: return audio_it; +case 2897185533: return audio_make; +case 747801505: return audio_midi; +case 1149197041: return audio_mid; +case 3021251101: return audio_mp4; +case 3250982680: return audio_mpeg; +case 1081209515: return audio_ogg; +case 1800395249: return audio_s3m; +case 1115987927: return audio_tsp_audio; +case 3332029264: return audio_tsplayer; +case 2061915550: return audio_vnd_qcelp; +case 773784371: return audio_voxware; +case 3530521778: return audio_x_aiff; +case 1985537192: return audio_x_flac; +case 1323393159: return audio_x_gsm; +case 4036043693: return audio_x_hx_aac_adts; +case 1053655815: return audio_x_jam; +case 3004387442: return audio_x_liveaudio; +case 4126699704: return audio_x_m4a; +case 976272409: return audio_x_midi; +case 3705471144: return audio_x_mod; +case 673327857: return audio_x_mp4a_latm; +case 1478949397: return audio_x_mpeg_3; +case 87820768: return audio_x_mpequrl; +case 1153814563: return audio_xm; +case 1178632038: return audio_x_nspaudio; +case 3992820039: return audio_x_pn_realaudio; +case 1122834189: return audio_x_psid; +case 97005398: return audio_x_realaudio; +case 2780242990: return audio_x_s3m; +case 2160563041: return audio_x_twinvq_plugin; +case 1246057794: return audio_x_twinvq; +case 1389025690: return audio_x_voc; +case 2690022088: return audio_x_wav; +case 1048952181: return audio_x_xbox360_executable; +case 544201676: return audio_x_xbox_executable; +case 1689004438: return font_otf; +case 1373033921: 
return font_sfnt; +case 143151839: return font_woff2; +case 3926229593: return font_woff; +case 126997061: return image_bmp; +case 3256582560: return image_cmu_raster; +case 2418019020: return image_fif; +case 2026029393: return image_florian; +case 2108397085: return image_g3fax; +case 2447532283: return image_gif; +case 4205241190: return image_heic; +case 935920381: return image_ief; +case 3785015651: return image_jpeg; +case 3265402270: return image_jutvision; +case 2832913471: return image_naplps; +case 523044927: return image_pict; +case 2966254431: return image_png; +case 814765407: return image_svg; +case 910409547: return image_svg_xml; +case 511777381: return image_tiff; +case 3977526033: return image_vnd_adobe_photoshop; +case 3999110387: return image_vnd_djvu; +case 143568099: return image_vnd_fpx; +case 517285938: return image_vnd_microsoft_icon; +case 4258027809: return image_vnd_rn_realflash; +case 358996403: return image_vnd_rn_realpix; +case 1399719511: return image_vnd_wap_wbmp; +case 950571266: return image_vnd_xiff; +case 2509651130: return image_webp; +case 3920103055: return image_wmf; +case 2937026420: return image_x_3ds; +case 2981127273: return image_x_award_bioslogo; +case 399334222: return image_x_cmu_raster; +case 3882699330: return image_x_cur; +case 3184122542: return image_x_dwg; +case 3918590499: return image_x_eps; +case 1447971005: return image_x_exr; +case 665779514: return image_x_gem; +case 307945478: return image_x_icns; +case 1749532062: return image_x_icon; +case 4168548187: return image_x_jg; +case 3805066526: return image_x_jps; +case 1491847821: return image_x_ms_bmp; +case 656290698: return image_x_niff; +case 88083362: return image_x_pcx; +case 420910635: return image_x_pict; +case 3189998154: return image_x_portable_bitmap; +case 35768844: return image_x_portable_graymap; +case 3136003682: return image_x_portable_pixmap; +case 1296924133: return image_x_quicktime; +case 2680873906: return image_x_rgb; +case 38719162: 
return image_x_tga; +case 414876785: return image_x_tiff; +case 1917930393: return image_x_win_bitmap; +case 4049364857: return image_x_xcf; +case 1663705411: return image_x_xpixmap; +case 153652621: return image_x_xwindowdump; +case 1301602886: return message_news; +case 475605908: return message_rfc822; +case 1271393540: return model_vnd_dwf; +case 3367762897: return model_vnd_gdl; +case 857821694: return model_vnd_gs_gdl; +case 2025285843: return model_vrml; +case 1733021175: return model_x_pov; +case 2596712340: return text_asp; +case 4926016: return text_css; +case 3872744991: return text_html; +case 3862088606: return text_javascript; +case 768274928: return text_mcf; +case 3970938585: return text_pascal; +case 1059844876: return text_PGP; +case 1152832851: return text_plain; +case 2809123822: return application_vnd_coffeescript; +case 4000659158: return text_richtext; +case 1060344107: return text_rtf; +case 211439948: return text_scriplet; +case 2273059608: return text_tab_separated_values; +case 2184047304: return text_troff; +case 3483524219: return text_uri_list; +case 1323400122: return text_vnd_abc; +case 1570296745: return text_vnd_fmi_flexstor; +case 1751327861: return text_vnd_wap_wmlscript; +case 4260425760: return text_vnd_wap_wml; +case 2469299651: return text_webviewhtml; +case 4287189747: return text_x_Algol68; +case 2118781611: return text_x_asm; +case 952979666: return text_x_audiosoft_intra; +case 4081412762: return text_x_awk; +case 646156822: return text_x_bcpl; +case 699860591: return text_x_c; +case 2417225020: return text_x_c__; +case 238960228: return text_x_component; +case 4063368405: return text_x_diff; +case 1029004811: return text_x_fortran; +case 3361256876: return text_x_java; +case 2465655719: return text_x_la_asf; +case 4108808063: return text_x_lisp; +case 2953969017: return text_x_m4; +case 4016541460: return text_x_makefile; +case 1232164091: return text_xml; +case 3457100648: return text_x_m; +case 700310341: return 
text_x_msdos_batch; +case 2079603480: return text_x_ms_regedit; +case 4063403658: return text_x_objective_c; +case 1452612725: return text_x_pascal; +case 3928494898: return text_x_perl; +case 2845729967: return text_x_php; +case 3015973145: return text_x_po; +case 3876215756: return text_x_python; +case 2065206899: return text_x_ruby; +case 1804019892: return text_x_sass; +case 1745091802: return text_x_scss; +case 2730494614: return text_x_server_parsed_html; +case 884456798: return text_x_setext; +case 910386732: return text_x_sgml; +case 709434536: return text_x_shellscript; +case 3374645891: return text_x_speech; +case 1499613687: return text_x_tcl; +case 367147532: return text_x_tex; +case 2723138378: return text_x_uil; +case 1710771134: return text_x_uuencode; +case 704335853: return text_x_vcalendar; +case 820689877: return text_x_vcard; +case 1266973941: return video_animaflex; +case 1794710033: return video_avi; +case 793710652: return video_avs_video; +case 1538592195: return video_MP2T; +case 543386042: return video_mp4; +case 2313178776: return video_mpeg; +case 3777745158: return video_quicktime; +case 3792140802: return video_vdo; +case 1916522329: return video_vivo; +case 82483669: return video_vnd_rn_realvideo; +case 4093612396: return video_vosaic; +case 54388513: return video_webm; +case 2020036618: return video_x_amt_demorun; +case 2314063236: return video_x_amt_showrun; +case 2591549712: return video_x_atomic3d_feature; +case 76200991: return video_x_dl; +case 4192748901: return video_x_dv; +case 1759798614: return video_x_fli; +case 3857475747: return video_x_flv; +case 3528106369: return video_x_isvideo; +case 3027293111: return video_x_jng; +case 2614841374: return video_x_m4v; +case 4159659542: return video_x_matroska; +case 2973746482: return video_x_mng; +case 3962088510: return video_x_motion_jpeg; +case 1565147726: return video_x_ms_asf; +case 637896082: return video_x_msvideo; +case 323199460: return video_x_qtc; +case 4268619377: 
return video_x_sgi_movie; +case 661782105: return x_epoc_x_sisx_app; +case 2159585521: return application_x_zstd_dictionary; +case 106416856: return application_vnd_ms_outlook; +case 2040256118: return image_x_olympus_orf; +case 3741779740: return image_x_nikon_nef; +case 2950019021: return image_x_fuji_raf; +case 2116379200: return image_x_panasonic_raw; +case 1038006091: return image_x_adobe_dng; +case 3469235825: return image_x_canon_cr2; +case 3363758958: return image_x_canon_crw; +case 1615469233: return image_x_dcraw; +case 373957392: return image_x_kodak_dcr; +case 1443814411: return image_x_kodak_k25; +case 954716696: return image_x_kodak_kdc; +case 1433180503: return image_x_minolta_mrw; +case 698598226: return image_x_pentax_pef; +case 2130369412: return image_x_sigma_x3f; +case 3060720351: return image_x_sony_arw; +case 2944016606: return image_x_sony_sr2; +case 3279729971: return image_x_sony_srf; +case 1665206815: return image_x_epson_erf; +case 521139448: return sist2_sidecar; +default: return 0;}} #endif diff --git a/src/parsing/parse.c b/src/parsing/parse.c index a792ad6..4786407 100644 --- a/src/parsing/parse.c +++ b/src/parsing/parse.c @@ -5,235 +5,242 @@ #include "mime.h" #include "src/io/serialize.h" #include "src/parsing/sidecar.h" -#include "src/magic_generated.c" - -#include +#include "src/parsing/fs_util.h" +#include "src/parsing/magic_util.h" +#include #define MIN_VIDEO_SIZE (1024 * 64) #define MIN_IMAGE_SIZE (512) -int fs_read(struct vfile *f, void *buf, size_t size) { +#define MAGIC_BUF_SIZE (4096 * 6) - if (f->fd == -1) { - SHA1_Init(&f->sha1_ctx); +typedef enum { + FILETYPE_DONT_PARSE, + FILETYPE_RAW, + FILETYPE_MEDIA, + FILETYPE_EBOOK, + FILETYPE_MARKUP, + FILETYPE_TEXT, + FILETYPE_FONT, + FILETYPE_ARCHIVE, + FILETYPE_OOXML, + FILETYPE_COMIC, + FILETYPE_MOBI, + FILETYPE_SIST2_SIDECAR, + FILETYPE_MSDOC, + FILETYPE_JSON, + FILETYPE_NDJSON, +} file_type_t; - f->fd = open(f->filepath, O_RDONLY); - if (f->fd == -1) { - return -1; 
+file_type_t get_file_type(unsigned int mime, size_t size, const char *filepath) { + + int major_mime = MAJOR_MIME(mime); + + if (!(SHOULD_PARSE(mime))) { + return FILETYPE_DONT_PARSE; + } else if (IS_RAW(mime)) { + return FILETYPE_RAW; + } else if ((major_mime == MimeVideo && size >= MIN_VIDEO_SIZE) || + (major_mime == MimeImage && size >= MIN_IMAGE_SIZE) || major_mime == MimeAudio) { + return FILETYPE_MEDIA; + } else if (IS_PDF(mime)) { + return FILETYPE_EBOOK; + } else if (major_mime == MimeText && ScanCtx.text_ctx.content_size > 0) { + if (IS_MARKUP(mime)) { + return FILETYPE_MARKUP; + } else { + return FILETYPE_TEXT; + } + + } else if (IS_FONT(mime)) { + return FILETYPE_FONT; + } else if ( + ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && ( + IS_ARC(mime) || + (IS_ARC_FILTER(mime) && should_parse_filtered_file(filepath)) + )) { + return FILETYPE_ARCHIVE; + } else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(mime)) { + return FILETYPE_OOXML; + } else if (is_cbr(&ScanCtx.comic_ctx, mime) || is_cbz(&ScanCtx.comic_ctx, mime)) { + return FILETYPE_COMIC; + } else if (IS_MOBI(mime)) { + return FILETYPE_MOBI; + } else if (mime == MIME_SIST2_SIDECAR) { + return FILETYPE_SIST2_SIDECAR; + } else if (is_msdoc(&ScanCtx.msdoc_ctx, mime)) { + return FILETYPE_MSDOC; + } else if (is_json(&ScanCtx.json_ctx, mime)) { + return FILETYPE_JSON; + } else if (is_ndjson(&ScanCtx.json_ctx, mime)) { + return FILETYPE_NDJSON; + } +} + +#define GET_MIME_ERROR_FATAL (-1) + +int get_mime(parse_job_t *job) { + + char *extension = job->filepath + job->ext; + + int mime = 0; + + if (job->vfile.st_size == 0) { + return MIME_EMPTY; + } + + if (*extension != '\0' && (job->ext - job->base != 1)) { + mime = (int) mime_get_mime_by_ext(extension); + + if (mime != 0) { + return mime; } } - int ret = (int) read(f->fd, buf, size); - - if (ret != 0 && f->calculate_checksum) { - f->has_checksum = TRUE; - safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, ret); + if 
(strlen(extension) == 0 && strlen(job->filepath + job->base) == 40) { + fprintf(stderr, "GIT? %s", job->filepath); } - return ret; -} - -#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));}; - -void fs_close(struct vfile *f) { - if (f->fd != -1) { - SHA1_Final(f->sha1_digest, &f->sha1_ctx); - close(f->fd); + if (ScanCtx.fast) { + return 0; } -} -void fs_reset(struct vfile *f) { - if (f->fd != -1) { - lseek(f->fd, 0, SEEK_SET); + // Get mime type with libmagic + if (job->vfile.read_rewindable == NULL) { + LOG_WARNING(job->filepath, + "File does not support rewindable reads, cannot guess Media type"); + return 0; } + + char *buf[MAGIC_BUF_SIZE]; + int bytes_read = job->vfile.read_rewindable(&job->vfile, buf, MAGIC_BUF_SIZE); + if (bytes_read < 0) { + if (job->vfile.is_fs_file) { + LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno)); + } else { + LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc)); + } + + + return GET_MIME_ERROR_FATAL; + } + + char *magic_mime_str = magic_buffer_embedded(buf, bytes_read); + + if (magic_mime_str != NULL) { + mime = (int) mime_get_mime_by_string(magic_mime_str); + free(magic_mime_str); + + if (mime == 0) { + LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str); + return 0; + } + } + + if (job->vfile.reset != NULL) { + job->vfile.reset(&job->vfile); + } + + return mime; } -void set_dbg_current_file(parse_job_t *job) { - unsigned long long pid = (unsigned long long) pthread_self(); - pthread_mutex_lock(&ScanCtx.dbg_current_files_mu); - g_hash_table_replace(ScanCtx.dbg_current_files, GINT_TO_POINTER(pid), job); - pthread_mutex_unlock(&ScanCtx.dbg_current_files_mu); -} +void parse(parse_job_t *job) { -void parse_job(parse_job_t *job) { - tpool_work_arg_shm_t *arg = malloc(sizeof(tpool_work_arg_shm_t) + sizeof(*job)); - - memcpy(arg->arg, job, sizeof(*job)); - arg->arg_size = -1; - - parse(arg); - - free(arg); -} - -void 
parse(tpool_work_arg_shm_t *arg) { - - parse_job_t *job = (void*)arg->arg; + if (job->vfile.is_fs_file) { + job->vfile.read = fs_read; + job->vfile.read_rewindable = fs_read; + job->vfile.reset = fs_reset; + job->vfile.close = fs_close; + job->vfile.calculate_checksum = ScanCtx.calculate_checksums; + } document_t *doc = malloc(sizeof(document_t)); - set_dbg_current_file(job); - strcpy(doc->filepath, job->filepath); - doc->ext = (short) job->ext; - doc->base = (short) job->base; - - char *rel_path = doc->filepath + ScanCtx.index.desc.root_len; - generate_doc_id(rel_path, doc->doc_id); - + doc->ext = job->ext; + doc->base = job->base; doc->meta_head = NULL; doc->meta_tail = NULL; - doc->mime = 0; doc->size = job->vfile.st_size; doc->mtime = (int) job->vfile.mtime; + doc->mime = get_mime(job); + generate_doc_id(doc->filepath + ScanCtx.index.desc.root_len, doc->doc_id); - int inc_ts = incremental_get(ScanCtx.original_table, doc->doc_id); - if (inc_ts != 0 && inc_ts == job->vfile.mtime) { - pthread_mutex_lock(&ScanCtx.copy_table_mu); - incremental_mark_file(ScanCtx.copy_table, doc->doc_id); - pthread_mutex_unlock(&ScanCtx.copy_table_mu); + if (doc->mime == GET_MIME_ERROR_FATAL) { + pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); + ScanCtx.dbg_failed_files_count += 1; + pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); + CLOSE_FILE(job->vfile) + free(doc); + return; + } + + if (database_mark_document(ProcData.index_db, doc->doc_id, doc->mtime)) { pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); ScanCtx.dbg_skipped_files_count += 1; pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); CLOSE_FILE(job->vfile) free(doc); - return; } - if (ScanCtx.new_table != NULL) { - pthread_mutex_lock(&ScanCtx.copy_table_mu); - incremental_mark_file(ScanCtx.new_table, doc->doc_id); - pthread_mutex_unlock(&ScanCtx.copy_table_mu); - } - - char *buf[MAGIC_BUF_SIZE]; - if (LogCtx.very_verbose) { - LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id) + LOG_DEBUGF(job->filepath, 
"Starting parse job {%s}", doc->doc_id); } - if (job->ext > 4096) { - fprintf(stderr, "Ext is %d, filename is %s\n", job->ext, job->filepath); - } - - if (job->vfile.st_size == 0) { - doc->mime = MIME_EMPTY; - } else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) { - doc->mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext); - } - - if (doc->mime == 0 && !ScanCtx.fast) { - - // Get mime type with libmagic - if (job->vfile.read_rewindable == NULL) { - LOG_WARNING(job->filepath, - "File does not support rewindable reads, cannot guess Media type"); - goto abort; - } - - int bytes_read = job->vfile.read_rewindable(&job->vfile, buf, MAGIC_BUF_SIZE); - if (bytes_read < 0) { - - if (job->vfile.is_fs_file) { - LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno)) - } else { - LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc)) - } - - pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu); - ScanCtx.dbg_failed_files_count += 1; - pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu); - + switch (get_file_type(doc->mime, doc->size, doc->filepath)) { + case FILETYPE_RAW: + parse_raw(&ScanCtx.raw_ctx, &job->vfile, doc); + break; + case FILETYPE_MEDIA: + parse_media(&ScanCtx.media_ctx, &job->vfile, doc, mime_get_mime_text(doc->mime)); + break; + case FILETYPE_EBOOK: + parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc); + break; + case FILETYPE_MARKUP: + parse_markup(&ScanCtx.text_ctx, &job->vfile, doc); + break; + case FILETYPE_TEXT: + parse_text(&ScanCtx.text_ctx, &job->vfile, doc); + break; + case FILETYPE_FONT: + parse_font(&ScanCtx.font_ctx, &job->vfile, doc); + break; + case FILETYPE_ARCHIVE: + parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc, ScanCtx.exclude, ScanCtx.exclude_extra); + break; + case FILETYPE_OOXML: + parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, doc); + break; + case FILETYPE_COMIC: + parse_comic(&ScanCtx.comic_ctx, 
&job->vfile, doc); + break; + case FILETYPE_MOBI: + parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, doc); + break; + case FILETYPE_SIST2_SIDECAR: + parse_sidecar(&job->vfile, doc); CLOSE_FILE(job->vfile) free(doc); - return; - } - - magic_t magic = magic_open(MAGIC_MIME_TYPE); - - const char *magic_buffers[1] = {magic_database_buffer,}; - size_t sizes[1] = {sizeof(magic_database_buffer),}; - - int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1); - - if (load_ret != 0) { - LOG_FATALF("parse.c", "Could not load libmagic database: (%d)", load_ret) - } - - const char *magic_mime_str = magic_buffer(magic, buf, bytes_read); - if (magic_mime_str != NULL) { - doc->mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str); - - LOG_DEBUGF(job->filepath, "libmagic: %s", magic_mime_str); - - if (doc->mime == 0) { - LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str); - } - } - - if (job->vfile.reset != NULL) { - job->vfile.reset(&job->vfile); - } - - magic_close(magic); + case FILETYPE_MSDOC: + parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc); + break; + case FILETYPE_JSON: + parse_json(&ScanCtx.json_ctx, &job->vfile, doc); + break; + case FILETYPE_NDJSON: + parse_ndjson(&ScanCtx.json_ctx, &job->vfile, doc); + break; + case FILETYPE_DONT_PARSE: + default: + break; } - int mmime = MAJOR_MIME(doc->mime); - - if (!(SHOULD_PARSE(doc->mime))) { - - } else if (IS_RAW(doc->mime)) { - parse_raw(&ScanCtx.raw_ctx, &job->vfile, doc); - } else if ((mmime == MimeVideo && doc->size >= MIN_VIDEO_SIZE) || - (mmime == MimeImage && doc->size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) { - - parse_media(&ScanCtx.media_ctx, &job->vfile, doc, mime_get_mime_text(doc->mime)); - - } else if (IS_PDF(doc->mime)) { - parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc); - - } else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) { - if (IS_MARKUP(doc->mime)) { - parse_markup(&ScanCtx.text_ctx, &job->vfile, doc); - } else 
{ - parse_text(&ScanCtx.text_ctx, &job->vfile, doc); - } - - } else if (IS_FONT(doc->mime)) { - parse_font(&ScanCtx.font_ctx, &job->vfile, doc); - - } else if ( - ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && ( - IS_ARC(doc->mime) || - (IS_ARC_FILTER(doc->mime) && should_parse_filtered_file(doc->filepath, doc->ext)) - )) { - parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc, ScanCtx.exclude, ScanCtx.exclude_extra); - } else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc->mime)) { - parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, doc); - } else if (is_cbr(&ScanCtx.comic_ctx, doc->mime) || is_cbz(&ScanCtx.comic_ctx, doc->mime)) { - parse_comic(&ScanCtx.comic_ctx, &job->vfile, doc); - } else if (IS_MOBI(doc->mime)) { - parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, doc); - } else if (doc->mime == MIME_SIST2_SIDECAR) { - parse_sidecar(&job->vfile, doc); - CLOSE_FILE(job->vfile) - free(doc); - return; - } else if (is_msdoc(&ScanCtx.msdoc_ctx, doc->mime)) { - parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc); - } else if (is_json(&ScanCtx.json_ctx, doc->mime)) { - parse_json(&ScanCtx.json_ctx, &job->vfile, doc); - } else if (is_ndjson(&ScanCtx.json_ctx, doc->mime)) { - parse_ndjson(&ScanCtx.json_ctx, &job->vfile, doc); - } - - abort: - //Parent meta if (job->parent[0] != '\0') { meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN); @@ -247,12 +254,8 @@ void parse(tpool_work_arg_shm_t *arg) { if (job->vfile.has_checksum) { char sha1_digest_str[SHA1_STR_LENGTH]; buf2hex((unsigned char *) job->vfile.sha1_digest, SHA1_DIGEST_LENGTH, (char *) sha1_digest_str); - APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str); + APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str) } write_document(doc); } - -void cleanup_parse() { - // noop -} diff --git a/src/parsing/parse.h b/src/parsing/parse.h index 55ecf18..4d090ec 100644 --- a/src/parsing/parse.h +++ b/src/parsing/parse.h @@ -4,15 +4,7 @@ #include 
"../sist.h" #include "src/tpool.h" -#define MAGIC_BUF_SIZE (4096 * 6) -int fs_read(struct vfile *f, void *buf, size_t size); -void fs_close(struct vfile *f); -void fs_reset(struct vfile *f); - -void parse_job(parse_job_t *job); -void parse(tpool_work_arg_shm_t *arg); - -void cleanup_parse(); +void parse(parse_job_t *arg); #endif diff --git a/src/parsing/sidecar.c b/src/parsing/sidecar.c index 0a8952a..5260649 100644 --- a/src/parsing/sidecar.c +++ b/src/parsing/sidecar.c @@ -4,12 +4,12 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) { - LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath) + LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath); size_t size; char *buf = read_all(vfile, &size); if (buf == NULL) { - LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath) + LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath); return; } @@ -18,7 +18,7 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) { cJSON *json = cJSON_Parse(buf); if (json == NULL) { - LOG_ERRORF("sidecar.c", "Could not parse JSON sidecar %s", vfile->filepath) + LOG_ERRORF("sidecar.c", "Could not parse JSON sidecar %s", vfile->filepath); return; } char *json_str = cJSON_PrintUnformatted(json); @@ -32,8 +32,7 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) { generate_doc_id(rel_path, assoc_doc_id); - store_write(ScanCtx.index.meta_store, assoc_doc_id, sizeof(assoc_doc_id), json_str, - strlen(json_str) + 1); + database_write_document_sidecar(ProcData.index_db, assoc_doc_id, json_str); cJSON_Delete(json); free(json_str); diff --git a/src/sist.h b/src/sist.h index 30ded55..8e6eb5a 100644 --- a/src/sist.h +++ b/src/sist.h @@ -49,8 +49,11 @@ #include #include "git_hash.h" -#define VERSION "2.14.3" +#define VERSION "3.0.0" static const char *const Version = VERSION; +static const int VersionMajor = 3; +static const int VersionMinor = 0; +static const int VersionPatch = 0; #ifndef SIST_PLATFORM #define SIST_PLATFORM unknown diff --git 
a/src/stats.c b/src/stats.c deleted file mode 100644 index f94a1a6..0000000 --- a/src/stats.c +++ /dev/null @@ -1,343 +0,0 @@ -#include "sist.h" -#include "io/serialize.h" -#include "ctx.h" - -static GHashTable *FlatTree; -static GHashTable *BufferTable; - -static GHashTable *AggMime; -static GHashTable *AggSize; -static GHashTable *AggDate; - -#define SIZE_BUCKET (long)(5 * 1024 * 1024) -#define DATE_BUCKET (long)(2629800) - -static long TotalSize = 0; -static long DocumentCount = 0; - -typedef struct { - long size; - long count; -} agg_t; - -void fill_tables(cJSON *document, UNUSED(const char index_id[SIST_INDEX_ID_LEN])) { - - if (cJSON_GetObjectItem(document, "parent") != NULL) { - return; - } - - const char *json_path = cJSON_GetObjectItem(document, "path")->valuestring; - char *path = malloc(strlen(json_path) + 1); - strcpy(path, json_path); - - const char *json_mime = cJSON_GetObjectItem(document, "mime")->valuestring; - char *mime; - if (json_mime == NULL) { - mime = NULL; - } else { - mime = malloc(strlen(json_mime) + 1); - strcpy(mime, json_mime); - } - - long size = (long) cJSON_GetObjectItem(document, "size")->valuedouble; - int mtime = cJSON_GetObjectItem(document, "mtime")->valueint; - - // treemap - void *existing_path = g_hash_table_lookup(FlatTree, path); - if (existing_path == NULL) { - g_hash_table_insert(FlatTree, path, (gpointer) size); - } else { - g_hash_table_replace(FlatTree, path, (gpointer) ((long) existing_path + size)); - } - - // mime agg - if (mime != NULL) { - agg_t *orig_agg = g_hash_table_lookup(AggMime, mime); - if (orig_agg == NULL) { - agg_t *agg = malloc(sizeof(agg_t)); - agg->size = size; - agg->count = 1; - g_hash_table_insert(AggMime, mime, agg); - } else { - orig_agg->size += size; - orig_agg->count += 1; - free(mime); - } - } - - // size agg - long size_bucket = size - (size % SIZE_BUCKET); - agg_t *orig_agg = g_hash_table_lookup(AggSize, (gpointer) size_bucket); - if (orig_agg == NULL) { - agg_t *agg = 
malloc(sizeof(agg_t)); - agg->size = size; - agg->count = 1; - g_hash_table_insert(AggSize, (gpointer) size_bucket, agg); - } else { - orig_agg->count += 1; - orig_agg->size += size; - } - - // date agg - long date_bucket = mtime - (mtime % DATE_BUCKET); - orig_agg = g_hash_table_lookup(AggDate, (gpointer) date_bucket); - if (orig_agg == NULL) { - agg_t *agg = malloc(sizeof(agg_t)); - agg->size = size; - agg->count = 1; - g_hash_table_insert(AggDate, (gpointer) date_bucket, agg); - } else { - orig_agg->count += 1; - orig_agg->size += size; - } - - TotalSize += size; - DocumentCount += 1; -} - -void read_index_into_tables(index_t *index) { - char file_path[PATH_MAX]; - READ_INDICES(file_path, index->path, read_index(file_path, index->desc.id, index->desc.type, fill_tables), {}, 1); -} - -static size_t rfind(const char *str, int c) { - for (int i = (int)strlen(str); i >= 0; i--) { - if (str[i] == c) { - return i; - } - } - return -1; -} - -int merge_up(double thresh) { - long min_size = (long) (thresh * (double) TotalSize); - - int count = 0; - GHashTableIter iter; - g_hash_table_iter_init(&iter, FlatTree); - - void *key; - void *value; - - while (g_hash_table_iter_next(&iter, &key, &value)) { - long size = (long) value; - - if (size < min_size) { - int stop = rfind(key, '/'); - if (stop == -1) { - stop = 0; - } - char *parent = malloc(stop + 1); - strncpy(parent, key, stop); - *(parent + stop) = '\0'; - - void *existing_parent = g_hash_table_lookup(FlatTree, parent); - if (existing_parent == NULL) { - void *existing_parent2_key; - void *existing_parent2_val; - int found = g_hash_table_lookup_extended(BufferTable, parent, &existing_parent2_key, - &existing_parent2_val); - if (!found) { - g_hash_table_insert(BufferTable, parent, value); - } else { - g_hash_table_replace(BufferTable, parent, (gpointer) ((long) existing_parent2_val + size)); - free(existing_parent2_key); - } - } else { - g_hash_table_replace(FlatTree, parent, (gpointer) ((long) existing_parent + size)); 
- } - - g_hash_table_iter_remove(&iter); - - count += 1; - } - } - - g_hash_table_iter_init(&iter, BufferTable); - while (g_hash_table_iter_next(&iter, &key, &value)) { - g_hash_table_insert(FlatTree, key, value); - g_hash_table_iter_remove(&iter); - } - - int size = g_hash_table_size(FlatTree); - - LOG_DEBUGF("stats.c", "Merge up iteration (%d merged, %d in tree)", count, size) - return count; -} - -/** - * Assumes out is at at least PATH_MAX *4 - */ -void csv_escape(char *dst, const char *str) { - - const char *ptr = str; - char *out = dst; - - if (rfind(str, ',') == -1 && rfind(str, '"') == -1) { - strcpy(dst, str); - return; - } - - *out++ = '"'; - char c; - while ((c = *ptr++) != 0) { - if (c == '"') { - *out++ = '"'; - *out++ = '"'; - } else { - *out++ = c; - } - } - *out++ = '"'; - *out = '\0'; -} - -int open_or_exit(const char *path) { - int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR); - if (fd < 0) { - LOG_FATALF("stats.c", "Error while creating file: %s [%d]\n", strerror(errno), errno) - } - return fd; -} - -#define TREEMAP_CSV_HEADER "path,size" -#define MIME_AGG_CSV_HEADER "mime,size,count" -#define SIZE_AGG_CSV_HEADER "bucket,size,count" -#define DATE_AGG_CSV_HEADER "bucket,size,count" - -void write_treemap_csv(double thresh, const char *out_path) { - - void *key; - void *value; - - long min_size = (long) (thresh * (double) TotalSize); - - int fd = open_or_exit(out_path); - int ret = write(fd, TREEMAP_CSV_HEADER, sizeof(TREEMAP_CSV_HEADER) - 1); - if (ret == -1) { - LOG_FATALF("stats.c", "Write error: %s", strerror(errno)) - } - - GHashTableIter iter; - g_hash_table_iter_init(&iter, FlatTree); - while (g_hash_table_iter_next(&iter, &key, &value)) { - long size = (long) value; - - if (size >= min_size) { - char path_buf[PATH_MAX * 4]; - char buf[PATH_MAX * 4 + 16]; - - csv_escape(path_buf, key); - size_t written = sprintf(buf, "\n%s,%ld", path_buf, (long) value); - ret = write(fd, buf, written); - if (ret == -1) { - LOG_FATALF("stats.c", 
"Write error: %s", strerror(errno)) - } - } - } - close(fd); -} - -void write_agg_csv_str(const char *out_path, const char *header, GHashTable *table) { - void *key; - void *value; - char buf[4096]; - - int fd = open_or_exit(out_path); - int ret = write(fd, header, strlen(header)); - if (ret == -1) { - LOG_FATALF("stats.c", "Write error: %s", strerror(errno)) - } - - GHashTableIter iter; - g_hash_table_iter_init(&iter, table); - while (g_hash_table_iter_next(&iter, &key, &value)) { - agg_t *agg = value; - - size_t written = sprintf(buf, "\n%s,%ld,%ld", (const char*)key, agg->size, agg->count); - ret = write(fd, buf, written); - if (ret == -1) { - LOG_FATALF("stats.c", "Write error: %s", strerror(errno)) - } - } - - close(fd); -} - -void write_agg_csv_long(const char *out_path, const char *header, GHashTable *table) { - void *key; - void *value; - char buf[4096]; - - int fd = open_or_exit(out_path); - int ret = write(fd, header, strlen(header)); - if (ret == -1) { - LOG_FATALF("stats.c", "Write error: %s", strerror(errno)) - } - - GHashTableIter iter; - g_hash_table_iter_init(&iter, table); - while (g_hash_table_iter_next(&iter, &key, &value)) { - agg_t *agg = value; - size_t written = sprintf(buf, "\n%ld,%ld,%ld", (long)key, agg->size, agg->count); - ret = write(fd, buf, written); - if (ret == -1) { - LOG_FATALF("stats.c", "Write error: %s", strerror(errno)) - } - } - - close(fd); -} - -int generate_stats(index_t *index, const double threshold, const char *out_prefix) { - - FlatTree = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL); - BufferTable = g_hash_table_new(g_str_hash, g_str_equal); - - AggMime = g_hash_table_new_full(g_str_hash, g_str_equal, free, free); - AggSize = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free); - AggDate = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free); - - LOG_INFO("stats.c", "Generating stats...") - - read_index_into_tables(index); - - LOG_DEBUG("stats.c", "Read index into tables") - 
LOG_DEBUGF("stats.c", "Total size is %ld", TotalSize) - LOG_DEBUGF("stats.c", "Document count is %ld", DocumentCount) - LOG_DEBUGF("stats.c", "Merging small directories upwards with a threshold of %f%%", threshold * 100) - - while (merge_up(threshold) > 100) {} - - char tmp[PATH_MAX]; - - strncpy(tmp, out_prefix, sizeof(tmp)); - strcat(tmp, "treemap.csv"); - write_treemap_csv(threshold, tmp); - - strncpy(tmp, out_prefix, sizeof(tmp)); - strcat(tmp, "mime_agg.csv"); - write_agg_csv_str(tmp, MIME_AGG_CSV_HEADER, AggMime); - - strncpy(tmp, out_prefix, sizeof(tmp)); - strcat(tmp, "size_agg.csv"); - write_agg_csv_long(tmp, SIZE_AGG_CSV_HEADER, AggSize); - - strncpy(tmp, out_prefix, sizeof(tmp)); - strcat(tmp, "date_agg.csv"); - write_agg_csv_long(tmp, DATE_AGG_CSV_HEADER, AggDate); - - g_hash_table_remove_all(FlatTree); - g_hash_table_destroy(FlatTree); - g_hash_table_destroy(BufferTable); - - g_hash_table_remove_all(AggMime); - g_hash_table_destroy(AggMime); - g_hash_table_remove_all(AggSize); - g_hash_table_destroy(AggSize); - g_hash_table_remove_all(AggDate); - g_hash_table_destroy(AggDate); - - return 0; -} - diff --git a/src/stats.h b/src/stats.h deleted file mode 100644 index 250874c..0000000 --- a/src/stats.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef SIST2_STATS_H -#define SIST2_STATS_H - -int generate_stats(index_t *index, double threshold, const char* out_prefix); - -#endif diff --git a/src/tpool.c b/src/tpool.c index 8678e22..8f665f8 100644 --- a/src/tpool.c +++ b/src/tpool.c @@ -4,257 +4,250 @@ #include #include #include -#include "mempool/mempool.h" +#include "parsing/parse.h" #define BLANK_STR " " -// TODO: Use slab OOM to control queue size -#define MAX_QUEUE_SIZE 100000 -typedef struct tpool_work { - tpool_work_arg_shm_t *arg; - thread_func_t func; - struct tpool_work *next; -} tpool_work_t; +typedef struct { + int thread_id; + tpool_t *pool; +} start_thread_arg_t; + typedef struct tpool { - tpool_work_t *work_head; - tpool_work_t *work_tail; - - 
pthread_mutex_t work_mutex; - pthread_mutex_t mem_mutex; - - // TODO: Initialize with SHARED attr - pthread_cond_t has_work_cond; - pthread_cond_t working_cond; - pthread_t threads[256]; - - int thread_cnt; - int work_cnt; - int done_cnt; - int busy_cnt; - - int stop; - int waiting; + int num_threads; + int fork; int print_progress; - void (*cleanup_func)(); - - void *shared_memory; - size_t shared_memory_size; - ncx_slab_pool_t *mempool; + struct { + job_type_t job_type; + int stop; + int waiting; + database_ipc_ctx_t ipc_ctx; + pthread_mutex_t mutex; + pthread_mutex_t data_mutex; + pthread_cond_t done_working_cond; + pthread_cond_t workers_initialized_cond; + int busy_count; + int initialized_count; + } *shm; } tpool_t; - -/** - * Create a work object - */ -static tpool_work_t *tpool_work_create(tpool_t *pool, thread_func_t func, tpool_work_arg_t *arg) { - - if (func == NULL) { - return NULL; +void job_destroy(job_t *job) { + if (job->type == JOB_PARSE_JOB) { + free(job->parse_job); } - // Copy heap arg to shm arg - pthread_mutex_lock(&pool->mem_mutex); - - tpool_work_arg_shm_t *shm_arg = ncx_slab_alloc(pool->mempool, sizeof(tpool_work_arg_shm_t) + arg->arg_size); - - shm_arg->arg_size = arg->arg_size; - memcpy(shm_arg->arg, arg->arg, arg->arg_size); - - free(arg->arg); - - tpool_work_t *work = ncx_slab_alloc(pool->mempool, sizeof(tpool_work_t)); - - pthread_mutex_unlock(&pool->mem_mutex); - - work->func = func; - work->arg = shm_arg; - work->next = NULL; - - return work; + free(job); } void tpool_dump_debug_info(tpool_t *pool) { - LOG_DEBUGF("tpool.c", "pool->thread_cnt = %d", pool->thread_cnt) - LOG_DEBUGF("tpool.c", "pool->work_cnt = %d", pool->work_cnt) - LOG_DEBUGF("tpool.c", "pool->done_cnt = %d", pool->done_cnt) - LOG_DEBUGF("tpool.c", "pool->busy_cnt = %d", pool->busy_cnt) - LOG_DEBUGF("tpool.c", "pool->stop = %d", pool->stop) -} - -/** - * Pop work object from thread pool - */ -static tpool_work_t *tpool_work_get(tpool_t *pool) { - - tpool_work_t *work = 
pool->work_head; - if (work == NULL) { - return NULL; - } - - if (work->next == NULL) { - pool->work_head = NULL; - pool->work_tail = NULL; - } else { - pool->work_head = work->next; - } - - return work; + // TODO + LOG_DEBUGF("tpool.c", "pool->num_threads = %d", pool->num_threads); } /** * Push work object to thread pool */ -int tpool_add_work(tpool_t *pool, thread_func_t func, tpool_work_arg_t *arg) { +int tpool_add_work(tpool_t *pool, job_t *job) { - while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) { - usleep(10000); - } - tpool_work_t *work = tpool_work_create(pool, func, arg); - if (work == NULL) { - return 0; + if (pool->shm->job_type == JOB_UNDEFINED) { + pool->shm->job_type = job->type; + } else if (pool->shm->job_type != job->type) { + LOG_FATAL("tpool.c", "FIXME: tpool cannot queue jobs with different types!"); } - pthread_mutex_lock(&(pool->work_mutex)); - if (pool->work_head == NULL) { - pool->work_head = work; - pool->work_tail = pool->work_head; - } else { - pool->work_tail->next = work; - pool->work_tail = work; - } + database_add_work(ProcData.ipc_db, job); - pool->work_cnt++; - - pthread_cond_broadcast(&(pool->has_work_cond)); - pthread_mutex_unlock(&(pool->work_mutex)); - - return 1; + return TRUE; } static void worker_thread_loop(tpool_t *pool) { while (TRUE) { - pthread_mutex_lock(&pool->work_mutex); - if (pool->stop) { + if (pool->shm->stop) { break; } - if (pool->work_head == NULL) { - pthread_cond_wait(&(pool->has_work_cond), &(pool->work_mutex)); + if (pool->shm->job_type == JOB_UNDEFINED) { + // Wait before first job is queued + pthread_mutex_lock(&pool->shm->mutex); + pthread_cond_timedwait_ms(&pool->shm->ipc_ctx.has_work_cond, &pool->shm->mutex, 1000); + pthread_mutex_unlock(&pool->shm->mutex); } - tpool_work_t *work = tpool_work_get(pool); + job_t *job = database_get_work(ProcData.ipc_db, pool->shm->job_type); - if (work != NULL) { - pool->busy_cnt += 1; - } + if (job != NULL) { + pthread_mutex_lock(&(pool->shm->data_mutex)); 
+ pool->shm->busy_count += 1; + pthread_mutex_unlock(&(pool->shm->data_mutex)); - pthread_mutex_unlock(&(pool->work_mutex)); - - if (work != NULL) { - if (pool->stop) { + if (pool->shm->stop) { break; } - work->func(work->arg); + if (job->type == JOB_PARSE_JOB) { + parse(job->parse_job); + } else if (job->type == JOB_BULK_LINE) { + elastic_index_line(job->bulk_line); + } - pthread_mutex_lock(&pool->mem_mutex); - ncx_slab_free(pool->mempool, work->arg); - ncx_slab_free(pool->mempool, work); - pthread_mutex_unlock(&pool->mem_mutex); - } + job_destroy(job); - pthread_mutex_lock(&(pool->work_mutex)); - if (work != NULL) { - pool->busy_cnt -= 1; - pool->done_cnt++; + pthread_mutex_lock(&(pool->shm->data_mutex)); + pool->shm->busy_count -= 1; + pthread_mutex_unlock(&(pool->shm->data_mutex)); + + pthread_mutex_lock(&(pool->shm->ipc_ctx.mutex)); + pool->shm->ipc_ctx.completed_job_count += 1; + pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex)); } if (pool->print_progress) { + + int done = pool->shm->ipc_ctx.completed_job_count; + int count = pool->shm->ipc_ctx.completed_job_count + pool->shm->ipc_ctx.job_count; + if (LogCtx.json_logs) { - progress_bar_print_json(pool->done_cnt, pool->work_cnt, ScanCtx.stat_tn_size, - ScanCtx.stat_index_size, pool->waiting); + progress_bar_print_json(done, + count, + ScanCtx.stat_tn_size, + ScanCtx.stat_index_size, pool->shm->waiting); } else { - progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, - ScanCtx.stat_index_size); + progress_bar_print((double) done / count, + ScanCtx.stat_tn_size, ScanCtx.stat_index_size); } } - if (pool->work_head == NULL) { - pthread_cond_signal(&(pool->working_cond)); + if (job == NULL) { + pthread_mutex_lock(&pool->shm->mutex); + pthread_cond_signal(&pool->shm->done_working_cond); + pthread_mutex_unlock(&pool->shm->mutex); } - pthread_mutex_unlock(&(pool->work_mutex)); } } +static void worker_proc_init(tpool_t *pool, int thread_id) { + // TODO create PID -> thread_id mapping for 
signal handler + + ProcData.thread_id = thread_id; + + if (ScanCtx.index.path[0] != '\0') { + // TODO This should be closed in proc cleanup function + ProcData.index_db = database_create(ScanCtx.index.path, INDEX_DATABASE); + ProcData.index_db->ipc_ctx = &pool->shm->ipc_ctx; + database_open(ProcData.index_db); + } + + // TODO /dev/shm + pthread_mutex_lock(&pool->shm->mutex); + ProcData.ipc_db = database_create("/dev/shm/ipc.sist2", IPC_CONSUMER_DATABASE); + ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx; + database_open(ProcData.ipc_db); + pthread_mutex_unlock(&pool->shm->mutex); +} + +void worker_proc_cleanup(tpool_t* pool) { + if (ProcData.index_db != NULL) { + database_close(ProcData.index_db, FALSE); + } + database_close(ProcData.ipc_db, FALSE); +} + /** * Thread worker function */ static void *tpool_worker(void *arg) { - tpool_t *pool = arg; + tpool_t *pool = ((start_thread_arg_t *) arg)->pool; - int pid = fork(); + if (pool->fork) { + while (TRUE) { + int pid = fork(); - if (pid == 0) { + if (pid == 0) { + worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id); + + pthread_mutex_lock(&pool->shm->mutex); + pthread_cond_signal(&pool->shm->workers_initialized_cond); + pool->shm->initialized_count += 1; + pthread_mutex_unlock(&pool->shm->mutex); + + worker_thread_loop(pool); + + pthread_mutex_lock(&pool->shm->mutex); + pthread_cond_signal(&pool->shm->done_working_cond); + pthread_mutex_unlock(&pool->shm->mutex); + + worker_proc_cleanup(pool); + + exit(0); + + } else { + int status; + // TODO: On crash, print debug info and resume thread + waitpid(pid, &status, 0); + + LOG_DEBUGF("tpool.c", "Child process terminated with status code %d", WEXITSTATUS(status)); + + pthread_mutex_lock(&(pool->shm->ipc_ctx.mutex)); + pool->shm->ipc_ctx.completed_job_count += 1; + pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex)); + + pthread_mutex_lock(&(pool->shm->data_mutex)); + pool->shm->busy_count -= 1; + pthread_mutex_unlock(&(pool->shm->data_mutex)); + + if 
(WIFSIGNALED(status)) { + // TODO: Get current_job based on PID + const char *job_filepath = "TODO"; + + LOG_FATALF_NO_EXIT( + "tpool.c", + "Child process was terminated by signal (%s).\n" + BLANK_STR "The process was working on %s", + strsignal(WTERMSIG(status)), + job_filepath + ); + } + break; + } + } + + } else { + worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id); + + pthread_mutex_lock(&pool->shm->mutex); + pthread_cond_signal(&pool->shm->workers_initialized_cond); + pool->shm->initialized_count += 1; + pthread_mutex_unlock(&pool->shm->mutex); worker_thread_loop(pool); - if (pool->cleanup_func != NULL) { - LOG_INFO("tpool.c", "Executing cleanup function") - pool->cleanup_func(); - LOG_DEBUG("tpool.c", "Done executing cleanup function") - } + pthread_mutex_lock(&pool->shm->mutex); + pthread_cond_signal(&pool->shm->done_working_cond); + pthread_mutex_unlock(&pool->shm->mutex); - pthread_cond_signal(&(pool->working_cond)); - pthread_mutex_unlock(&(pool->work_mutex)); - exit(0); - - } else { - int status; - // TODO: On crash, print debug info and resume thread - waitpid(pid, &status, 0); - - LOG_DEBUGF("tpool.c", "Child process terminated with status code %d", WEXITSTATUS(status)) - - pthread_mutex_lock(&(pool->work_mutex)); - pool->busy_cnt -= 1; - pool->done_cnt++; - pthread_mutex_unlock(&(pool->work_mutex)); - - if (WIFSIGNALED(status)) { -// parse_job_t *job = g_hash_table_lookup(ScanCtx.dbg_current_files, GINT_TO_POINTER(pthread_self())); - const char *job_filepath = "TODO"; - - LOG_FATALF_NO_EXIT( - "tpool.c", - "Child process was terminated by signal (%s).\n" - BLANK_STR "The process was working on %s", - strsignal(WTERMSIG(status)), - job_filepath - ) - } + return NULL; } return NULL; } void tpool_wait(tpool_t *pool) { - LOG_DEBUG("tpool.c", "Waiting for worker threads to finish") - pthread_mutex_lock(&(pool->work_mutex)); + LOG_DEBUG("tpool.c", "Waiting for worker threads to finish"); + pthread_mutex_lock(&pool->shm->mutex); - pool->waiting 
= TRUE; + pool->shm->waiting = TRUE; + pool->shm->ipc_ctx.no_more_jobs = TRUE; while (TRUE) { - if (pool->done_cnt < pool->work_cnt) { - pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex)); + if (pool->shm->ipc_ctx.job_count > 0) { + pthread_cond_wait(&(pool->shm->done_working_cond), &pool->shm->mutex); } else { - LOG_INFOF("tpool.c", "Received head=NULL signal, busy_cnt=%d", pool->busy_cnt); - - if (pool->done_cnt == pool->work_cnt && pool->busy_cnt == 0) { - pool->stop = TRUE; + if (pool->shm->ipc_ctx.job_count == 0 && pool->shm->busy_count == 0) { + pool->shm->stop = TRUE; break; } } @@ -262,34 +255,25 @@ void tpool_wait(tpool_t *pool) { if (pool->print_progress && !LogCtx.json_logs) { progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size); } - pthread_mutex_unlock(&(pool->work_mutex)); + pthread_mutex_unlock(&pool->shm->mutex); - LOG_INFO("tpool.c", "Worker threads finished") + LOG_INFO("tpool.c", "Worker threads finished"); } void tpool_destroy(tpool_t *pool) { - if (pool == NULL) { - return; - } + LOG_INFO("tpool.c", "Destroying thread pool"); - LOG_INFO("tpool.c", "Destroying thread pool") + database_close(ProcData.ipc_db, FALSE); - pthread_mutex_lock(&(pool->work_mutex)); - tpool_work_t *work = pool->work_head; int count = 0; - while (work != NULL) { - tpool_work_t *tmp = work->next; - free(work); - work = tmp; - count += 1; - } LOG_DEBUGF("tpool.c", "Destroyed %d jobs", count); - pthread_cond_broadcast(&(pool->has_work_cond)); - pthread_mutex_unlock(&(pool->work_mutex)); + pthread_mutex_lock(&pool->shm->mutex); + pthread_cond_broadcast(&pool->shm->ipc_ctx.has_work_cond); + pthread_mutex_unlock(&pool->shm->mutex); - for (size_t i = 0; i < pool->thread_cnt; i++) { + for (size_t i = 0; i < pool->num_threads; i++) { pthread_t thread = pool->threads[i]; if (thread != 0) { void *_; @@ -297,42 +281,33 @@ void tpool_destroy(tpool_t *pool) { } } - LOG_INFO("tpool.c", "Final cleanup") + pthread_mutex_destroy(&pool->shm->ipc_ctx.mutex); + 
pthread_mutex_destroy(&pool->shm->mutex); + pthread_cond_destroy(&pool->shm->ipc_ctx.has_work_cond); + pthread_cond_destroy(&pool->shm->done_working_cond); - pthread_mutex_destroy(&(pool->work_mutex)); - pthread_cond_destroy(&(pool->has_work_cond)); - pthread_cond_destroy(&(pool->working_cond)); - - munmap(pool->shared_memory, pool->shared_memory_size); + munmap(pool->shm, sizeof(*pool->shm)); } /** * Create a thread pool * @param thread_cnt Worker threads count */ -tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int print_progress) { +tpool_t *tpool_create(int thread_cnt, int print_progress) { - size_t shm_size = 1024 * 1024 * 2000; + int fork = FALSE; - void *shared_memory = mmap(NULL, shm_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + tpool_t *pool = malloc(sizeof(tpool_t)); - tpool_t *pool = (tpool_t *) shared_memory; - pool->shared_memory = shared_memory; - pool->shared_memory_size = shm_size; - pool->mempool = (ncx_slab_pool_t *) (pool->shared_memory + sizeof(tpool_t)); - pool->mempool->addr = pool->mempool; - pool->mempool->min_shift = 4; - pool->mempool->end = pool->shared_memory + shm_size; + pool->shm = mmap(NULL, sizeof(*pool->shm), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); - ncx_slab_init(pool->mempool); - - pool->thread_cnt = thread_cnt; - pool->work_cnt = 0; - pool->done_cnt = 0; - pool->busy_cnt = 0; - pool->stop = FALSE; - pool->waiting = FALSE; - pool->cleanup_func = cleanup_func; + pool->fork = fork; + pool->num_threads = thread_cnt; + pool->shm->ipc_ctx.job_count = 0; + pool->shm->ipc_ctx.no_more_jobs = FALSE; + pool->shm->stop = FALSE; + pool->shm->waiting = FALSE; + pool->shm->job_type = JOB_UNDEFINED; memset(pool->threads, 0, sizeof(pool->threads)); pool->print_progress = print_progress; @@ -340,27 +315,50 @@ tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int print_progress) { pthread_mutexattr_init(&mutexattr); pthread_mutexattr_setpshared(&mutexattr, TRUE); - 
pthread_mutex_init(&(pool->work_mutex), &mutexattr); - pthread_mutex_init(&(pool->mem_mutex), &mutexattr); + pthread_mutex_init(&(pool->shm->mutex), &mutexattr); + pthread_mutex_init(&(pool->shm->data_mutex), &mutexattr); + pthread_mutex_init(&(pool->shm->ipc_ctx.mutex), &mutexattr); + pthread_mutex_init(&(pool->shm->ipc_ctx.db_mutex), &mutexattr); + pthread_mutex_init(&(pool->shm->ipc_ctx.index_db_mutex), &mutexattr); pthread_condattr_t condattr; pthread_condattr_init(&condattr); pthread_condattr_setpshared(&condattr, TRUE); - pthread_cond_init(&(pool->has_work_cond), &condattr); - pthread_cond_init(&(pool->working_cond), &condattr); + pthread_cond_init(&(pool->shm->ipc_ctx.has_work_cond), &condattr); + pthread_cond_init(&(pool->shm->done_working_cond), &condattr); + pthread_cond_init(&(pool->shm->workers_initialized_cond), &condattr); - pool->work_head = NULL; - pool->work_tail = NULL; + remove("/dev/shm/ipc.sist2"); + remove("/dev/shm/ipc.sist2-wal"); + remove("/dev/shm/ipc.sist2-shm"); + ProcData.ipc_db = database_create("/dev/shm/ipc.sist2", IPC_PRODUCER_DATABASE); + ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx; + database_initialize(ProcData.ipc_db); return pool; } void tpool_start(tpool_t *pool) { - LOG_INFOF("tpool.c", "Starting thread pool with %d threads", pool->thread_cnt) + LOG_INFOF("tpool.c", "Starting thread pool with %d threads", pool->num_threads); - for (size_t i = 0; i < pool->thread_cnt; i++) { - pthread_create(&pool->threads[i], NULL, tpool_worker, pool); + pthread_mutex_lock(&pool->shm->mutex); + + for (int i = 0; i < pool->num_threads; i++) { + + start_thread_arg_t *arg = malloc(sizeof(start_thread_arg_t)); + arg->thread_id = i + 1; + arg->pool = pool; + + pthread_create(&pool->threads[i], NULL, tpool_worker, arg); } + + // Only open the database when all workers are done initializing + while (pool->shm->initialized_count != pool->num_threads) { + pthread_cond_wait(&pool->shm->workers_initialized_cond, &pool->shm->mutex); + } + 
pthread_mutex_unlock(&pool->shm->mutex); + + database_open(ProcData.ipc_db); } diff --git a/src/tpool.h b/src/tpool.h index 8f73449..71742d8 100644 --- a/src/tpool.h +++ b/src/tpool.h @@ -2,34 +2,27 @@ #define SIST2_TPOOL_H #include "sist.h" +#include "third-party/libscan/libscan/scan.h" +#include "index/elastic.h" +#include "src/database/database.h" struct tpool; typedef struct tpool tpool_t; -typedef struct { - size_t arg_size; - void *arg; -} tpool_work_arg_t; - -typedef struct { - size_t arg_size; - char arg[0]; -} tpool_work_arg_shm_t; - -typedef void (*thread_func_t)(tpool_work_arg_shm_t *arg); - -tpool_t *tpool_create(int num, void (*cleanup_func)(), int print_progress); +tpool_t *tpool_create(int num, int print_progress); void tpool_start(tpool_t *pool); void tpool_destroy(tpool_t *pool); -int tpool_add_work(tpool_t *pool, thread_func_t func, tpool_work_arg_t *arg); +int tpool_add_work(tpool_t *pool, job_t *job); void tpool_wait(tpool_t *pool); void tpool_dump_debug_info(tpool_t *pool); +void job_destroy(job_t *job); + #endif diff --git a/src/types.h b/src/types.h index 1ba8a66..f4528dd 100644 --- a/src/types.h +++ b/src/types.h @@ -1,24 +1,26 @@ #ifndef SIST2_TYPES_H #define SIST2_TYPES_H -#define INDEX_TYPE_NDJSON "ndjson" +typedef struct database database_t; typedef struct index_descriptor { char id[SIST_INDEX_ID_LEN]; char version[64]; + int version_major; + int version_minor; + int version_patch; long timestamp; char root[PATH_MAX]; char rewrite_url[8192]; - short root_len; + int root_len; char name[1024]; - char type[64]; } index_descriptor_t; typedef struct index_t { struct index_descriptor desc; - struct store_t *store; - struct store_t *tag_store; - struct store_t *meta_store; + + database_t *db; + char path[PATH_MAX]; } index_t; diff --git a/src/util.c b/src/util.c index 79d5bd2..5f6220c 100644 --- a/src/util.c +++ b/src/util.c @@ -25,7 +25,6 @@ dyn_buffer_t url_escape(char *str) { } char *abspath(const char *path) { - char *expanded = 
expandpath(path); char *abs = realpath(expanded, NULL); @@ -34,8 +33,7 @@ char *abspath(const char *path) { return NULL; } if (strlen(abs) > 1) { - abs = realloc(abs, strlen(abs) + 2); - strcat(abs, "/"); + abs = realloc(abs, strlen(abs) + 1); } return abs; @@ -76,9 +74,8 @@ char *expandpath(const char *path) { } } - char *expanded = malloc(strlen(tmp) + 2); + char *expanded = malloc(strlen(tmp) + 1); strcpy(expanded, tmp); - strcat(expanded, "/"); wordfree(&w); return expanded; @@ -103,6 +100,10 @@ void progress_bar_print_json(size_t done, size_t count, size_t tn_size, size_t i void progress_bar_print(double percentage, size_t tn_size, size_t index_size) { + if (isnan(percentage)) { + return; + } + // TODO: Fix this with shm/ctx static int last_val = -1; @@ -150,10 +151,6 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size) { PrintingProgressBar = TRUE; } -GHashTable *incremental_get_table() { - GHashTable *file_table = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL); - return file_table; -} const char *find_file_in_paths(const char *paths[], const char *filename) { @@ -167,7 +164,7 @@ const char *find_file_in_paths(const char *paths[], const char *filename) { char path[PATH_MAX]; snprintf(path, sizeof(path), "%s%s", apath, filename); - LOG_DEBUGF("util.c", "Looking for '%s' in folder '%s'", filename, apath) + LOG_DEBUGF("util.c", "Looking for '%s' in folder '%s'", filename, apath); free(apath); struct stat info; @@ -269,3 +266,39 @@ void str_unescape(char *dst, const char *str) { } *cur = '\0'; } + +#define NSEC_PER_SEC 1000000000 + +struct timespec timespec_normalise(struct timespec ts) { + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_sec += 1; + ts.tv_nsec -= NSEC_PER_SEC; + } + + while (ts.tv_nsec <= -NSEC_PER_SEC) { + ts.tv_sec -= 1; + ts.tv_nsec += NSEC_PER_SEC; + } + + if (ts.tv_nsec < 0) { + ts.tv_sec -= 1; + ts.tv_nsec = (NSEC_PER_SEC + ts.tv_nsec); + } + + return ts; +} + +struct timespec timespec_add(struct 
timespec ts1, long usec) { + ts1 = timespec_normalise(ts1); + + struct timespec ts2 = timespec_normalise((struct timespec) { + .tv_sec = 0, + .tv_nsec = usec * 1000 + }); + + ts1.tv_sec += ts2.tv_sec; + ts1.tv_nsec += ts2.tv_nsec; + + return timespec_normalise(ts1); +} + diff --git a/src/util.h b/src/util.h index 75f494e..91a5855 100644 --- a/src/util.h +++ b/src/util.h @@ -5,8 +5,6 @@ #include #include -#include - #include "third-party/utf8.h/utf8.h" #include "libscan/scan.h" @@ -22,9 +20,6 @@ extern int PrintingProgressBar; void progress_bar_print_json(size_t done, size_t count, size_t tn_size, size_t index_size, int waiting); void progress_bar_print(double percentage, size_t tn_size, size_t index_size); -GHashTable *incremental_get_table(); - - const char *find_file_in_paths(const char **paths, const char *filename); @@ -100,31 +95,23 @@ static void generate_doc_id(const char *rel_path, char *doc_id) { buf2hex(md, sizeof(md), doc_id); } -__always_inline -static void incremental_put(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN], int mtime) { - char *ptr = malloc(SIST_DOC_ID_LEN); - strcpy(ptr, doc_id); - g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime)); -} +#define MILLISECOND 1000 -__always_inline -static int incremental_get(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) { - if (table != NULL) { - return GPOINTER_TO_INT(g_hash_table_lookup(table, doc_id)); - } else { - return 0; - } -} +struct timespec timespec_add(struct timespec ts1, long usec); -/** - * Marks a file by adding it to a table. - * !!Not thread safe. 
- */ -__always_inline -static int incremental_mark_file(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) { - char *ptr = malloc(SIST_DOC_ID_LEN); - strcpy(ptr, doc_id); - return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1)); -} +#define TIMER_INIT() struct timespec timer_begin +#define TIMER_START() clock_gettime(CLOCK_REALTIME, &timer_begin) +#define TIMER_END(x) do { \ + struct timespec timer_end; \ + clock_gettime(CLOCK_REALTIME, &timer_end); \ + x = (timer_end.tv_sec - timer_begin.tv_sec) * 1000000 + (timer_end.tv_nsec - timer_begin.tv_nsec) / 1000; \ +} while (0) + +#define pthread_cond_timedwait_ms(cond, mutex, delay_ms) do {\ + struct timespec now; \ + clock_gettime(CLOCK_REALTIME, &now); \ + struct timespec end_time = timespec_add(now, MILLISECOND * delay_ms); \ + pthread_cond_timedwait(cond, mutex, &end_time); \ + } while (0) #endif diff --git a/src/web/serve.c b/src/web/serve.c index 5302a59..b2b63d7 100644 --- a/src/web/serve.c +++ b/src/web/serve.c @@ -1,15 +1,14 @@ #include "serve.h" #include "src/sist.h" -#include "src/io/store.h" -#include "static_generated.c" +//#include "src/io/store.h" #include "src/index/elastic.h" #include "src/index/web.h" #include "src/auth0/auth0_c_api.h" +#include "src/web/web_util.h" #include -#define HTTP_SERVER_HEADER "Server: sist2/" VERSION "\r\n" #define HTTP_TEXT_TYPE_HEADER "Content-Type: text/plain;charset=utf-8\r\n" #define HTTP_REPLY_NOT_FOUND mg_http_reply(nc, 404, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Not found"); @@ -20,62 +19,6 @@ static struct mg_http_serve_opts DefaultServeOpts = { .mime_types = "" }; - -__always_inline -static char *address_to_string(struct mg_addr *addr) { - static char address_to_string_buf[INET6_ADDRSTRLEN]; - - return mg_ntoa(addr, address_to_string_buf, sizeof(address_to_string_buf)); -} - -static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) { - mg_printf( - nc, - "HTTP/1.1 %d %s\r\n" - HTTP_SERVER_HEADER - 
"Content-Length: %d\r\n" - "%s\r\n\r\n", - status_code, "OK", - length, - extra_headers - ); -} - - -index_t *get_index_by_id(const char *index_id) { - for (int i = WebCtx.index_count; i >= 0; i--) { - if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) { - return &WebCtx.indices[i]; - } - } - return NULL; -} - -store_t *get_store(const char *index_id) { - index_t *idx = get_index_by_id(index_id); - if (idx != NULL) { - return idx->store; - } - return NULL; -} - -store_t *get_tag_store(const char *index_id) { - index_t *idx = get_index_by_id(index_id); - if (idx != NULL) { - return idx->tag_store; - } - return NULL; -} - -void search_index(struct mg_connection *nc, struct mg_http_message *hm) { - if (WebCtx.dev) { - mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &DefaultServeOpts); - } else { - send_response_line(nc, 200, sizeof(index_html), "Content-Type: text/html"); - mg_send(nc, index_html, sizeof(index_html)); - } -} - void stats_files(struct mg_connection *nc, struct mg_http_message *hm) { if (hm->uri.len != SIST_INDEX_ID_LEN + 4) { @@ -87,7 +30,7 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) { memcpy(arg_index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN); *(arg_index_id + SIST_INDEX_ID_LEN - 1) = '\0'; - index_t *index = get_index_by_id(arg_index_id); + index_t *index = web_get_index_by_id(arg_index_id); if (index == NULL) { HTTP_REPLY_NOT_FOUND return; @@ -123,87 +66,58 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) { mg_http_serve_file(nc, hm, full_path, &opts); } -void javascript(struct mg_connection *nc, struct mg_http_message *hm) { +void serve_index_html(struct mg_connection *nc, struct mg_http_message *hm) { + if (WebCtx.dev) { + mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &DefaultServeOpts); + } else { + web_serve_asset_index_html(nc); + } +} + +void serve_index_js(struct mg_connection *nc, struct mg_http_message *hm) { if (WebCtx.dev) { 
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/index.js", &DefaultServeOpts); } else { - send_response_line(nc, 200, sizeof(index_js), "Content-Type: application/javascript"); - mg_send(nc, index_js, sizeof(index_js)); + web_serve_asset_index_js(nc); } } -void javascript_vendor(struct mg_connection *nc, struct mg_http_message *hm) { +void serve_chunk_vendors_js(struct mg_connection *nc, struct mg_http_message *hm) { if (WebCtx.dev) { mg_http_serve_file(nc, hm, "sist2-vue/dist/js/chunk-vendors.js", &DefaultServeOpts); } else { - send_response_line(nc, 200, sizeof(chunk_vendors_js), "Content-Type: application/javascript"); - mg_send(nc, chunk_vendors_js, sizeof(chunk_vendors_js)); + web_serve_asset_chunk_vendors_js(nc); } } -void favicon(struct mg_connection *nc, struct mg_http_message *hm) { - send_response_line(nc, 200, sizeof(favicon_ico), "Content-Type: image/x-icon"); - mg_send(nc, favicon_ico, sizeof(favicon_ico)); +void serve_favicon_ico(struct mg_connection *nc, struct mg_http_message *hm) { + web_serve_asset_favicon_ico(nc); } -void style(struct mg_connection *nc, struct mg_http_message *hm) { - send_response_line(nc, 200, sizeof(index_css), "Content-Type: text/css"); - mg_send(nc, index_css, sizeof(index_css)); +void serve_style_css(struct mg_connection *nc, struct mg_http_message *hm) { + web_serve_asset_style_css(nc); } -void style_vendor(struct mg_connection *nc, struct mg_http_message *hm) { - send_response_line(nc, 200, sizeof(chunk_vendors_css), "Content-Type: text/css"); - mg_send(nc, chunk_vendors_css, sizeof(chunk_vendors_css)); +void serve_chunk_vendors_css(struct mg_connection *nc, struct mg_http_message *hm) { + web_serve_asset_chunk_vendors_css(nc); } -void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) { +void serve_thumbnail(struct mg_connection *nc, struct mg_http_message *hm, const char *arg_index, + const char *arg_doc_id, int arg_num) { - int has_thumbnail_index = FALSE; - - if (hm->uri.len != SIST_INDEX_ID_LEN + 
SIST_DOC_ID_LEN + 2) { - - if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 4) { - LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr) - HTTP_REPLY_NOT_FOUND - return; - } - has_thumbnail_index = TRUE; - } - - char arg_doc_id[SIST_DOC_ID_LEN]; - char arg_index[SIST_INDEX_ID_LEN]; - - memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN); - *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0'; - memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN); - *(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0'; - - store_t *store = get_store(arg_index); - if (store == NULL) { - LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index) + database_t *db = web_get_database(arg_index); + if (db == NULL) { + LOG_DEBUGF("serve.c", "Could not get database for index: %s", arg_index); HTTP_REPLY_NOT_FOUND return; } - char *data; size_t data_len = 0; - if (has_thumbnail_index) { - const char *tn_index = hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2; - - char tn_key[sizeof(arg_doc_id) + sizeof(char) * 4]; - - memcpy(tn_key, arg_doc_id, sizeof(arg_doc_id)); - memcpy(tn_key + sizeof(arg_doc_id) - 1, tn_index, sizeof(char) * 4); - *(tn_key + sizeof(tn_key) - 1) = '\0'; - - data = store_read(store, (char *) tn_key, sizeof(tn_key), &data_len); - } else { - data = store_read(store, (char *) arg_doc_id, sizeof(arg_doc_id), &data_len); - } + void *data = database_read_thumbnail(db, arg_doc_id, arg_num, &data_len); if (data_len != 0) { - send_response_line( + web_send_headers( nc, 200, data_len, "Content-Type: image/jpeg\r\n" "Cache-Control: max-age=31536000" @@ -216,10 +130,50 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) { } } -void search(struct mg_connection *nc, struct mg_http_message *hm) { +void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) { + if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 5) { + LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", 
(int) hm->uri.len, hm->uri.ptr); + HTTP_REPLY_NOT_FOUND + return; + } + char arg_doc_id[SIST_DOC_ID_LEN]; + char arg_index[SIST_INDEX_ID_LEN]; + char arg_num[5] = {0}; + + memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN); + *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0'; + memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN); + *(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0'; + memcpy(arg_num, hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2, 4); + + int num = (int) strtol(arg_num, NULL, 10); + + serve_thumbnail(nc, hm, arg_index, arg_doc_id, num); +} + +void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) { + + if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) { + LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr); + HTTP_REPLY_NOT_FOUND + return; + } + + char arg_doc_id[SIST_DOC_ID_LEN]; + char arg_index[SIST_INDEX_ID_LEN]; + + memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN); + *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0'; + memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN); + *(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0'; + + serve_thumbnail(nc, hm, arg_index, arg_doc_id, 0); +} + +void search(struct mg_connection *nc, struct mg_http_message *hm) { if (hm->body.len == 0) { - LOG_DEBUG("serve.c", "Client sent empty body, ignoring request") + LOG_DEBUG("serve.c", "Client sent empty body, ignoring request"); mg_http_reply(nc, 400, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request"); return; } @@ -266,7 +220,7 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s if (strcmp(MG_VERSION, EXPECTED_MONGOOSE_VERSION) != 0) { LOG_WARNING("serve.c", "sist2 was not linked with latest mongoose version, " - "serving file from disk might not work as expected.") + "serving file from disk might not work as expected."); } const char *path = cJSON_GetObjectItem(json, "path")->valuestring; @@ -285,7 +239,7 @@ void 
serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s idx->desc.root, path_unescaped, strlen(path_unescaped) == 0 ? "" : "/", name_unescaped, strlen(ext) == 0 ? "" : ".", ext); - LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path) + LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path); char disposition[8192]; snprintf(disposition, sizeof(disposition), @@ -372,7 +326,7 @@ void index_info(struct mg_connection *nc) { char *json_str = cJSON_PrintUnformatted(json); - send_response_line(nc, 200, strlen(json_str), "Content-Type: application/json"); + web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json"); mg_send(nc, json_str, strlen(json_str)); free(json_str); cJSON_Delete(json); @@ -382,7 +336,7 @@ void index_info(struct mg_connection *nc) { void file(struct mg_connection *nc, struct mg_http_message *hm) { if (hm->uri.len != SIST_DOC_ID_LEN + 2) { - LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr) + LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr); HTTP_REPLY_NOT_FOUND return; } @@ -412,7 +366,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) { next = parent->valuestring; } - index_t *idx = get_index_by_id(index_id->valuestring); + index_t *idx = web_get_index_by_id(index_id->valuestring); if (idx == NULL) { cJSON_Delete(doc); @@ -431,9 +385,9 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) { void status(struct mg_connection *nc) { char *status = elastic_get_status(); if (strcmp(status, "open") == 0) { - send_response_line(nc, 204, 0, "Content-Type: application/json"); + web_send_headers(nc, 204, 0, "Content-Type: application/json"); } else { - send_response_line(nc, 500, 0, "Content-Type: application/json"); + web_send_headers(nc, 500, 0, "Content-Type: application/json"); } free(status); @@ -475,114 +429,114 @@ tag_req_t *parse_tag_request(cJSON *json) { } void tag(struct mg_connection *nc, struct 
mg_http_message *hm) { - if (hm->uri.len != SIST_INDEX_ID_LEN + 4) { - LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr) - HTTP_REPLY_NOT_FOUND - return; - } - - char arg_index[SIST_INDEX_ID_LEN]; - memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN); - *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0'; - - if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) { - LOG_DEBUG("serve.c", "Invalid tag request") - HTTP_REPLY_NOT_FOUND - return; - } - - store_t *store = get_tag_store(arg_index); - if (store == NULL) { - LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index) - HTTP_REPLY_NOT_FOUND - return; - } - - char *body = malloc(hm->body.len + 1); - memcpy(body, hm->body.ptr, hm->body.len); - *(body + hm->body.len) = '\0'; - cJSON *json = cJSON_Parse(body); - - tag_req_t *arg_req = parse_tag_request(json); - if (arg_req == NULL) { - LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index) - cJSON_Delete(json); - free(body); - mg_http_reply(nc, 400, "", "Invalid request"); - return; - } - - cJSON *arr = NULL; - - size_t data_len = 0; - const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len); - if (data_len == 0) { - arr = cJSON_CreateArray(); - } else { - arr = cJSON_Parse(data); - } - - if (arg_req->delete) { - - if (data_len > 0) { - cJSON *element = NULL; - int i = 0; - cJSON_ArrayForEach(element, arr) { - if (strcmp(element->valuestring, arg_req->name) == 0) { - cJSON_DeleteItemFromArray(arr, i); - break; - } - i++; - } - } - - char *buf = malloc(sizeof(char) * 8192); - snprintf(buf, 8192, - "{" - " \"script\" : {" - " \"source\": \"if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }\"," - " \"lang\": \"painless\"," - " \"params\" : {" - " \"tag\" : \"%s\"" - " }" - " }" - "}", arg_req->name - ); - - char url[4096]; - snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, 
arg_req->doc_id); - nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl); - - } else { - cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name)); - - char *buf = malloc(sizeof(char) * 8192); - snprintf(buf, 8192, - "{" - " \"script\" : {" - " \"source\": \"if(ctx._source.tag == null) {ctx._source.tag = new ArrayList()} ctx._source.tag.add(params.tag)\"," - " \"lang\": \"painless\"," - " \"params\" : {" - " \"tag\" : \"%s\"" - " }" - " }" - "}", arg_req->name - ); - - char url[4096]; - snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id); - nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl); - } - - char *json_str = cJSON_PrintUnformatted(arr); - store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1); - store_flush(store); - - free(arg_req); - free(json_str); - cJSON_Delete(json); - cJSON_Delete(arr); - free(body); +// if (hm->uri.len != SIST_INDEX_ID_LEN + 4) { +// LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr) +// HTTP_REPLY_NOT_FOUND +// return; +// } +// +// char arg_index[SIST_INDEX_ID_LEN]; +// memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN); +// *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0'; +// +// if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) { +// LOG_DEBUG("serve.c", "Invalid tag request") +// HTTP_REPLY_NOT_FOUND +// return; +// } +// +// store_t *store = get_tag_store(arg_index); +// if (store == NULL) { +// LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index) +// HTTP_REPLY_NOT_FOUND +// return; +// } +// +// char *body = malloc(hm->body.len + 1); +// memcpy(body, hm->body.ptr, hm->body.len); +// *(body + hm->body.len) = '\0'; +// cJSON *json = cJSON_Parse(body); +// +// tag_req_t *arg_req = parse_tag_request(json); +// if (arg_req == NULL) { +// LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index) +// cJSON_Delete(json); +// free(body); +// 
mg_http_reply(nc, 400, "", "Invalid request"); +// return; +// } +// +// cJSON *arr = NULL; +// +// size_t data_len = 0; +// const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len); +// if (data_len == 0) { +// arr = cJSON_CreateArray(); +// } else { +// arr = cJSON_Parse(data); +// } +// +// if (arg_req->delete) { +// +// if (data_len > 0) { +// cJSON *element = NULL; +// int i = 0; +// cJSON_ArrayForEach(element, arr) { +// if (strcmp(element->valuestring, arg_req->name) == 0) { +// cJSON_DeleteItemFromArray(arr, i); +// break; +// } +// i++; +// } +// } +// +// char *buf = malloc(sizeof(char) * 8192); +// snprintf(buf, 8192, +// "{" +// " \"script\" : {" +// " \"source\": \"if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }\"," +// " \"lang\": \"painless\"," +// " \"params\" : {" +// " \"tag\" : \"%s\"" +// " }" +// " }" +// "}", arg_req->name +// ); +// +// char url[4096]; +// snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id); +// nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl); +// +// } else { +// cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name)); +// +// char *buf = malloc(sizeof(char) * 8192); +// snprintf(buf, 8192, +// "{" +// " \"script\" : {" +// " \"source\": \"if(ctx._source.tag == null) {ctx._source.tag = new ArrayList()} ctx._source.tag.add(params.tag)\"," +// " \"lang\": \"painless\"," +// " \"params\" : {" +// " \"tag\" : \"%s\"" +// " }" +// " }" +// "}", arg_req->name +// ); +// +// char url[4096]; +// snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id); +// nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl); +// } +// +// char *json_str = cJSON_PrintUnformatted(arr); +// store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1); +// store_flush(store); +// +// free(arg_req); +// free(json_str); +// 
cJSON_Delete(json); +// cJSON_Delete(arr); +// free(body); } int validate_auth(struct mg_connection *nc, struct mg_http_message *hm) { @@ -601,7 +555,7 @@ int check_auth0(struct mg_http_message *hm) { struct mg_str *cookie = mg_http_get_header(hm, "Cookie"); if (cookie == NULL) { - LOG_WARNING("serve.c", "Unauthorized request (no auth cookie)") + LOG_WARNING("serve.c", "Unauthorized request (no auth cookie)"); return FALSE; } @@ -610,7 +564,7 @@ int check_auth0(struct mg_http_message *hm) { token = mg_http_get_header_var(*cookie, mg_str("sist2-auth0")); if (token.len == 0) { - LOG_WARNING("serve.c", "Unauthorized request (no auth cookie)") + LOG_WARNING("serve.c", "Unauthorized request (no auth cookie)"); return FALSE; } @@ -644,28 +598,31 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo } } + char uri[256]; + memcpy(uri, hm->uri.ptr, hm->uri.len); + *(uri + hm->uri.len) = '\0'; LOG_DEBUGF("serve.c", "<%s> GET %s", - address_to_string(&(nc->rem)), - hm->uri - ) + web_address_to_string(&(nc->rem)), + uri + ); if (mg_http_match_uri(hm, "/")) { - search_index(nc, hm); + serve_index_html(nc, hm); return; } else if (mg_http_match_uri(hm, "/favicon.ico")) { - favicon(nc, hm); + serve_favicon_ico(nc, hm); return; } else if (mg_http_match_uri(hm, "/css/index.css")) { - style(nc, hm); + serve_style_css(nc, hm); return; } else if (mg_http_match_uri(hm, "/css/chunk-vendors.css")) { - style_vendor(nc, hm); + serve_chunk_vendors_css(nc, hm); return; } else if (mg_http_match_uri(hm, "/js/index.js")) { - javascript(nc, hm); + serve_index_js(nc, hm); return; } else if (mg_http_match_uri(hm, "/js/chunk-vendors.js")) { - javascript_vendor(nc, hm); + serve_chunk_vendors_js(nc, hm); return; } else if (mg_http_match_uri(hm, "/i")) { index_info(nc); @@ -683,6 +640,8 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo status(nc); } else if (mg_http_match_uri(hm, "/f/*")) { file(nc, hm); + } else if (mg_http_match_uri(hm, 
"/t/*/*/*")) { + thumbnail_with_num(nc, hm); } else if (mg_http_match_uri(hm, "/t/*/*")) { thumbnail(nc, hm); } else if (mg_http_match_uri(hm, "/s/*/*")) { @@ -706,7 +665,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo response_t *r = ctx->response; if (r->status_code == 200) { - send_response_line(nc, 200, r->size, "Content-Type: application/json"); + web_send_headers(nc, 200, r->size, "Content-Type: application/json"); mg_send(nc, r->body, r->size); } else if (r->status_code == 0) { sist_log("serve.c", LOG_SIST_ERROR, "Could not connect to elasticsearch!"); @@ -738,7 +697,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo void serve(const char *listen_address) { - LOG_INFOF("serve.c", "Starting web server @ http://%s", listen_address) + LOG_INFOF("serve.c", "Starting web server @ http://%s", listen_address); struct mg_mgr mgr; mg_mgr_init(&mgr); @@ -747,12 +706,12 @@ void serve(const char *listen_address) { struct mg_connection *nc = mg_http_listen(&mgr, listen_address, ev_router, NULL); if (nc == NULL) { - LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address) + LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address); } while (ok) { mg_mgr_poll(&mgr, 10); } mg_mgr_free(&mgr); - LOG_INFO("serve.c", "Finished web event loop") + LOG_INFO("serve.c", "Finished web event loop"); } diff --git a/src/web/web_util.c b/src/web/web_util.c new file mode 100644 index 0000000..8cd86f4 --- /dev/null +++ b/src/web/web_util.c @@ -0,0 +1,63 @@ +#include "web_util.h" +#include "static_generated.c" + + +void web_serve_asset_index_html(struct mg_connection *nc) { + web_send_headers(nc, 200, sizeof(index_html), "Content-Type: text/html"); + mg_send(nc, index_html, sizeof(index_html)); +} + +void web_serve_asset_index_js(struct mg_connection *nc) { + web_send_headers(nc, 200, sizeof(index_js), "Content-Type: application/javascript"); + mg_send(nc, index_js, 
sizeof(index_js)); +} + +void web_serve_asset_chunk_vendors_js(struct mg_connection *nc) { + web_send_headers(nc, 200, sizeof(chunk_vendors_js), "Content-Type: application/javascript"); + mg_send(nc, chunk_vendors_js, sizeof(chunk_vendors_js)); +} + +void web_serve_asset_favicon_ico(struct mg_connection *nc) { + web_send_headers(nc, 200, sizeof(favicon_ico), "Content-Type: image/x-icon"); + mg_send(nc, favicon_ico, sizeof(favicon_ico)); +} + +void web_serve_asset_style_css(struct mg_connection *nc) { + web_send_headers(nc, 200, sizeof(index_css), "Content-Type: text/css"); + mg_send(nc, index_css, sizeof(index_css)); +} + +void web_serve_asset_chunk_vendors_css(struct mg_connection *nc) { + web_send_headers(nc, 200, sizeof(chunk_vendors_css), "Content-Type: text/css"); + mg_send(nc, chunk_vendors_css, sizeof(chunk_vendors_css)); +} + +index_t *web_get_index_by_id(const char *index_id) { + for (int i = WebCtx.index_count; i >= 0; i--) { + if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) { + return &WebCtx.indices[i]; + } + } + return NULL; +} + +database_t *web_get_database(const char *index_id) { + index_t *idx = web_get_index_by_id(index_id); + if (idx != NULL) { + return idx->db; + } + return NULL; +} + +void web_send_headers(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) { + mg_printf( + nc, + "HTTP/1.1 %d %s\r\n" + HTTP_SERVER_HEADER + "Content-Length: %d\r\n" + "%s\r\n\r\n", + status_code, "OK", + length, + extra_headers + ); +} diff --git a/src/web/web_util.h b/src/web/web_util.h new file mode 100644 index 0000000..a470dba --- /dev/null +++ b/src/web/web_util.h @@ -0,0 +1,32 @@ +#ifndef SIST2_WEB_UTIL_H +#define SIST2_WEB_UTIL_H + +#include "src/sist.h" +#include "src/index/elastic.h" +#include "src/ctx.h" +#include + +#define HTTP_SERVER_HEADER "Server: sist2/" VERSION "\r\n" + +index_t *web_get_index_by_id(const char *index_id); + +database_t *web_get_database(const char *index_id); + +__always_inline 
+static char *web_address_to_string(struct mg_addr *addr) { + return "TODO"; +// static char address_to_string_buf[INET6_ADDRSTRLEN]; +// +// return mg_ntoa(addr, address_to_string_buf, sizeof(address_to_string_buf)); +} + +void web_send_headers(struct mg_connection *nc, int status_code, size_t length, char *extra_headers); + +void web_serve_asset_index_html(struct mg_connection *nc); +void web_serve_asset_index_js(struct mg_connection *nc); +void web_serve_asset_chunk_vendors_js(struct mg_connection *nc); +void web_serve_asset_favicon_ico(struct mg_connection *nc); +void web_serve_asset_style_css(struct mg_connection *nc); +void web_serve_asset_chunk_vendors_css(struct mg_connection *nc); + +#endif //SIST2_WEB_UTIL_H diff --git a/third-party/libscan/CMakeLists.txt b/third-party/libscan/CMakeLists.txt index ea8042b..3cc065d 100644 --- a/third-party/libscan/CMakeLists.txt +++ b/third-party/libscan/CMakeLists.txt @@ -97,7 +97,6 @@ find_package(LibLZMA REQUIRED) find_package(ZLIB REQUIRED) find_package(unofficial-pcre CONFIG REQUIRED) - find_library(JBIG2DEC_LIB NAMES jbig2decd jbig2dec) find_library(HARFBUZZ_LIB NAMES harfbuzz harfbuzzd) find_library(FREETYPE_LIB NAMES freetype freetyped) @@ -110,6 +109,7 @@ find_library(CMS_LIB NAMES lcms2) find_library(JAS_LIB NAMES jasper) find_library(GUMBO_LIB NAMES gumbo) find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/) +find_package(Leptonica CONFIG REQUIRED) target_compile_options( @@ -231,6 +231,7 @@ target_link_libraries( antiword mobi unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp + leptonica ) target_include_directories( diff --git a/third-party/libscan/libscan/arc/arc.c b/third-party/libscan/libscan/arc/arc.c index 092bc8c..d56f538 100644 
--- a/third-party/libscan/libscan/arc/arc.c +++ b/third-party/libscan/libscan/arc/arc.c @@ -9,27 +9,13 @@ #define MAX_DECOMPRESSED_SIZE_RATIO 40.0 -int should_parse_filtered_file(const char *filepath, int ext) { - char tmp[PATH_MAX * 2]; +int should_parse_filtered_file(const char *filepath) { - if (ext == 0) { - return FALSE; - } - - if (strncmp(filepath + ext, "tgz", 3) == 0) { + if (strstr(filepath, ".tgz")) { return TRUE; } - memcpy(tmp, filepath, ext - 1); - *(tmp + ext - 1) = '\0'; - - char *idx = strrchr(tmp, '.'); - - if (idx == NULL) { - return FALSE; - } - - if (strcmp(idx, ".tar") == 0) { + if (strstr(filepath, ".tar.")) { return TRUE; } @@ -206,18 +192,10 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { struct stat entry_stat = *archive_entry_stat(entry); - sub_job->vfile.st_mode = entry_stat.st_mode; sub_job->vfile.st_size = entry_stat.st_size; sub_job->vfile.mtime = (int) entry_stat.st_mtim.tv_sec; - double decompressed_size_ratio = (double) sub_job->vfile.st_size / (double) f->st_size; - if (decompressed_size_ratio > MAX_DECOMPRESSED_SIZE_RATIO) { - CTX_LOG_DEBUGF("arc.c", "Skipped %s, possible zip bomb (decompressed_size_ratio=%f)", sub_job->filepath, - decompressed_size_ratio) - continue; - } - - if (S_ISREG(sub_job->vfile.st_mode)) { + if (S_ISREG(entry_stat.st_mode)) { const char *utf8_name = archive_entry_pathname_utf8(entry); @@ -231,6 +209,13 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre } sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1; + double decompressed_size_ratio = (double) sub_job->vfile.st_size / (double) f->st_size; + if (decompressed_size_ratio > MAX_DECOMPRESSED_SIZE_RATIO) { + CTX_LOG_DEBUGF("arc.c", "Skipped %s, possible zip bomb (decompressed_size_ratio=%f)", sub_job->filepath, + decompressed_size_ratio) + break; + } + // Handle excludes if (exclude != NULL && 
EXCLUDED(sub_job->filepath)) { CTX_LOG_DEBUGF("arc.c", "Excluded: %s", sub_job->filepath) diff --git a/third-party/libscan/libscan/arc/arc.h b/third-party/libscan/libscan/arc/arc.h index ea9d570..1ea5e49 100644 --- a/third-party/libscan/libscan/arc/arc.h +++ b/third-party/libscan/libscan/arc/arc.h @@ -67,7 +67,7 @@ static int vfile_close_callback(struct archive *a, void *user_data) { int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse); -int should_parse_filtered_file(const char *filepath, int ext); +int should_parse_filtered_file(const char *filepath); scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre *exclude, pcre_extra *exclude_extra); diff --git a/third-party/libscan/libscan/ebook/ebook.c b/third-party/libscan/libscan/ebook/ebook.c index acf70fd..95fb4a4 100644 --- a/third-party/libscan/libscan/ebook/ebook.c +++ b/third-party/libscan/libscan/ebook/ebook.c @@ -162,7 +162,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d avcodec_receive_packet(jpeg_encoder, &jpeg_packet); APPEND_LONG_META(doc, MetaThumbnail, 1) - ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size); + ctx->store(doc->doc_id, 0, (char *) jpeg_packet.data, jpeg_packet.size); free(samples); av_packet_unref(&jpeg_packet); diff --git a/third-party/libscan/libscan/font/font.c b/third-party/libscan/libscan/font/font.c index 6092b19..d734134 100644 --- a/third-party/libscan/libscan/font/font.c +++ b/third-party/libscan/libscan/font/font.c @@ -232,7 +232,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) { bmp_format(&bmp_data, dimensions, bitmap); APPEND_LONG_META(doc, MetaThumbnail, 1) - ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) bmp_data.buf, bmp_data.cur); + ctx->store(doc->doc_id, 0, bmp_data.buf, bmp_data.cur); dyn_buffer_destroy(&bmp_data); free(bitmap); diff --git a/third-party/libscan/libscan/media/media.c 
b/third-party/libscan/libscan/media/media.c index fe9360e..38cb421 100644 --- a/third-party/libscan/libscan/media/media.c +++ b/third-party/libscan/libscan/media/media.c @@ -468,8 +468,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor if (scaled_frame == STORE_AS_IS) { return_value = SAVE_THUMBNAIL_OK; - ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data, - frame_and_packet->packet->size); + ctx->store(doc->doc_id, 0, frame_and_packet->packet->data, frame_and_packet->packet->size); } else { // Encode frame to jpeg AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, @@ -482,19 +481,17 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor // Save thumbnail if (thumbnail_index == 0) { - ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size); + ctx->store(doc->doc_id, 0, jpeg_packet.data, jpeg_packet.size); return_value = SAVE_THUMBNAIL_OK; } else if (thumbnail_index > 1) { - return_value = SAVE_THUMBNAIL_OK; // TO FIX: the 2nd rendered frame is always broken, just skip it until // I figure out a better fix. 
thumbnail_index -= 1; - char tn_key[sizeof(doc->doc_id) + sizeof(char) * 4]; - snprintf(tn_key, sizeof(tn_key), "%s%04d", doc->doc_id, thumbnail_index); + ctx->store(doc->doc_id, thumbnail_index, jpeg_packet.data, jpeg_packet.size); - ctx->store((char *) tn_key, sizeof(tn_key), (char *) jpeg_packet.data, jpeg_packet.size); + return_value = SAVE_THUMBNAIL_OK; } else { return_value = SAVE_THUMBNAIL_SKIPPED; } @@ -854,8 +851,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu if (scaled_frame == STORE_AS_IS) { APPEND_LONG_META(doc, MetaThumbnail, 1) - ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data, - frame_and_packet->packet->size); + ctx->store(doc->doc_id, 0, frame_and_packet->packet->data, frame_and_packet->packet->size); } else { // Encode frame to jpeg AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, @@ -868,7 +864,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu // Save thumbnail APPEND_LONG_META(doc, MetaThumbnail, 1) - ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size); + ctx->store(doc->doc_id, 0, jpeg_packet.data, jpeg_packet.size); av_packet_unref(&jpeg_packet); avcodec_free_context(&jpeg_encoder); diff --git a/third-party/libscan/libscan/ooxml/ooxml.c b/third-party/libscan/libscan/ooxml/ooxml.c index 61a496b..49010e1 100644 --- a/third-party/libscan/libscan/ooxml/ooxml.c +++ b/third-party/libscan/libscan/ooxml/ooxml.c @@ -191,7 +191,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s archive_read_data(a, buf, entry_size); APPEND_LONG_META(doc, MetaThumbnail, 1) - ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), buf, entry_size); + ctx->store(doc->doc_id, 1, buf, entry_size); free(buf); } diff --git a/third-party/libscan/libscan/scan.h b/third-party/libscan/libscan/scan.h index fd3fd1f..9d09016 100644 --- 
a/third-party/libscan/libscan/scan.h +++ b/third-party/libscan/libscan/scan.h @@ -6,6 +6,7 @@ #endif #include +#include #include #include #include @@ -16,7 +17,7 @@ #define UNUSED(x) __attribute__((__unused__)) x -typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len); +typedef void (*store_callback_t)(char *key, int num, void *buf, size_t buf_len); typedef void (*logf_callback_t)(const char *filepath, int level, char *format, ...); @@ -111,8 +112,8 @@ typedef struct document { unsigned long size; unsigned int mime; int mtime; - short base; - short ext; + int base; + int ext; meta_line_t *meta_head; meta_line_t *meta_tail; char filepath[PATH_MAX * 2 + 1]; @@ -144,7 +145,6 @@ typedef struct vfile { int mtime; size_t st_size; - unsigned int st_mode; SHA_CTX sha1_ctx; unsigned char sha1_digest[SHA1_DIGEST_LENGTH]; @@ -161,7 +161,7 @@ typedef struct vfile { logf_callback_t logf; } vfile_t; -typedef struct parse_job_t { +typedef struct { int base; int ext; struct vfile vfile; diff --git a/third-party/libscan/libscan/util.h b/third-party/libscan/libscan/util.h index 09f7ad5..11092e2 100644 --- a/third-party/libscan/libscan/util.h +++ b/third-party/libscan/libscan/util.h @@ -358,4 +358,37 @@ static void safe_sha1_update(SHA_CTX *ctx, void *buf, size_t size) { } } +static parse_job_t *create_parse_job(const char *filepath, int mtime, size_t st_size) { + parse_job_t *job = (parse_job_t *) malloc(sizeof(parse_job_t)); + + job->parent[0] = '\0'; + + strcpy(job->filepath, filepath); + strcpy(job->vfile.filepath, filepath); + job->vfile.st_size = st_size; + job->vfile.mtime = mtime; + + const char *slash = strrchr(filepath, '/'); + if (slash == NULL) { + job->base = 0; + } else { + job->base = (int) (slash - filepath + 1); + } + + const char *dot = strrchr(filepath + job->base, '.'); + if (dot == NULL) { + job->ext = (int) strlen(filepath); + } else { + job->ext = (int) (dot - filepath + 1); + } + + job->vfile.fd = -1; + job->vfile.is_fs_file = 
TRUE; + job->vfile.has_checksum = FALSE; + job->vfile.rewind_buffer_size = 0; + job->vfile.rewind_buffer = NULL; + + return job; +} + #endif diff --git a/third-party/libscan/test/test_util.cpp b/third-party/libscan/test/test_util.cpp index fb9aa5c..6ed8998 100644 --- a/third-party/libscan/test/test_util.cpp +++ b/third-party/libscan/test/test_util.cpp @@ -55,7 +55,6 @@ void load_file(const char *filepath, vfile_t *f) { f->mtime = (int)info.st_mtim.tv_sec; f->st_size = info.st_size; - f->st_mode = info.st_mode; f->fd = open(filepath, O_RDONLY); diff --git a/third-party/libscan/test/test_util.h b/third-party/libscan/test/test_util.h index e388b49..98243d2 100644 --- a/third-party/libscan/test/test_util.h +++ b/third-party/libscan/test/test_util.h @@ -21,7 +21,7 @@ static void noop_log(const char *filepath, int level, char *str) { static size_t store_size = 0; -static void counter_store(char* key, size_t key_len, char *value, size_t value_len) { +static void counter_store(char* key, int num, void *value, size_t value_len) { store_size += value_len; // char id[37]; // char tmp[PATH_MAX]; diff --git a/third-party/libscan/third-party/antiword b/third-party/libscan/third-party/antiword index ddb0421..badfdac 160000 --- a/third-party/libscan/third-party/antiword +++ b/third-party/libscan/third-party/antiword @@ -1 +1 @@ -Subproject commit ddb042143e72a8b789e06f09dbc897dfa9f15b82 +Subproject commit badfdac84586511d4f2b626516162d62a3625349