From 050c1283a35e0368f9fee5756695b64adf73698a Mon Sep 17 00:00:00 2001 From: simon987 Date: Wed, 30 Dec 2020 11:36:59 -0500 Subject: [PATCH] Remove UUID dep, fix incremental scan, use MD5(path) as unique id, version bump --- .gitignore | 1 - CMakeLists.txt | 8 +-- README.md | 2 +- docs/USAGE.md | 4 +- schema/mappings.json | 4 ++ src/index/elastic.c | 20 +++---- src/index/elastic.h | 10 ++-- src/index/static_generated.c | 2 +- src/io/serialize.c | 79 +++++++++++++----------- src/io/serialize.h | 4 +- src/io/store.c | 12 ++-- src/io/store.h | 2 + src/io/walk.c | 2 +- src/main.c | 25 ++++---- src/parsing/parse.c | 37 +++++++----- src/parsing/sidecar.c | 10 ++-- src/sist.h | 4 +- src/static/js/dom.js | 4 +- src/static/js/search.js | 3 +- src/static/search.html | 2 +- src/static/stats.html | 2 +- src/stats.c | 8 +-- src/types.h | 2 +- src/util.c | 3 +- src/util.h | 112 +++++++++++++++++++++++++++++------ src/web/serve.c | 84 +++++++++++++------------- src/web/static_generated.c | 8 +-- tests/test_scan.py | 75 +++++++++++++++++++++++ third-party/libscan | 2 +- 29 files changed, 352 insertions(+), 179 deletions(-) create mode 100644 tests/test_scan.py diff --git a/.gitignore b/.gitignore index eb526b3..eb4db54 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ .idea thumbs -test *.cbp CMakeCache.txt CMakeFiles diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c8b8cf..e32a237 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,7 +40,6 @@ find_package(lmdb CONFIG REQUIRED) find_package(cJSON CONFIG REQUIRED) find_package(unofficial-glib CONFIG REQUIRED) find_package(unofficial-mongoose CONFIG REQUIRED) -find_library(UUID_LIB NAMES uuid) find_package(CURL CONFIG REQUIRED) #find_package(OpenSSL REQUIRED) @@ -68,7 +67,8 @@ if (SIST_DEBUG) -fstack-protector -fno-omit-frame-pointer -fsanitize=address - -O2 + -fno-inline +# -O2 ) target_link_options( sist2 @@ -81,7 +81,6 @@ if (SIST_DEBUG) OUTPUT_NAME sist2_debug ) else () - # set(VCPKG_BUILD_TYPE release) 
target_compile_options( sist2 PRIVATE @@ -108,10 +107,11 @@ target_link_libraries( unofficial::mongoose::mongoose CURL::libcurl - ${UUID_LIB} pthread magic + c + scan ) diff --git a/README.md b/README.md index aececa7..d90bd73 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,7 @@ binaries (GCC 7+ required). 1. Install compile-time dependencies ```bash - vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf + vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf ``` 2. Build diff --git a/docs/USAGE.md b/docs/USAGE.md index 0e50c58..bc7278b 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -241,9 +241,11 @@ The `_text.*` items will be indexed and searchable as **text** fields (fuzzy sea *thumbs/*: -LMDB key-value store. Keys are **binary** 128-bit UUID4s (`_id` field) +LMDB key-value store. Keys are **binary** 16-byte MD5 hashes* (`_id` field) and values are raw image bytes. +*\* The hash is calculated from the full path of the file, including the extension, relative to the index root* + Importing an external `binary` type index is technically possible but it is currently unsupported and has no guaranties of back/forward compatibility. 
diff --git a/schema/mappings.json b/schema/mappings.json index c4150c0..4925c22 100644 --- a/schema/mappings.json +++ b/schema/mappings.json @@ -30,6 +30,10 @@ "mime": { "type": "keyword" }, + "parent": { + "type": "keyword", + "index": false + }, "thumbnail": { "type": "keyword", "index": false diff --git a/src/index/elastic.c b/src/index/elastic.c index 7d2b0a5..8ff764a 100644 --- a/src/index/elastic.c +++ b/src/index/elastic.c @@ -30,11 +30,11 @@ void elastic_cleanup() { } } -void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) { +void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) { cJSON *line = cJSON_CreateObject(); - cJSON_AddStringToObject(line, "_id", uuid_str); + cJSON_AddStringToObject(line, "_id", id_str); cJSON_AddStringToObject(line, "_index", IndexCtx.es_index); cJSON_AddStringToObject(line, "_type", "_doc"); cJSON_AddItemReferenceToObject(line, "_source", document); @@ -52,13 +52,13 @@ void index_json_func(void *arg) { elastic_index_line(line); } -void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) { +void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) { char *json = cJSON_PrintUnformatted(document); size_t json_len = strlen(json); es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2); memcpy(bulk_line->line, json, json_len); - memcpy(bulk_line->uuid_str, uuid_str, UUID_STR_LEN); + memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH); *(bulk_line->line + json_len) = '\n'; *(bulk_line->line + json_len + 1) = '\0'; bulk_line->next = NULL; @@ -67,7 +67,7 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) { tpool_add_work(IndexCtx.pool, index_json_func, bulk_line); } -void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]) { +void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) { if (Indexer == NULL) { Indexer = create_indexer(IndexCtx.es_url, 
IndexCtx.es_index); @@ -129,9 +129,9 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) { while (line != NULL && *count < max) { char action_str[256]; snprintf( - action_str, 256, + action_str, sizeof(action_str), "{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n", - line->uuid_str, Indexer->es_index + line->path_md5_str, Indexer->es_index ); size_t action_str_len = strlen(action_str); @@ -220,7 +220,7 @@ void _elastic_flush(int max) { if (r->status_code == 413) { if (max <= 1) { - LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->uuid_str) + LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str) free_response(r); free(buf); delete_queue(1); @@ -408,9 +408,9 @@ void elastic_init(int force_reset, const char* user_mappings, const char* user_s } } -cJSON *elastic_get_document(const char *uuid_str) { +cJSON *elastic_get_document(const char *id_str) { char url[4096]; - snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, uuid_str); + snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str); response_t *r = web_get(url, 3); cJSON *json = NULL; diff --git a/src/index/elastic.h b/src/index/elastic.h index c8c08a3..e316440 100644 --- a/src/index/elastic.h +++ b/src/index/elastic.h @@ -5,7 +5,7 @@ typedef struct es_bulk_line { struct es_bulk_line *next; - char uuid_str[UUID_STR_LEN]; + char path_md5_str[MD5_STR_LENGTH]; char line[0]; } es_bulk_line_t; @@ -16,9 +16,9 @@ typedef struct es_indexer es_indexer_t; void elastic_index_line(es_bulk_line_t *line); -void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]); +void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]); -void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]); +void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]); es_indexer_t *create_indexer(const char *url, const char *index); 
@@ -27,10 +27,10 @@ void finish_indexer(char *script, int async_script, char *index_id); void elastic_init(int force_reset, const char* user_mappings, const char* user_settings); -cJSON *elastic_get_document(const char *uuid_str); +cJSON *elastic_get_document(const char *id_str); char *elastic_get_status(); -void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]); +void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]); #endif diff --git a/src/index/static_generated.c b/src/index/static_generated.c index 87fb2b1..14a7539 100644 --- a/src/index/static_generated.c +++ b/src/index/static_generated.c @@ -1,3 +1,3 @@ -char mappings_json[1954] = {123,34,112,114,111,112,101,114,116,105,101,115,34,58,123,34,95,116,105,101,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,100,111,99,95,118,97,108,117,101,115,34,58,116,114,117,101,125,44,34,95,100,101,112,116,104,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,125,44,34,112,97,116,104,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,112,97,116,104,95,97,110,97,108,121,122,101,114,34,44,34,99,111,112,121,95,116,111,34,58,34,115,117,103,103,101,115,116,45,112,97,116,104,34,44,34,102,105,101,108,100,100,97,116,97,34,58,116,114,117,101,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,44,34,116,101,120,116,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,125,125,125,44,34,115,117,103,103,101,115,116,45,112,97,116,104,34,58,123,34,116,121,112,101,34,58,34,99,111,109,112,108,101,116,105,111,110,34,44,34,97,110,97,108,121,122,101,114,34,58,34,99,97,115,101,95,105,110,115,101,110,115,105,116,105,118,1
01,95,107,119,95,97,110,97,108,121,122,101,114,34,125,44,34,109,105,109,101,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,116,104,117,109,98,110,97,105,108,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,118,105,100,101,111,99,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,97,117,100,105,111,99,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,100,117,114,97,116,105,111,110,34,58,123,34,116,121,112,101,34,58,34,102,108,111,97,116,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,119,105,100,116,104,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,104,101,105,103,104,116,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,112,97,103,101,115,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,109,116,105,109,101,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,125,44,34,115,105,122,101,34,58,123,34,116,121,112,101,34,58,34,108,111,110,103,34,125,44,34,105,110,100,101,120,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,110,97,109,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,125,125,44,34,102,111,110,116,95,110,97,109,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,
110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,108,98,117,109,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,114,116,105,115,116,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,116,105,116,108,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,103,101,110,114,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,108,98,117,109,95,97,114,116,105,115,116,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,95,107,101,121,119,111,114,100,46,42,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,95,116,101,120,116,46,42,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,125,125,44,34,95,117,114,108,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,99,111,110,116,101,110,116,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,105,110,100,101,120,95,111,112,116,105,111,110,115,34,58,34,111,102,102,115,101,116,115,34,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,
34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,125,125,44,34,116,97,103,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,99,111,112,121,95,116,111,34,58,34,115,117,103,103,101,115,116,45,116,97,103,34,125,44,34,115,117,103,103,101,115,116,45,116,97,103,34,58,123,34,116,121,112,101,34,58,34,99,111,109,112,108,101,116,105,111,110,34,44,34,97,110,97,108,121,122,101,114,34,58,34,99,97,115,101,95,105,110,115,101,110,115,105,116,105,118,101,95,107,119,95,97,110,97,108,121,122,101,114,34,125,44,34,101,120,105,102,95,109,97,107,101,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,101,120,105,102,95,109,111,100,101,108,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,101,120,105,102,58,115,111,102,116,119,97,114,101,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,101,120,105,102,95,101,120,112,111,115,117,114,101,95,116,105,109,101,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,102,110,117,109,98,101,114,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,105,115,111,95,115,112,101,101,100,95,114,97,116,105,110,103,115,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,102,111,99,97,108,95,108,101,110,103,116,104,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,117,115,101,114,95,99,111,109,109,101,110,116,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,117,116,104,111,114,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,109,111,100,105,102,105,101,100,95,98,121,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,125,125,0}; +char mappings_json[1996] = 
{123,34,112,114,111,112,101,114,116,105,101,115,34,58,123,34,95,116,105,101,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,100,111,99,95,118,97,108,117,101,115,34,58,116,114,117,101,125,44,34,95,100,101,112,116,104,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,125,44,34,112,97,116,104,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,112,97,116,104,95,97,110,97,108,121,122,101,114,34,44,34,99,111,112,121,95,116,111,34,58,34,115,117,103,103,101,115,116,45,112,97,116,104,34,44,34,102,105,101,108,100,100,97,116,97,34,58,116,114,117,101,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,44,34,116,101,120,116,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,125,125,125,44,34,115,117,103,103,101,115,116,45,112,97,116,104,34,58,123,34,116,121,112,101,34,58,34,99,111,109,112,108,101,116,105,111,110,34,44,34,97,110,97,108,121,122,101,114,34,58,34,99,97,115,101,95,105,110,115,101,110,115,105,116,105,118,101,95,107,119,95,97,110,97,108,121,122,101,114,34,125,44,34,109,105,109,101,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,112,97,114,101,110,116,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,116,104,117,109,98,110,97,105,108,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,118,105,100,101,111,99,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,97,117,100,105,111,99,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105
,110,100,101,120,34,58,102,97,108,115,101,125,44,34,100,117,114,97,116,105,111,110,34,58,123,34,116,121,112,101,34,58,34,102,108,111,97,116,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,119,105,100,116,104,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,104,101,105,103,104,116,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,112,97,103,101,115,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,109,116,105,109,101,34,58,123,34,116,121,112,101,34,58,34,105,110,116,101,103,101,114,34,125,44,34,115,105,122,101,34,58,123,34,116,121,112,101,34,58,34,108,111,110,103,34,125,44,34,105,110,100,101,120,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,110,97,109,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,125,125,44,34,102,111,110,116,95,110,97,109,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,108,98,117,109,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,114,116,105,115,116,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,116,105,116,108,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,3
4,103,101,110,114,101,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,108,98,117,109,95,97,114,116,105,115,116,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,95,107,101,121,119,111,114,100,46,42,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,95,116,101,120,116,46,42,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,125,125,44,34,95,117,114,108,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,105,110,100,101,120,34,58,102,97,108,115,101,125,44,34,99,111,110,116,101,110,116,34,58,123,34,97,110,97,108,121,122,101,114,34,58,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,44,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,105,110,100,101,120,95,111,112,116,105,111,110,115,34,58,34,111,102,102,115,101,116,115,34,44,34,102,105,101,108,100,115,34,58,123,34,110,71,114,97,109,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,44,34,97,110,97,108,121,122,101,114,34,58,34,109,121,95,110,71,114,97,109,34,125,125,125,44,34,116,97,103,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,44,34,99,111,112,121,95,116,111,34,58,34,115,117,103,103,101,115,116,45,116,97,103,34,125,44,34,115,117,103,103,101,115,116,45,116,97,103,34,58,123,34,116,121,112,101,34,58,34,99,111,109,112,108,101,116,105,111,110,34,44,34,97,110,97,108,121,122,101,114,34,58,34,99,97,115,101,95,105,110,115,101,110,115,105,116,105,118,101,95,107,119,95,97,110,97,108,121,122,101,114,34,125,44,34,101,120,105,102,95,
109,97,107,101,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,101,120,105,102,95,109,111,100,101,108,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,101,120,105,102,58,115,111,102,116,119,97,114,101,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,101,120,105,102,95,101,120,112,111,115,117,114,101,95,116,105,109,101,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,102,110,117,109,98,101,114,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,105,115,111,95,115,112,101,101,100,95,114,97,116,105,110,103,115,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,102,111,99,97,108,95,108,101,110,103,116,104,34,58,123,34,116,121,112,101,34,58,34,107,101,121,119,111,114,100,34,125,44,34,101,120,105,102,95,117,115,101,114,95,99,111,109,109,101,110,116,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,97,117,116,104,111,114,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,44,34,109,111,100,105,102,105,101,100,95,98,121,34,58,123,34,116,121,112,101,34,58,34,116,101,120,116,34,125,125,125,0}; char settings_json[548] = 
{123,34,105,110,100,101,120,34,58,123,34,114,101,102,114,101,115,104,95,105,110,116,101,114,118,97,108,34,58,34,51,48,115,34,44,34,99,111,100,101,99,34,58,34,98,101,115,116,95,99,111,109,112,114,101,115,115,105,111,110,34,44,34,110,117,109,98,101,114,95,111,102,95,114,101,112,108,105,99,97,115,34,58,48,125,44,34,97,110,97,108,121,115,105,115,34,58,123,34,116,111,107,101,110,105,122,101,114,34,58,123,34,112,97,116,104,95,116,111,107,101,110,105,122,101,114,34,58,123,34,116,121,112,101,34,58,34,112,97,116,104,95,104,105,101,114,97,114,99,104,121,34,125,44,34,109,121,95,110,71,114,97,109,95,116,111,107,101,110,105,122,101,114,34,58,123,34,116,121,112,101,34,58,34,110,71,114,97,109,34,44,34,109,105,110,95,103,114,97,109,34,58,51,44,34,109,97,120,95,103,114,97,109,34,58,51,125,125,44,34,97,110,97,108,121,122,101,114,34,58,123,34,112,97,116,104,95,97,110,97,108,121,122,101,114,34,58,123,34,116,111,107,101,110,105,122,101,114,34,58,34,112,97,116,104,95,116,111,107,101,110,105,122,101,114,34,44,34,102,105,108,116,101,114,34,58,91,34,108,111,119,101,114,99,97,115,101,34,93,125,44,34,99,97,115,101,95,105,110,115,101,110,115,105,116,105,118,101,95,107,119,95,97,110,97,108,121,122,101,114,34,58,123,34,116,111,107,101,110,105,122,101,114,34,58,34,107,101,121,119,111,114,100,34,44,34,102,105,108,116,101,114,34,58,91,34,108,111,119,101,114,99,97,115,101,34,93,125,44,34,109,121,95,110,71,114,97,109,34,58,123,34,116,111,107,101,110,105,122,101,114,34,58,34,109,121,95,110,71,114,97,109,95,116,111,107,101,110,105,122,101,114,34,44,34,102,105,108,116,101,114,34,58,91,34,108,111,119,101,114,99,97,115,101,34,44,34,97,115,99,105,105,102,111,108,100,105,110,103,34,93,125,44,34,99,111,110,116,101,110,116,95,97,110,97,108,121,122,101,114,34,58,123,34,116,111,107,101,110,105,122,101,114,34,58,34,115,116,97,110,100,97,114,100,34,44,34,102,105,108,116,101,114,34,58,91,34,108,111,119,101,114,99,97,115,101,34,44,34,97,115,99,105,105,102,111,108,100,105,110,103,34,93,125,125,125,125,0}; char 
pipeline_json[217] = {123,34,100,101,115,99,114,105,112,116,105,111,110,34,58,34,67,111,112,121,32,95,105,100,32,116,111,32,95,116,105,101,44,32,115,97,118,101,32,112,97,116,104,32,100,101,112,116,104,34,44,34,112,114,111,99,101,115,115,111,114,115,34,58,91,123,34,115,99,114,105,112,116,34,58,123,34,115,111,117,114,99,101,34,58,34,99,116,120,46,95,116,105,101,32,61,32,99,116,120,46,95,105,100,59,32,99,116,120,46,95,100,101,112,116,104,32,61,32,99,116,120,46,112,97,116,104,46,108,101,110,103,116,104,40,41,32,61,61,32,48,32,63,32,48,32,58,32,49,32,43,32,99,116,120,46,112,97,116,104,46,108,101,110,103,116,104,40,41,32,45,32,99,116,120,46,112,97,116,104,46,114,101,112,108,97,99,101,40,92,34,47,92,34,44,32,92,34,92,34,41,46,108,101,110,103,116,104,40,41,59,34,125,125,93,125,0}; diff --git a/src/io/serialize.c b/src/io/serialize.c index e6eeb63..424b132 100644 --- a/src/io/serialize.c +++ b/src/io/serialize.c @@ -6,13 +6,13 @@ static __thread int index_fd = -1; typedef struct { - unsigned char uuid[16]; - unsigned long ino; + unsigned char path_md5[MD5_DIGEST_LENGTH]; unsigned long size; unsigned int mime; int mtime; short base; short ext; + char has_parent; } line_t; void skip_meta(FILE *file) { @@ -32,7 +32,7 @@ void skip_meta(FILE *file) { void write_index_descriptor(char *path, index_descriptor_t *desc) { cJSON *json = cJSON_CreateObject(); - cJSON_AddStringToObject(json, "uuid", desc->uuid); + cJSON_AddStringToObject(json, "id", desc->id); cJSON_AddStringToObject(json, "version", desc->version); cJSON_AddStringToObject(json, "root", desc->root); cJSON_AddStringToObject(json, "name", desc->name); @@ -82,7 +82,7 @@ index_descriptor_t read_index_descriptor(char *path) { strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring); descriptor.root_len = (short) strlen(descriptor.root); strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring); - strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring); + 
strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring); if (cJSON_GetObjectItem(json, "type") == NULL) { strcpy(descriptor.type, INDEX_TYPE_BIN); } else { @@ -219,7 +219,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) { dyn_buffer_t buf = dyn_buffer_create(); FILE *file = fopen(path, "rb"); - while (1) { + while (TRUE) { buf.cur = 0; size_t _ = fread((void *) &line, 1, sizeof(line_t), file); if (feof(file)) { @@ -229,8 +229,8 @@ void read_index_bin(const char *path, const char *index_id, index_func func) { cJSON *document = cJSON_CreateObject(); cJSON_AddStringToObject(document, "index", index_id); - char uuid_str[UUID_STR_LEN]; - uuid_unparse(line.uuid, uuid_str); + char path_md5_str[MD5_STR_LENGTH]; + buf2hex(line.path_md5, sizeof(line.path_md5), path_md5_str); const char *mime_text = mime_get_mime_text(line.mime); if (mime_text == NULL) { @@ -247,9 +247,6 @@ void read_index_bin(const char *path, const char *index_id, index_func func) { } dyn_buffer_write_char(&buf, '\0'); - char full_filename[PATH_MAX]; - strcpy(full_filename, buf.buf); - cJSON_AddStringToObject(document, "extension", buf.buf + line.ext); if (*(buf.buf + line.ext - 1) == '.') { *(buf.buf + line.ext - 1) = '\0'; @@ -331,7 +328,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) { cJSON *meta_obj = NULL; if (IndexCtx.meta != NULL) { - const char *meta_string = g_hash_table_lookup(IndexCtx.meta, full_filename); + const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str); if (meta_string != NULL) { meta_obj = cJSON_Parse(meta_string); @@ -346,7 +343,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) { } if (IndexCtx.tags != NULL) { - const char *tags_string = g_hash_table_lookup(IndexCtx.tags, full_filename); + const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str); if (tags_string != NULL) { cJSON *tags_arr = cJSON_Parse(tags_string); 
cJSON_DeleteItemFromObject(document, "tag"); @@ -354,7 +351,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) { } } - func(document, uuid_str); + func(document, path_md5_str); cJSON_Delete(document); if (meta_obj) { cJSON_Delete(meta_obj); @@ -382,7 +379,7 @@ const char *json_type_array_fields[] = { void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) { FILE *file = fopen(path, "r"); - while (1) { + while (TRUE) { char *line = NULL; size_t len; size_t read = getline(&line, &len, file); @@ -402,7 +399,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func } cJSON *document = cJSON_CreateObject(); - const char *uuid_str = cJSON_GetObjectItem(input, "_id")->valuestring; + const char *id_str = cJSON_GetObjectItem(input, "_id")->valuestring; for (int i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) { cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]); @@ -430,7 +427,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func } } - func(document, uuid_str); + func(document, id_str); cJSON_Delete(document); cJSON_Delete(input); @@ -438,7 +435,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func fclose(file); } -void read_index(const char *path, const char index_id[UUID_STR_LEN], const char *type, index_func func) { +void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) { if (strcmp(type, INDEX_TYPE_BIN) == 0) { read_index_bin(path, index_id, func); @@ -451,13 +448,15 @@ void incremental_read(GHashTable *table, const char *filepath) { FILE *file = fopen(filepath, "rb"); line_t line; + LOG_DEBUGF("serialize.c", "Incremental read %s", filepath) + while (1) { - size_t ret = fread((void *) &line, 1, sizeof(line_t), file); + size_t ret = fread((void *) &line, sizeof(line_t), 1, file); if (ret != 1 || feof(file)) { break; } - 
incremental_put(table, line.ino, line.mtime); + incremental_put(table, line.path_md5, line.mtime); while ((getc(file))) {} skip_meta(file); @@ -475,33 +474,47 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath, FILE *dst_file = fopen(dst_filepath, "ab"); line_t line; - while (1) { - size_t ret = fread((void *) &line, 1, sizeof(line_t), file); + LOG_DEBUGF("serialize.c", "Incremental copy %s", filepath) + + while (TRUE) { + size_t ret = fread((void *) &line, sizeof(line_t), 1, file); if (ret != 1 || feof(file)) { break; } - if (incremental_get(copy_table, line.ino)) { + // Assume that files with parents still exist. + // One way to "fix" this would be to check if the parent is marked for copy but it would consistently + // delete files with grandparents, which is a side-effect worse than having orphaned files + if (line.has_parent || incremental_get(copy_table, line.path_md5)) { fwrite(&line, sizeof(line), 1, dst_file); - size_t buf_len; - char *buf = store_read(store, (char *) line.uuid, 16, &buf_len); - store_write(dst_store, (char *) line.uuid, 16, buf, buf_len); - free(buf); - + // Copy filepath + char filepath_buf[PATH_MAX]; char c; + char *ptr = filepath_buf; while ((c = (char) getc(file))) { - fwrite(&c, sizeof(c), 1, dst_file); + *ptr++ = c; + } + *ptr = '\0'; + fwrite(filepath_buf, (ptr - filepath_buf) + 1, 1, dst_file); + + // Copy tn store contents + size_t buf_len; + char path_md5[MD5_DIGEST_LENGTH]; + MD5((unsigned char *) filepath_buf, (ptr - filepath_buf), (unsigned char *) path_md5); + char *buf = store_read(store, path_md5, sizeof(path_md5), &buf_len); + if (buf_len != 0) { + store_write(dst_store, path_md5, sizeof(path_md5), buf, buf_len); + free(buf); } - fwrite("\0", sizeof(c), 1, dst_file); enum metakey key; while (1) { key = getc(file); + fwrite(&key, sizeof(char), 1, dst_file); if (key == '\n') { break; } - fwrite(&key, sizeof(char), 1, dst_file); if (IS_META_INT(key)) { int val; @@ -517,14 +530,12 @@ void 
incremental_copy(store_t *store, store_t *dst_store, const char *filepath, } fwrite("\0", sizeof(c), 1, dst_file); } - - if (ret != 1) { - break; - } } } else { + while ((getc(file))) {} skip_meta(file); } } fclose(file); + fclose(dst_file); } diff --git a/src/io/serialize.h b/src/io/serialize.h index 767b439..8ed5fe0 100644 --- a/src/io/serialize.h +++ b/src/io/serialize.h @@ -7,14 +7,14 @@ #include #include -typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]); +typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]); void incremental_copy(store_t *store, store_t *dst_store, const char *filepath, const char *dst_filepath, GHashTable *copy_table); void write_document(document_t *doc); -void read_index(const char *path, const char[UUID_STR_LEN], const char *type, index_func); +void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func); void incremental_read(GHashTable *table, const char *filepath); diff --git a/src/io/store.c b/src/io/store.c index 3000b1b..a7871cc 100644 --- a/src/io/store.c +++ b/src/io/store.c @@ -40,13 +40,17 @@ void store_destroy(store_t *store) { free(store); } +void store_flush(store_t *store) { + mdb_env_sync(store->env, TRUE); +} + void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) { if (LogCtx.very_verbose) { - if (key_len == 16) { - char uuid_str[UUID_STR_LEN] = {0, }; - uuid_unparse((unsigned char *) key, uuid_str); - LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len) + if (key_len == MD5_DIGEST_LENGTH) { + char path_md5_str[MD5_STR_LENGTH]; + buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str); + LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len) } else { LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len) } diff --git a/src/io/store.h b/src/io/store.h index aaaae89..18905e4 100644 --- a/src/io/store.h +++ b/src/io/store.h @@ -24,6 +24,8 @@ void store_destroy(store_t *store); void 
store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len); +void store_flush(store_t *store); + char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen); GHashTable *store_read_all(store_t *store); diff --git a/src/io/walk.c b/src/io/walk.c index abbd79c..0d7ad5e 100644 --- a/src/io/walk.c +++ b/src/io/walk.c @@ -20,7 +20,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, job->vfile.info = *info; - memset(job->parent, 0, 16); + memset(job->parent, 0, MD5_DIGEST_LENGTH); job->vfile.filepath = job->filepath; job->vfile.read = fs_read; diff --git a/src/main.c b/src/main.c index f9fb08b..d5b5593 100644 --- a/src/main.c +++ b/src/main.c @@ -21,7 +21,7 @@ #define EPILOG "Made by simon987 . Released under GPL-3.0" -static const char *const Version = "2.8.5"; +static const char *const Version = "2.9.0"; static const char *const usage[] = { "sist2 scan [OPTION]... PATH", "sist2 index [OPTION]... INDEX", @@ -34,9 +34,10 @@ void init_dir(const char *dirpath) { char path[PATH_MAX]; snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath); - uuid_t uuid; - uuid_generate(uuid); - uuid_unparse(uuid, ScanCtx.index.desc.uuid); + unsigned char index_md5[MD5_DIGEST_LENGTH]; + MD5((unsigned char *) ScanCtx.index.desc.name, strlen(ScanCtx.index.desc.name), index_md5); + buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id); + time(&ScanCtx.index.desc.timestamp); strcpy(ScanCtx.index.desc.version, Version); strcpy(ScanCtx.index.desc.type, INDEX_TYPE_BIN); @@ -218,7 +219,7 @@ void sist2_scan(scan_args_t *args) { while ((de = readdir(dir)) != NULL) { if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) { char file_path[PATH_MAX]; - snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name); + snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name); incremental_read(ScanCtx.original_table, file_path); } } @@ -233,8 +234,6 @@ void sist2_scan(scan_args_t *args) { 
tpool_wait(ScanCtx.pool); tpool_destroy(ScanCtx.pool); - generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path); - if (args->incremental != NULL) { char dst_path[PATH_MAX]; snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental); @@ -250,7 +249,7 @@ void sist2_scan(scan_args_t *args) { while ((de = readdir(dir)) != NULL) { if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) { char file_path[PATH_MAX]; - snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name); + snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name); incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table); } } @@ -265,6 +264,8 @@ void sist2_scan(scan_args_t *args) { store_destroy(source_tags); } + generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path); + store_destroy(ScanCtx.index.store); } @@ -327,7 +328,7 @@ void sist2_index(index_args_t *args) { if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) { char file_path[PATH_MAX]; snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name); - read_index(file_path, desc.uuid, desc.type, f); + read_index(file_path, desc.id, desc.type, f); } } closedir(dir); @@ -337,7 +338,7 @@ void sist2_index(index_args_t *args) { tpool_destroy(IndexCtx.pool); if (!args->print) { - finish_indexer(args->script, args->async_script, desc.uuid); + finish_indexer(args->script, args->async_script, desc.id); } store_destroy(IndexCtx.tag_store); @@ -357,7 +358,7 @@ void sist2_exec_script(exec_args_t *args) { LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type) - execute_update_script(args->script, args->async_script, desc.uuid); + execute_update_script(args->script, args->async_script, desc.id); free(args->script); } @@ -533,7 +534,7 @@ int main(int argc, const char *argv[]) { } sist2_web(web_args); - } else if (strcmp(argv[0], "exec-script") == 0) { + } else if (strcmp(argv[0], "exec-script") == 0) { int err = 
exec_args_validate(exec_args, argc, argv); if (err != 0) { diff --git a/src/parsing/parse.c b/src/parsing/parse.c index ff76007..91951dd 100644 --- a/src/parsing/parse.c +++ b/src/parsing/parse.c @@ -46,29 +46,31 @@ void parse(void *arg) { parse_job_t *job = arg; document_t doc; - int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino); - if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) { - incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino); - return; - } - doc.filepath = job->filepath; doc.ext = (short) job->ext; doc.base = (short) job->base; + + char *rel_path = doc.filepath + ScanCtx.index.desc.root_len; + MD5((unsigned char *) rel_path, strlen(rel_path), doc.path_md5); + doc.meta_head = NULL; doc.meta_tail = NULL; doc.mime = 0; doc.size = job->vfile.info.st_size; - doc.ino = job->vfile.info.st_ino; doc.mtime = job->vfile.info.st_mtim.tv_sec; - uuid_generate(doc.uuid); + int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5); + if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) { + incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5); + return; + } + char *buf[MAGIC_BUF_SIZE]; if (LogCtx.very_verbose) { - char uuid_str[UUID_STR_LEN]; - uuid_unparse(doc.uuid, uuid_str); - LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str) + char path_md5_str[MD5_STR_LENGTH]; + buf2hex(doc.path_md5, MD5_DIGEST_LENGTH, path_md5_str); + LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str) } if (job->vfile.info.st_size == 0) { @@ -86,7 +88,8 @@ void parse(void *arg) { // Get mime type with libmagic if (!job->vfile.is_fs_file) { - LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported"); + LOG_WARNING(job->filepath, + "Guessing mime type with libmagic inside archive files is not currently supported"); goto abort; } @@ -169,11 +172,15 @@ void parse(void *arg) { abort: //Parent meta - if (!uuid_is_null(job->parent)) { 
- meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1); + if (!md5_digest_is_null(job->parent)) { + meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH); meta_parent->key = MetaParent; - uuid_unparse(job->parent, meta_parent->str_val); + buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val); APPEND_META((&doc), meta_parent) + + doc.has_parent = TRUE; + } else { + doc.has_parent = FALSE; } write_document(&doc); diff --git a/src/parsing/sidecar.c b/src/parsing/sidecar.c index ed7e606..4043b5b 100644 --- a/src/parsing/sidecar.c +++ b/src/parsing/sidecar.c @@ -7,7 +7,7 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) { LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath) size_t size; - char* buf = read_all(vfile, &size); + char *buf = read_all(vfile, &size); if (buf == NULL) { LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath) return; @@ -23,11 +23,11 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) { } char *json_str = cJSON_PrintUnformatted(json); - char filepath[PATH_MAX]; - memcpy(filepath, vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len); - *(filepath + doc->ext - 1) = '\0'; + unsigned char path_md5[MD5_DIGEST_LENGTH]; + MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len, + path_md5); - store_write(ScanCtx.index.meta_store, filepath, doc->ext, json_str, strlen(json_str) + 1); + store_write(ScanCtx.index.meta_store, (char *) path_md5, sizeof(path_md5), json_str, strlen(json_str) + 1); cJSON_Delete(json); free(json_str); diff --git a/src/sist.h b/src/sist.h index 2c2992f..705b7fd 100644 --- a/src/sist.h +++ b/src/sist.h @@ -23,9 +23,10 @@ #undef ABS #define ABS(a) (((a) < 0) ? 
-(a) : (a)) -#define UUID_STR_LEN 37 #define UNUSED(x) __attribute__((__unused__)) x +#define MD5_STR_LENGTH 33 + #include "util.h" #include "log.h" #include "types.h" @@ -47,5 +48,4 @@ #include #include - #endif diff --git a/src/static/js/dom.js b/src/static/js/dom.js index 7ea8c24..6ccbf45 100644 --- a/src/static/js/dom.js +++ b/src/static/js/dom.js @@ -22,7 +22,7 @@ function gifOver(thumbnail, hit) { thumbnail.addEventListener("mouseout", function () { //Reset timer thumbnail.mouseStayedOver = false; - thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`); + thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`); }) } @@ -419,7 +419,7 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) { thumbnail.setAttribute("class", "card-img-top fit"); } } - thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`); + thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`); if (shouldDisplayRawImage(hit)) { thumbnail.addEventListener("click", () => { diff --git a/src/static/js/search.js b/src/static/js/search.js index e3efc54..4778af6 100644 --- a/src/static/js/search.js +++ b/src/static/js/search.js @@ -174,7 +174,7 @@ function saveTag(tag, hit) { delete: false, name: tag, doc_id: hit["_id"], - relpath: relPath + path_md5: md5(relPath) }).then(() => { tagBar.blur(); $("#tagModal").modal("hide"); @@ -604,6 +604,7 @@ function search(after = null) { hits.forEach(hit => { hit["_source"]["name"] = strUnescape(hit["_source"]["name"]); hit["_source"]["path"] = strUnescape(hit["_source"]["path"]); + hit["_path_md5"] = md5(hit["_source"]["path"] + (hit["_source"]["path"] ? "/" : "") + hit["_source"]["name"] + ext(hit)); }); if (!after) { diff --git a/src/static/search.html b/src/static/search.html index 62ba89f..d55a765 100644 --- a/src/static/search.html +++ b/src/static/search.html @@ -12,7 +12,7 @@