Fix tag delete

Update tags tab automatically
Update binary names (again)
2025-12-12 15:08:53 +00:00 · 2020-12-31 12:55:37 -05:00 · 2020-12-31 12:45:23 -05:00 · 2020-12-31 11:03:25 -05:00 · 2020-12-31 10:55:34 -05:00 · 2020-12-31 10:54:30 -05:00
37 changed files with 499 additions and 296 deletions
--- a/.drone.yml
+++ b/.drone.yml
@@ -0,0 +1,56 @@
+kind: pipeline
+type: docker
+name: amd64  # x86-64 Linux build; uploads the release binary and the debug tarball
+
+platform:
+  os: linux
+  arch: amd64
+
+steps:
+  - name: build
+    image: simon987/ubuntu_ci
+    commands:
+      - ./ci/build.sh  # writes ./sist2-x64-linux and ./sist2-x64-linux-debug.tar.gz
+  - name: scp files
+    image: appleboy/drone-scp
+    settings:  # connection details come from Drone secrets; none are stored in this file
+      host:
+        from_secret: SSH_HOST
+      port:
+        from_secret: SSH_PORT
+      user:
+        from_secret: SSH_USER
+      key:
+        from_secret: SSH_KEY
+      target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/  # one directory per repo, then per branch/build/commit
+      source:  # keep in sync with the artifact names produced by ci/build.sh
+        - ./sist2-x64-linux
+        - ./sist2-x64-linux-debug.tar.gz
+
+---
+kind: pipeline
+type: docker
+name: arm64  # ARM64 build; uploads the stripped release binary only (no debug tarball)
+
+platform:  # NOTE(review): os is not set here, unlike the amd64 pipeline - presumably the Drone default applies; confirm
+  arch: arm64
+
+steps:
+  - name: build
+    image: simon987/ubuntu_ci_arm
+    commands:
+      - ./ci/build_arm64.sh  # writes ./sist2-arm64-linux
+  - name: scp files
+    image: appleboy/drone-scp
+    settings:  # connection details come from Drone secrets; none are stored in this file
+      host:
+        from_secret: SSH_HOST
+      port:
+        from_secret: SSH_PORT
+      user:
+        from_secret: SSH_USER
+      key:
+        from_secret: SSH_KEY
+      target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/  # same path template as the amd64 pipeline
+      source:  # keep in sync with the artifact name produced by ci/build_arm64.sh
+        - ./sist2-arm64-linux
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,5 @@
 .idea
 thumbs
-test
 *.cbp
 CMakeCache.txt
 CMakeFiles
--- a/.teamcity/settings.kts
+++ b/.teamcity/settings.kts
@@ -1,69 +0,0 @@
-import jetbrains.buildServer.configs.kotlin.v2019_2.*
-import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.ExecBuildStep
-import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.exec
-import jetbrains.buildServer.configs.kotlin.v2019_2.triggers.vcs
-import jetbrains.buildServer.configs.kotlin.v2019_2.vcs.GitVcsRoot
-
-/*
-The settings script is an entry point for defining a TeamCity
-project hierarchy. The script should contain a single call to the
-project() function with a Project instance or an init function as
-an argument.
-
-VcsRoots, BuildTypes, Templates, and subprojects can be
-registered inside the project using the vcsRoot(), buildType(),
-template(), and subProject() methods respectively.
-
-To debug settings scripts in command-line, run the
-
-    mvnDebug org.jetbrains.teamcity:teamcity-configs-maven-plugin:generate
-
-command and attach your debugger to the port 8000.
-
-To debug in IntelliJ Idea, open the 'Maven Projects' tool window (View
-> Tool Windows -> Maven Projects), find the generate task node
-(Plugins -> teamcity-configs -> teamcity-configs:generate), the
-'Debug' option is available in the context menu for the task.
-*/
-
-version = "2019.2"
-
-project {
-
-    vcsRoot(HttpsGithubComSimon987sist2refsHeadsMaster)
-
-    buildType(Build)
-}
-
-object Build : BuildType({
-    name = "Build"
-
-    artifactRules = """
-        sist2
-        sist2_scan
-    """.trimIndent()
-
-    vcs {
-        root(HttpsGithubComSimon987sist2refsHeadsMaster)
-    }
-
-    steps {
-        exec {
-            name = "Build"
-            path = "./ci/build.sh"
-            dockerImage = "simon987/general_ci"
-            dockerImagePlatform = ExecBuildStep.ImagePlatform.Linux
-            dockerPull = true
-        }
-    }
-
-    triggers {
-        vcs {
-        }
-    }
-})
-
-object HttpsGithubComSimon987sist2refsHeadsMaster : GitVcsRoot({
-    name = "https://github.com/simon987/sist2#refs/heads/master"
-    url = "https://github.com/simon987/sist2"
-})
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,6 +5,7 @@ project(sist2 C)

 option(SIST_DEBUG "Build a debug executable" on)

+set(BUILD_TESTS off)
 add_subdirectory(third-party/libscan)
 set(ARGPARSE_SHARED off)
 add_subdirectory(third-party/argparse)
@@ -39,7 +40,6 @@ find_package(lmdb CONFIG REQUIRED)
 find_package(cJSON CONFIG REQUIRED)
 find_package(unofficial-glib CONFIG REQUIRED)
 find_package(unofficial-mongoose CONFIG REQUIRED)
-find_library(UUID_LIB NAMES uuid)
 find_package(CURL CONFIG REQUIRED)

 #find_package(OpenSSL REQUIRED)
@@ -67,7 +67,8 @@ if (SIST_DEBUG)
            -fstack-protector
            -fno-omit-frame-pointer
            -fsanitize=address
-            -O2
+            -fno-inline
+#            -O2
    )
    target_link_options(
            sist2
@@ -80,7 +81,6 @@ if (SIST_DEBUG)
            OUTPUT_NAME sist2_debug
    )
 else ()
-    #    set(VCPKG_BUILD_TYPE release)
    target_compile_options(
            sist2
            PRIVATE
@@ -107,10 +107,11 @@ target_link_libraries(
        unofficial::mongoose::mongoose
        CURL::libcurl

-        ${UUID_LIB}
        pthread
        magic

+        c
+
        scan
 )

--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
 ![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
 [![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
-[![Development snapshots](https://ci.simon987.net/app/rest/builds/buildType(Sist2_Build)/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
+[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/sist2/simon987_sist2/)

-**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
+**Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)

 # sist2

@@ -52,7 +52,7 @@ sist2 (Simple incremental search tool)
        ```
 1. Download sist2 executable
    1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
-    1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
+    1. *(or)* Download a [development snapshot](https://files.simon987.net/sist2/simon987_sist2/) *(Not recommended!)*
    1. *(or)* `docker pull simon987/sist2:latest`

 1. See [Usage guide](docs/USAGE.md)
@@ -74,7 +74,7 @@ See [Usage guide](docs/USAGE.md) for more details

 File type | Library | Content | Thumbnail | Metadata
 :---|:---|:---|:---|:---
-pdf,xps,fb2,epub | MuPDF | text+ocr | yes | title |
+pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
 cbz,cbr | *(none)* | - | yes | - |
 `audio/*` | ffmpeg | - | yes | ID3 tags |
 `video/*` | ffmpeg | - | yes | title, comment, artist |
@@ -85,6 +85,7 @@ ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
 html, xml | *(none)* | yes | no | - |
 tar, zip, rar, 7z, ar ...  | Libarchive | yes\* | - | no |
 docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
+doc (MS Word 97-2003) | antiword | yes | yes | author, title |
 mobi, azw, azw3 | libmobi | yes | no | author, title |

 \* *See [Archive files](#archive-files)*
@@ -126,12 +127,12 @@ binaries (GCC 7+ required).
 1. Install compile-time dependencies

   ```bash
-   vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf
+   vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf
   ```

 2. Build
    ```bash
    git clone --recursive https://github.com/simon987/sist2/
-    cmake -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
+    cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
    make
    ```
--- a/ci/build.sh
+++ b/ci/build.sh
@@ -4,14 +4,17 @@ VCPKG_ROOT="/vcpkg"

 rm *.gz

-rm -rf CMakeFiles CMakeCache.txt
-cmake -DSIST_DEBUG=off -DVCPKG_BUILD_TYPE=release -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
-make -j 12
-strip sist2
-gzip -9 sist2
+git submodule update --init --recursive

 rm -rf CMakeFiles CMakeCache.txt
-cmake -DSIST_DEBUG=on -DVCPKG_BUILD_TYPE=debug -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
-make -j 12
+cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
+make -j 33
+strip sist2
+mv sist2 sist2-x64-linux
+
+rm -rf CMakeFiles CMakeCache.txt
+cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
+make -j 33
 cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
-tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2
+mv sist2_debug sist2-x64-linux-debug
+tar -czf sist2-x64-linux-debug.tar.gz sist2-x64-linux-debug libasan.so.2
--- a/ci/build_arm64.sh
+++ b/ci/build_arm64.sh
@@ -4,9 +4,10 @@ VCPKG_ROOT="/vcpkg"

 rm *.gz

+git submodule update --init --recursive
+
 rm -rf CMakeFiles CMakeCache.txt
 cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
 make -j 4
 strip sist2
-mv sist2 sist2_arm64
-gzip -9 sist2_arm64
+mv sist2 sist2-arm64-linux
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -241,9 +241,11 @@ The `_text.*` items will be indexed and searchable as **text** fields (fuzzy sea

 *thumbs/*:

-LMDB key-value store. Keys are **binary** 128-bit UUID4s (`_id` field)
+LMDB key-value store. Keys are **binary** 16-byte MD5 hashes\* (`_id` field)
 and values are raw image bytes.

+*\* The hash is calculated from the full path of the file, including the extension, relative to the index root.*
+
 Importing an external `binary` type index is technically possible but
 it is currently unsupported and has no guaranties of back/forward compatibility.

--- a/schema/mappings.json
+++ b/schema/mappings.json
@@ -30,6 +30,10 @@
    "mime": {
      "type": "keyword"
    },
+    "parent": {
+      "type": "keyword",
+      "index": false
+    },
    "thumbnail": {
      "type": "keyword",
      "index": false
--- a/src/ctx.h
+++ b/src/ctx.h
@@ -13,6 +13,7 @@
 #include "libscan/text/text.h"
 #include "libscan/mobi/scan_mobi.h"
 #include "libscan/raw/raw.h"
+#include "libscan/msdoc/msdoc.h"
 #include "src/io/store.h"

 #include <glib.h>
@@ -48,6 +49,7 @@ typedef struct {
    scan_text_ctx_t text_ctx;
    scan_mobi_ctx_t mobi_ctx;
    scan_raw_ctx_t raw_ctx;
+    scan_msdoc_ctx_t msdoc_ctx;
 } ScanCtx_t;

 typedef struct {
--- a/src/index/elastic.c
+++ b/src/index/elastic.c
@@ -30,11 +30,11 @@ void elastic_cleanup() {
    }
 }

-void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
+void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {

    cJSON *line = cJSON_CreateObject();

-    cJSON_AddStringToObject(line, "_id", uuid_str);
+    cJSON_AddStringToObject(line, "_id", id_str);
    cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
    cJSON_AddStringToObject(line, "_type", "_doc");
    cJSON_AddItemReferenceToObject(line, "_source", document);
@@ -52,13 +52,13 @@ void index_json_func(void *arg) {
    elastic_index_line(line);
 }

-void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
+void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
    char *json = cJSON_PrintUnformatted(document);

    size_t json_len = strlen(json);
    es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
    memcpy(bulk_line->line, json, json_len);
-    memcpy(bulk_line->uuid_str, uuid_str, UUID_STR_LEN);
+    memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
    *(bulk_line->line + json_len) = '\n';
    *(bulk_line->line + json_len + 1) = '\0';
    bulk_line->next = NULL;
@@ -67,7 +67,7 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
    tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
 }

-void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]) {
+void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {

    if (Indexer == NULL) {
        Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
@@ -129,9 +129,9 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
    while (line != NULL && *count < max) {
        char action_str[256];
        snprintf(
-                action_str, 256,
+                action_str, sizeof(action_str),
                "{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
-                line->uuid_str, Indexer->es_index
+                line->path_md5_str, Indexer->es_index
        );

        size_t action_str_len = strlen(action_str);
@@ -220,7 +220,7 @@ void _elastic_flush(int max) {
    if (r->status_code == 413) {

        if (max <= 1) {
-            LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->uuid_str)
+            LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
            free_response(r);
            free(buf);
            delete_queue(1);
@@ -408,9 +408,9 @@ void elastic_init(int force_reset, const char* user_mappings, const char* user_s
    }
 }

-cJSON *elastic_get_document(const char *uuid_str) {
+cJSON *elastic_get_document(const char *id_str) {
    char url[4096];
-    snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, uuid_str);
+    snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);

    response_t *r = web_get(url, 3);
    cJSON *json = NULL;
--- a/src/index/elastic.h
+++ b/src/index/elastic.h
@@ -5,7 +5,7 @@

 typedef struct es_bulk_line {
    struct es_bulk_line *next;
-    char uuid_str[UUID_STR_LEN];
+    char path_md5_str[MD5_STR_LENGTH]; // document id string; index_json() copies it in and create_bulk_buffer() emits it as the bulk action "_id"
    char line[0];
 } es_bulk_line_t;

@@ -16,9 +16,9 @@ typedef struct es_indexer es_indexer_t;

 void elastic_index_line(es_bulk_line_t *line);

-void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
+void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);

-void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
+void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);

 es_indexer_t *create_indexer(const char *url, const char *index);

@@ -27,10 +27,10 @@ void finish_indexer(char *script, int async_script, char *index_id);

 void elastic_init(int force_reset, const char* user_mappings, const char* user_settings);

-cJSON *elastic_get_document(const char *uuid_str);
+cJSON *elastic_get_document(const char *id_str);

 char *elastic_get_status();

-void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]);
+void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);

 #endif
--- a/src/index/static_generated.c
+++ b/src/index/static_generated.c
--- a/src/io/serialize.c
+++ b/src/io/serialize.c
@@ -6,13 +6,13 @@
 static __thread int index_fd = -1;

 typedef struct {
-    unsigned char uuid[16];
-    unsigned long ino;
+    unsigned char path_md5[MD5_DIGEST_LENGTH]; // raw MD5 digest identifying the entry: hex-encoded into the document id by read_index_bin(), table key in incremental_read()
    unsigned long size;
    unsigned int mime;
    int mtime;
    short base;
    short ext;
+    char has_parent; // non-zero: incremental_copy() keeps the entry without consulting copy_table
 } line_t;

 void skip_meta(FILE *file) {
@@ -32,7 +32,7 @@ void skip_meta(FILE *file) {

 void write_index_descriptor(char *path, index_descriptor_t *desc) {
    cJSON *json = cJSON_CreateObject();
-    cJSON_AddStringToObject(json, "uuid", desc->uuid);
+    cJSON_AddStringToObject(json, "id", desc->id);
    cJSON_AddStringToObject(json, "version", desc->version);
    cJSON_AddStringToObject(json, "root", desc->root);
    cJSON_AddStringToObject(json, "name", desc->name);
@@ -82,7 +82,7 @@ index_descriptor_t read_index_descriptor(char *path) {
    strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
    descriptor.root_len = (short) strlen(descriptor.root);
    strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
-    strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
+    strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
    if (cJSON_GetObjectItem(json, "type") == NULL) {
        strcpy(descriptor.type, INDEX_TYPE_BIN);
    } else {
@@ -219,7 +219,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
    dyn_buffer_t buf = dyn_buffer_create();

    FILE *file = fopen(path, "rb");
-    while (1) {
+    while (TRUE) {
        buf.cur = 0;
        size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
        if (feof(file)) {
@@ -229,8 +229,8 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
        cJSON *document = cJSON_CreateObject();
        cJSON_AddStringToObject(document, "index", index_id);

-        char uuid_str[UUID_STR_LEN];
-        uuid_unparse(line.uuid, uuid_str);
+        char path_md5_str[MD5_STR_LENGTH];
+        buf2hex(line.path_md5, sizeof(line.path_md5), path_md5_str);

        const char *mime_text = mime_get_mime_text(line.mime);
        if (mime_text == NULL) {
@@ -247,9 +247,6 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
        }
        dyn_buffer_write_char(&buf, '\0');

-        char full_filename[PATH_MAX];
-        strcpy(full_filename, buf.buf);
-
        cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
        if (*(buf.buf + line.ext - 1) == '.') {
            *(buf.buf + line.ext - 1) = '\0';
@@ -331,7 +328,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {

        cJSON *meta_obj = NULL;
        if (IndexCtx.meta != NULL) {
-            const char *meta_string = g_hash_table_lookup(IndexCtx.meta, full_filename);
+            const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
            if (meta_string != NULL) {
                meta_obj = cJSON_Parse(meta_string);

@@ -346,7 +343,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
        }

        if (IndexCtx.tags != NULL) {
-            const char *tags_string = g_hash_table_lookup(IndexCtx.tags, full_filename);
+            const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
            if (tags_string != NULL) {
                cJSON *tags_arr = cJSON_Parse(tags_string);
                cJSON_DeleteItemFromObject(document, "tag");
@@ -354,7 +351,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
            }
        }

-        func(document, uuid_str);
+        func(document, path_md5_str);
        cJSON_Delete(document);
        if (meta_obj) {
            cJSON_Delete(meta_obj);
@@ -382,7 +379,7 @@ const char *json_type_array_fields[] = {
 void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) {

    FILE *file = fopen(path, "r");
-    while (1) {
+    while (TRUE) {
        char *line = NULL;
        size_t len;
        size_t read = getline(&line, &len, file);
@@ -402,7 +399,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
        }

        cJSON *document = cJSON_CreateObject();
-        const char *uuid_str = cJSON_GetObjectItem(input, "_id")->valuestring;
+        const char *id_str = cJSON_GetObjectItem(input, "_id")->valuestring;

        for (int i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) {
            cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]);
@@ -430,7 +427,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
            }
        }

-        func(document, uuid_str);
+        func(document, id_str);
        cJSON_Delete(document);
        cJSON_Delete(input);

@@ -438,7 +435,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
    fclose(file);
 }

-void read_index(const char *path, const char index_id[UUID_STR_LEN], const char *type, index_func func) {
+void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {

    if (strcmp(type, INDEX_TYPE_BIN) == 0) {
        read_index_bin(path, index_id, func);
@@ -451,13 +448,15 @@ void incremental_read(GHashTable *table, const char *filepath) {
    FILE *file = fopen(filepath, "rb");
    line_t line;

+    LOG_DEBUGF("serialize.c", "Incremental read %s", filepath)
+
    while (1) {
-        size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
+        size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
        if (ret != 1 || feof(file)) {
            break;
        }

-        incremental_put(table, line.ino, line.mtime);
+        incremental_put(table, line.path_md5, line.mtime);

        while ((getc(file))) {}
        skip_meta(file);
@@ -475,33 +474,47 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
    FILE *dst_file = fopen(dst_filepath, "ab");
    line_t line;

-    while (1) {
-        size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
+    LOG_DEBUGF("serialize.c", "Incremental copy %s", filepath)
+
+    while (TRUE) {
+        size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
        if (ret != 1 || feof(file)) {
            break;
        }

-        if (incremental_get(copy_table, line.ino)) {
+        // Assume that files with parents still exist.
+        //  One way to "fix" this would be to check if the parent is marked for copy but it would consistently
+        //  delete files with grandparents, which is a side-effect worse than having orphaned files
+        if (line.has_parent || incremental_get(copy_table, line.path_md5)) {
            fwrite(&line, sizeof(line), 1, dst_file);

-            size_t buf_len;
-            char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
-            store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
-            free(buf);
-
+            // Copy filepath
+            char filepath_buf[PATH_MAX];
            char c;
+            char *ptr = filepath_buf;
            while ((c = (char) getc(file))) {
-                fwrite(&c, sizeof(c), 1, dst_file);
+                *ptr++ = c;
+            }
+            *ptr = '\0';
+            fwrite(filepath_buf, (ptr - filepath_buf) + 1, 1, dst_file);
+
+            // Copy tn store contents
+            size_t buf_len;
+            char path_md5[MD5_DIGEST_LENGTH];
+            MD5((unsigned char *) filepath_buf, (ptr - filepath_buf), (unsigned char *) path_md5);
+            char *buf = store_read(store, path_md5, sizeof(path_md5), &buf_len);
+            if (buf_len != 0) {
+                store_write(dst_store, path_md5, sizeof(path_md5), buf, buf_len);
+                free(buf);
            }
-            fwrite("\0", sizeof(c), 1, dst_file);

            enum metakey key;
            while (1) {
                key = getc(file);
+                fwrite(&key, sizeof(char), 1, dst_file);
                if (key == '\n') {
                    break;
                }
-                fwrite(&key, sizeof(char), 1, dst_file);

                if (IS_META_INT(key)) {
                    int val;
@@ -517,14 +530,12 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                    }
                    fwrite("\0", sizeof(c), 1, dst_file);
                }
-
-                if (ret != 1) {
-                    break;
-                }
            }
        } else {
+            while ((getc(file))) {}
            skip_meta(file);
        }
    }
    fclose(file);
+    fclose(dst_file);
 }
--- a/src/io/serialize.h
+++ b/src/io/serialize.h
@@ -7,14 +7,14 @@
 #include <sys/syscall.h>
 #include <glib.h>

-typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
+typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);

 void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                      const char *dst_filepath, GHashTable *copy_table);

 void write_document(document_t *doc);

-void read_index(const char *path, const char[UUID_STR_LEN], const char *type, index_func);
+void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);

 void incremental_read(GHashTable *table, const char *filepath);

--- a/src/io/store.c
+++ b/src/io/store.c
@@ -40,13 +40,17 @@ void store_destroy(store_t *store) {
    free(store);
 }

+void store_flush(store_t *store) { // flush the store's LMDB environment to disk
+    mdb_env_sync(store->env, TRUE); // non-zero second argument requests a forced flush; the return code is not checked
+}
+
 void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {

    if (LogCtx.very_verbose) {
-        if (key_len == 16) {
-            char uuid_str[UUID_STR_LEN] = {0, };
-            uuid_unparse((unsigned char *) key, uuid_str);
-            LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
+        if (key_len == MD5_DIGEST_LENGTH) {
+            char path_md5_str[MD5_STR_LENGTH];
+            buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
+            LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
        } else {
            LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
        }
--- a/src/io/store.h
+++ b/src/io/store.h
@@ -24,6 +24,8 @@ void store_destroy(store_t *store);

 void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);

+void store_flush(store_t *store);
+
 char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);

 GHashTable *store_read_all(store_t *store);
--- a/src/io/walk.c
+++ b/src/io/walk.c
@@ -20,7 +20,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,

    job->vfile.info = *info;

-    memset(job->parent, 0, 16);
+    memset(job->parent, 0, MD5_DIGEST_LENGTH);

    job->vfile.filepath = job->filepath;
    job->vfile.read = fs_read;
--- a/src/main.c
+++ b/src/main.c
@@ -21,7 +21,7 @@
 #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"


-static const char *const Version = "2.8.5";
+static const char *const Version = "2.9.0";
 static const char *const usage[] = {
        "sist2 scan [OPTION]... PATH",
        "sist2 index [OPTION]... INDEX",
@@ -34,9 +34,10 @@ void init_dir(const char *dirpath) {
    char path[PATH_MAX];
    snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);

-    uuid_t uuid;
-    uuid_generate(uuid);
-    uuid_unparse(uuid, ScanCtx.index.desc.uuid);
+    unsigned char index_md5[MD5_DIGEST_LENGTH];
+    MD5((unsigned char *) ScanCtx.index.desc.name, strlen(ScanCtx.index.desc.name), index_md5);
+    buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
+
    time(&ScanCtx.index.desc.timestamp);
    strcpy(ScanCtx.index.desc.version, Version);
    strcpy(ScanCtx.index.desc.type, INDEX_TYPE_BIN);
@@ -149,6 +150,14 @@ void initialize_scan_context(scan_args_t *args) {
    ScanCtx.text_ctx.log = _log;
    ScanCtx.text_ctx.logf = _logf;

+    // MSDOC
+    ScanCtx.msdoc_ctx.tn_size = args->size;
+    ScanCtx.msdoc_ctx.content_size = args->content_size;
+    ScanCtx.msdoc_ctx.log = _log;
+    ScanCtx.msdoc_ctx.logf = _logf;
+    ScanCtx.msdoc_ctx.store = _store;
+    ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/msword");
+
    ScanCtx.threads = args->threads;
    ScanCtx.depth = args->depth;

@@ -210,7 +219,7 @@ void sist2_scan(scan_args_t *args) {
        while ((de = readdir(dir)) != NULL) {
            if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
                char file_path[PATH_MAX];
-                snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
+                snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
                incremental_read(ScanCtx.original_table, file_path);
            }
        }
@@ -225,8 +234,6 @@ void sist2_scan(scan_args_t *args) {
    tpool_wait(ScanCtx.pool);
    tpool_destroy(ScanCtx.pool);

-    generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
-
    if (args->incremental != NULL) {
        char dst_path[PATH_MAX];
        snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
@@ -242,7 +249,7 @@ void sist2_scan(scan_args_t *args) {
        while ((de = readdir(dir)) != NULL) {
            if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
                char file_path[PATH_MAX];
-                snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
+                snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
                incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table);
            }
        }
@@ -257,6 +264,8 @@ void sist2_scan(scan_args_t *args) {
        store_destroy(source_tags);
    }

+    generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
+
    store_destroy(ScanCtx.index.store);
 }

@@ -319,7 +328,7 @@ void sist2_index(index_args_t *args) {
        if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
            char file_path[PATH_MAX];
            snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
-            read_index(file_path, desc.uuid, desc.type, f);
+            read_index(file_path, desc.id, desc.type, f);
        }
    }
    closedir(dir);
@@ -329,7 +338,7 @@ void sist2_index(index_args_t *args) {
    tpool_destroy(IndexCtx.pool);

    if (!args->print) {
-        finish_indexer(args->script, args->async_script, desc.uuid);
+        finish_indexer(args->script, args->async_script, desc.id);
    }

    store_destroy(IndexCtx.tag_store);
@@ -349,7 +358,7 @@ void sist2_exec_script(exec_args_t *args) {

    LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)

-    execute_update_script(args->script, args->async_script, desc.uuid);
+    execute_update_script(args->script, args->async_script, desc.id);
    free(args->script);
 }

--- a/src/parsing/parse.c
+++ b/src/parsing/parse.c
@@ -46,29 +46,31 @@ void parse(void *arg) {
    parse_job_t *job = arg;
    document_t doc;

-    int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino);
-    if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
-        incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino);
-        return;
-    }
-
    doc.filepath = job->filepath;
    doc.ext = (short) job->ext;
    doc.base = (short) job->base;
+
+    char *rel_path = doc.filepath + ScanCtx.index.desc.root_len;
+    MD5((unsigned char *) rel_path, strlen(rel_path), doc.path_md5);
+
    doc.meta_head = NULL;
    doc.meta_tail = NULL;
    doc.mime = 0;
    doc.size = job->vfile.info.st_size;
-    doc.ino = job->vfile.info.st_ino;
    doc.mtime = job->vfile.info.st_mtim.tv_sec;

-    uuid_generate(doc.uuid);
+    int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5);
+    if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
+        incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5);
+        return;
+    }
+
    char *buf[MAGIC_BUF_SIZE];

    if (LogCtx.very_verbose) {
-        char uuid_str[UUID_STR_LEN];
-        uuid_unparse(doc.uuid, uuid_str);
-        LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
+        char path_md5_str[MD5_STR_LENGTH];
+        buf2hex(doc.path_md5, MD5_DIGEST_LENGTH, path_md5_str);
+        LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
    }

    if (job->vfile.info.st_size == 0) {
@@ -86,7 +88,8 @@ void parse(void *arg) {

        // Get mime type with libmagic
        if (!job->vfile.is_fs_file) {
-            LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported");
+            LOG_WARNING(job->filepath,
+                        "Guessing mime type with libmagic inside archive files is not currently supported");
            goto abort;
        }

@@ -162,16 +165,22 @@ void parse(void *arg) {
        parse_sidecar(&job->vfile, &doc);
        CLOSE_FILE(job->vfile)
        return;
+    } else if (is_msdoc(&ScanCtx.msdoc_ctx, doc.mime)) {
+        parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, &doc);
    }

    abort:

    //Parent meta
-    if (!uuid_is_null(job->parent)) {
-        meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
+    if (!md5_digest_is_null(job->parent)) {
+        meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
        meta_parent->key = MetaParent;
-        uuid_unparse(job->parent, meta_parent->str_val);
+        buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
        APPEND_META((&doc), meta_parent)
+
+        doc.has_parent = TRUE;
+    } else {
+        doc.has_parent = FALSE;
    }

    write_document(&doc);
--- a/src/parsing/sidecar.c
+++ b/src/parsing/sidecar.c
@@ -23,11 +23,11 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
    }
    char *json_str = cJSON_PrintUnformatted(json);

-    char filepath[PATH_MAX];
-    memcpy(filepath, vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len);
-    *(filepath + doc->ext - 1) = '\0';
+    unsigned char path_md5[MD5_DIGEST_LENGTH];
+    MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
+        path_md5);

-    store_write(ScanCtx.index.meta_store, filepath, doc->ext, json_str, strlen(json_str) + 1);
+    store_write(ScanCtx.index.meta_store, (char *) path_md5, sizeof(path_md5), json_str, strlen(json_str) + 1);

    cJSON_Delete(json);
    free(json_str);
--- a/src/sist.h
+++ b/src/sist.h
@@ -23,9 +23,10 @@
 #undef ABS
 #define ABS(a)	   (((a) < 0) ? -(a) : (a))

-#define UUID_STR_LEN 37
 #define UNUSED(x) __attribute__((__unused__))  x

+#define MD5_STR_LENGTH 33
+
 #include "util.h"
 #include "log.h"
 #include "types.h"
@@ -47,5 +48,4 @@
 #include <errno.h>
 #include <ctype.h>

-
 #endif
--- a/src/static/css/ion.rangeSlider.skinFlat.min.css
+++ b/src/static/css/ion.rangeSlider.skinFlat.min.css
@@ -1 +1 @@
-.irs-bar,.irs-bar-edge,.irs-line-left,.irs-line-mid,.irs-line-right,.irs-slider{background:url("../img/sprite-skin-flat.png") repeat-x}.irs{height:40px}.irs-with-grid{height:60px}.irs-line{height:12px;top:25px}.irs-line-left{height:12px;background-position:0 -30px}.irs-line-mid{height:12px;background-position:0 0}.irs-line-right{height:12px;background-position:100% -30px}.irs-bar{height:12px;top:25px;background-position:0 -60px}.irs-bar-edge{top:25px;height:12px;width:9px;background-position:0 -90px}.irs-shadow{height:3px;top:34px;background:#000;opacity:0.25}.lt-ie9 .irs-shadow{filter: alpha(opacity=25)}.irs-slider{width:16px;height:18px;top:22px;background-position:0 -120px}.irs-slider.state_hover,.irs-slider:hover{background-position:0 -150px}.irs-max,.irs-min{color:#999;font-size:10px;line-height:1.333;text-shadow:none;top:0;padding:1px 3px;background:#e1e4e9;-moz-border-radius:4px;border-radius:4px}.irs-from,.irs-single,.irs-to{color:#fff;font-size:10px;line-height:1.333;text-shadow:none;padding:1px 5px;background:#2196F3;-moz-border-radius:4px;border-radius:4px}.irs-from:after,.irs-single:after,.irs-to:after{position:absolute;display:block;content:"";bottom:-6px;left:50%;width:0;height:0;margin-left:-3px;overflow:hidden;border:3px solid transparent;border-top-color:#2196F3}.irs-grid-pol{background:#e1e4e9}.irs-grid-text{color:#999}.irs-disabled{}
+.irs-bar,.irs-bar-edge,.irs-line-left,.irs-line-mid,.irs-line-right,.irs-slider{background:url("./img/sprite-skin-flat.png") repeat-x}.irs{height:40px}.irs-with-grid{height:60px}.irs-line{height:12px;top:25px}.irs-line-left{height:12px;background-position:0 -30px}.irs-line-mid{height:12px;background-position:0 0}.irs-line-right{height:12px;background-position:100% -30px}.irs-bar{height:12px;top:25px;background-position:0 -60px}.irs-bar-edge{top:25px;height:12px;width:9px;background-position:0 -90px}.irs-shadow{height:3px;top:34px;background:#000;opacity:0.25}.lt-ie9 .irs-shadow{filter: alpha(opacity=25)}.irs-slider{width:16px;height:18px;top:22px;background-position:0 -120px}.irs-slider.state_hover,.irs-slider:hover{background-position:0 -150px}.irs-max,.irs-min{color:#999;font-size:10px;line-height:1.333;text-shadow:none;top:0;padding:1px 3px;background:#e1e4e9;-moz-border-radius:4px;border-radius:4px}.irs-from,.irs-single,.irs-to{color:#fff;font-size:10px;line-height:1.333;text-shadow:none;padding:1px 5px;background:#2196F3;-moz-border-radius:4px;border-radius:4px}.irs-from:after,.irs-single:after,.irs-to:after{position:absolute;display:block;content:"";bottom:-6px;left:50%;width:0;height:0;margin-left:-3px;overflow:hidden;border:3px solid transparent;border-top-color:#2196F3}.irs-grid-pol{background:#e1e4e9}.irs-grid-text{color:#999}.irs-disabled{}
--- a/src/static/js/8_md5.min.js
+++ b/src/static/js/8_md5.min.js
@@ -0,0 +1 @@
+!function(n){"use strict";function d(n,t){var r=(65535&n)+(65535&t);return(n>>16)+(t>>16)+(r>>16)<<16|65535&r}function f(n,t,r,e,o,u){return d((c=d(d(t,n),d(e,u)))<<(f=o)|c>>>32-f,r);var c,f}function l(n,t,r,e,o,u,c){return f(t&r|~t&e,n,t,o,u,c)}function v(n,t,r,e,o,u,c){return f(t&e|r&~e,n,t,o,u,c)}function g(n,t,r,e,o,u,c){return f(t^r^e,n,t,o,u,c)}function m(n,t,r,e,o,u,c){return f(r^(t|~e),n,t,o,u,c)}function i(n,t){var r,e,o,u;n[t>>5]|=128<<t%32,n[14+(t+64>>>9<<4)]=t;for(var c=1732584193,f=-271733879,i=-1732584194,a=271733878,h=0;h<n.length;h+=16)c=l(r=c,e=f,o=i,u=a,n[h],7,-680876936),a=l(a,c,f,i,n[h+1],12,-389564586),i=l(i,a,c,f,n[h+2],17,606105819),f=l(f,i,a,c,n[h+3],22,-1044525330),c=l(c,f,i,a,n[h+4],7,-176418897),a=l(a,c,f,i,n[h+5],12,1200080426),i=l(i,a,c,f,n[h+6],17,-1473231341),f=l(f,i,a,c,n[h+7],22,-45705983),c=l(c,f,i,a,n[h+8],7,1770035416),a=l(a,c,f,i,n[h+9],12,-1958414417),i=l(i,a,c,f,n[h+10],17,-42063),f=l(f,i,a,c,n[h+11],22,-1990404162),c=l(c,f,i,a,n[h+12],7,1804603682),a=l(a,c,f,i,n[h+13],12,-40341101),i=l(i,a,c,f,n[h+14],17,-1502002290),c=v(c,f=l(f,i,a,c,n[h+15],22,1236535329),i,a,n[h+1],5,-165796510),a=v(a,c,f,i,n[h+6],9,-1069501632),i=v(i,a,c,f,n[h+11],14,643717713),f=v(f,i,a,c,n[h],20,-373897302),c=v(c,f,i,a,n[h+5],5,-701558691),a=v(a,c,f,i,n[h+10],9,38016083),i=v(i,a,c,f,n[h+15],14,-660478335),f=v(f,i,a,c,n[h+4],20,-405537848),c=v(c,f,i,a,n[h+9],5,568446438),a=v(a,c,f,i,n[h+14],9,-1019803690),i=v(i,a,c,f,n[h+3],14,-187363961),f=v(f,i,a,c,n[h+8],20,1163531501),c=v(c,f,i,a,n[h+13],5,-1444681467),a=v(a,c,f,i,n[h+2],9,-51403784),i=v(i,a,c,f,n[h+7],14,1735328473),c=g(c,f=v(f,i,a,c,n[h+12],20,-1926607734),i,a,n[h+5],4,-378558),a=g(a,c,f,i,n[h+8],11,-2022574463),i=g(i,a,c,f,n[h+11],16,1839030562),f=g(f,i,a,c,n[h+14],23,-35309556),c=g(c,f,i,a,n[h+1],4,-1530992060),a=g(a,c,f,i,n[h+4],11,1272893353),i=g(i,a,c,f,n[h+7],16,-155497632),f=g(f,i,a,c,n[h+10],23,-1094730640),c=g(c,f,i,a,n[h+13],4,681279174),a=g(a,c,f,i,n[h],11,-358537222),i=g(i,a,c,f,n[h+3],
16,-722521979),f=g(f,i,a,c,n[h+6],23,76029189),c=g(c,f,i,a,n[h+9],4,-640364487),a=g(a,c,f,i,n[h+12],11,-421815835),i=g(i,a,c,f,n[h+15],16,530742520),c=m(c,f=g(f,i,a,c,n[h+2],23,-995338651),i,a,n[h],6,-198630844),a=m(a,c,f,i,n[h+7],10,1126891415),i=m(i,a,c,f,n[h+14],15,-1416354905),f=m(f,i,a,c,n[h+5],21,-57434055),c=m(c,f,i,a,n[h+12],6,1700485571),a=m(a,c,f,i,n[h+3],10,-1894986606),i=m(i,a,c,f,n[h+10],15,-1051523),f=m(f,i,a,c,n[h+1],21,-2054922799),c=m(c,f,i,a,n[h+8],6,1873313359),a=m(a,c,f,i,n[h+15],10,-30611744),i=m(i,a,c,f,n[h+6],15,-1560198380),f=m(f,i,a,c,n[h+13],21,1309151649),c=m(c,f,i,a,n[h+4],6,-145523070),a=m(a,c,f,i,n[h+11],10,-1120210379),i=m(i,a,c,f,n[h+2],15,718787259),f=m(f,i,a,c,n[h+9],21,-343485551),c=d(c,r),f=d(f,e),i=d(i,o),a=d(a,u);return[c,f,i,a]}function a(n){for(var t="",r=32*n.length,e=0;e<r;e+=8)t+=String.fromCharCode(n[e>>5]>>>e%32&255);return t}function h(n){var t=[];for(t[(n.length>>2)-1]=void 0,e=0;e<t.length;e+=1)t[e]=0;for(var r=8*n.length,e=0;e<r;e+=8)t[e>>5]|=(255&n.charCodeAt(e/8))<<e%32;return t}function e(n){for(var t,r="0123456789abcdef",e="",o=0;o<n.length;o+=1)t=n.charCodeAt(o),e+=r.charAt(t>>>4&15)+r.charAt(15&t);return e}function r(n){return unescape(encodeURIComponent(n))}function o(n){return a(i(h(t=r(n)),8*t.length));var t}function u(n,t){return function(n,t){var r,e,o=h(n),u=[],c=[];for(u[15]=c[15]=void 0,16<o.length&&(o=i(o,8*n.length)),r=0;r<16;r+=1)u[r]=909522486^o[r],c[r]=1549556828^o[r];return e=i(u.concat(h(t)),512+8*t.length),a(i(c.concat(e),640))}(r(n),r(t))}function t(n,t,r){return t?r?u(t,n):e(u(t,n)):r?o(n):e(o(n))}"function"==typeof define&&define.amd?define(function(){return t}):"object"==typeof module&&module.exports?module.exports=t:n.md5=t}(this);
--- a/src/static/js/dom.js
+++ b/src/static/js/dom.js
@@ -22,7 +22,7 @@ function gifOver(thumbnail, hit) {
    thumbnail.addEventListener("mouseout", function () {
        //Reset timer
        thumbnail.mouseStayedOver = false;
-        thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
+        thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`);
    })
 }

@@ -419,7 +419,7 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
            thumbnail.setAttribute("class", "card-img-top fit");
        }
    }
-    thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
+    thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`);

    if (shouldDisplayRawImage(hit)) {
        thumbnail.addEventListener("click", () => {
--- a/src/static/js/search.js
+++ b/src/static/js/search.js
@@ -165,6 +165,9 @@ window.onload = () => {
            }
        }
    });
+
+    initTagTree();
+    updateTagTree();
 };

 function saveTag(tag, hit) {
@@ -174,7 +177,7 @@ function saveTag(tag, hit) {
        delete: false,
        name: tag,
        doc_id: hit["_id"],
-        relpath: relPath
+        path_md5: md5(relPath)
    }).then(() => {
        tagBar.blur();
        $("#tagModal").modal("hide");
@@ -188,6 +191,8 @@ function saveTag(tag, hit) {
            hideAfter: 3000,
            loaderBg: "#08c7e8",
        });
+
+        window.setTimeout(updateTagTree, 2000);
    })
 }

@@ -198,7 +203,7 @@ function deleteTag(tag, hit) {
        delete: true,
        name: tag,
        doc_id: hit["_id"],
-        relpath: relPath
+        path_md5: md5(relPath)
    }).then(() => {
        $.toast({
            heading: "Tag deleted",
@@ -210,6 +215,8 @@ function deleteTag(tag, hit) {
            hideAfter: 3000,
            loaderBg: "#08c7e8",
        });
+
+        window.setTimeout(updateTagTree, 2000);
    })
 }

@@ -313,25 +320,8 @@ $.jsonPost("es", {
    mimeTree.node("any").select();
 });

-// Tags tree
-$.jsonPost("es", {
-    aggs: {
-        tags: {
-            terms: {
-                field: "tag",
-                size: 10000
-            }
-        }
-    },
-    size: 0,
-}).then(resp => {
-    resp["aggregations"]["tags"]["buckets"]
-        .sort((a, b) => a["key"].localeCompare(b["key"]))
-        .forEach(bucket => {
-            addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
-        });
-
-    tagMap.push({"text": "All", "id": "any"});
+function initTagTree() {
+    tagMap = [{text: "All", id: "any"}];
    tagTree = new InspireTree({
        selection: {
            mode: 'checkbox'
@@ -346,8 +336,34 @@ $.jsonPost("es", {
    });
    tagTree.on("node.state.changed", handleTreeClick(tagTree));
    tagTree.node("any").select();
+}
+
+/**
+ * Reload the tag list from Elasticsearch and rebuild the tag tree from it.
+ *
+ * Sends a terms aggregation on the "tag" field (up to 10000 buckets, size 0 so
+ * no hits are returned) through $.jsonPost("es", ...). When the response
+ * arrives, tagMap is rebuilt from the buckets sorted by key, the existing
+ * tagTree nodes are removed, and the new entries plus the "All" entry
+ * (id "any") are added back.
+ */
+function updateTagTree() {
+    $.jsonPost("es", {
+        aggs: {
+            tags: {
+                terms: {
+                    field: "tag",
+                    size: 10000
+                }
+            }
+        },
+        size: 0,
+    }).then(resp => {
+        // Start from an empty map so that tags which no longer exist are dropped.
+        tagMap = [];
+        resp["aggregations"]["tags"]["buckets"]
+            .sort((a, b) => a["key"].localeCompare(b["key"]))
+            .forEach(bucket => {
+                addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
+            });
+
+        // NOTE(review): unlike initTagTree(), the "any" node is not re-selected after
+        // the nodes are replaced - confirm the previous selection need not be restored.
+        tagTree.removeAll();
+        tagMap.push({text: "All", id: "any"})
+        tagTree.addNodes(tagMap);
         searchBusy = false;
     });
+}
+

 function addTag(map, tag, id, count) {
    // let tags = tag.split("#")[0].split(".");
@@ -604,6 +620,7 @@ function search(after = null) {
        hits.forEach(hit => {
            hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
            hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
+            hit["_path_md5"] = md5(hit["_source"]["path"] + (hit["_source"]["path"] ? "/" : "") + hit["_source"]["name"] + ext(hit));
        });

        if (!after) {
--- a/src/static/search.html
+++ b/src/static/search.html
@@ -12,9 +12,9 @@

 <nav class="navbar navbar-expand-lg">
    <a class="navbar-brand" href="/">sist2</a>
-    <span class="badge badge-pill version">2.8.5</span>
+    <span class="badge badge-pill version">2.9.0</span>
    <span class="tagline">Lightning-fast file system indexer and search tool </span>
-    <a class="btn ml-auto" href="/stats">Stats</a>
+    <a class="btn ml-auto" href="stats">Stats</a>
    <button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings
    </button>
    <button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
--- a/src/static/stats.html
+++ b/src/static/stats.html
@@ -10,7 +10,7 @@

 <nav class="navbar navbar-expand-lg">
    <a class="navbar-brand" href="/">sist2</a>
-    <span class="badge badge-pill version">2.8.5</span>
+    <span class="badge badge-pill version">2.9.0</span>
    <span class="tagline">Lightning-fast file system indexer and search tool </span>
    <a style="margin-left: auto" class="btn" href="/">Back</a>
    <button class="btn" type="button" data-toggle="modal" data-target="#settings"
@@ -29,13 +29,13 @@
    </div>

    <div id="treemap-card" class="stats-card">
-        <button class="btn stats-btn" onclick="fullScreen('treemap-card')">Enlarge</button>
+        <button class="btn stats-btn" onclick="fullScreen('treemap-card')" id="treemap-card-enlarge">Enlarge</button>
        <button class="btn stats-btn" onclick="exportTreemap()">Export</button>
        <svg id="treemap"></svg>
    </div>

    <div id="graphs-card" class="stats-card">
-        <button class="btn stats-btn" onclick="fullScreen('graphs-card')">Enlarge</button>
+        <button class="btn stats-btn" onclick="fullScreen('graphs-card')" id="graphs-card-enlarge">Enlarge</button>
        <div class="graph">
            <svg id="agg_mime_size"></svg>
        </div>
@@ -727,7 +727,7 @@ function updateStats() {

    const indexId = $("#indices").val();

-    d3.csv(`/s/${indexId}/1`).then(tabularData => {
+    d3.csv(`./s/${indexId}/1`).then(tabularData => {
        tabularData.forEach(row => {
            row.taxonomy = row.path.split("/");
            row.size = Number(row.size);
@@ -742,16 +742,16 @@ function updateStats() {
        }
    });

-    d3.csv(`/s/${indexId}/2`).then(tabularData => {
+    d3.csv(`./s/${indexId}/2`).then(tabularData => {
        mimeBarSize(tabularData.slice(), mimeSvgSize);
        mimeBarCount(tabularData.slice(), mimeSvgCount);
    });

-    d3.csv(`/s/${indexId}/3`).then(tabularData => {
+    d3.csv(`./s/${indexId}/3`).then(tabularData => {
        sizeHistogram(tabularData, sizeHistogramSvg);
    });

-    d3.csv(`/s/${indexId}/4`).then(tabularData => {
+    d3.csv(`./s/${indexId}/4`).then(tabularData => {
        dateHistogram(tabularData, dateHistogramSvg);
    });

@@ -789,7 +789,15 @@ window.onload = function () {

+/**
+ * Toggle the "full-screen" class on the card whose element id is `selector`
+ * and relabel the button with id `selector + "-enlarge"` to match:
+ * "Shrink" while the card is full screen, "Enlarge" otherwise.
+ */
 function fullScreen(selector) {
     const card = document.getElementById(selector);
+    const btn = document.getElementById(selector + "-enlarge");
+
     card.classList.toggle("full-screen");
+
+    if (card.classList.contains("full-screen")) {
+        btn.innerText = "Shrink";
+    } else {
+        btn.innerText = "Enlarge";
+    }
 }

 function exportTreemap() {
--- a/src/stats.c
+++ b/src/stats.c
@@ -2,8 +2,6 @@
 #include "io/serialize.h"
 #include "ctx.h"

-#include <glib.h>
-
 static GHashTable *FlatTree;
 static GHashTable *BufferTable;

@@ -22,7 +20,7 @@ typedef struct {
    long count;
 } agg_t;

-void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) {
+void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {

    if (cJSON_GetObjectItem(document, "parent") != NULL) {
        return;
@@ -103,8 +101,8 @@ void read_index_into_tables(index_t *index) {
    while ((de = readdir(dir)) != NULL) {
        if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
            char file_path[PATH_MAX];
-            snprintf(file_path, PATH_MAX, "%s/%s", index->path, de->d_name);
-            read_index(file_path, index->desc.uuid, index->desc.type, fill_tables);
+            snprintf(file_path, PATH_MAX, "%s%s", index->path, de->d_name);
+            read_index(file_path, index->desc.id, index->desc.type, fill_tables);
        }
    }
    closedir(dir);
--- a/src/types.h
+++ b/src/types.h
@@ -6,7 +6,7 @@
 #define INDEX_VERSION_EXTERNAL "_external_v1"

 typedef struct index_descriptor {
-    char uuid[UUID_STR_LEN];
+    char id[MD5_STR_LENGTH];
    char version[64];
    long timestamp;
    char root[PATH_MAX];
--- a/src/util.c
+++ b/src/util.c
@@ -2,7 +2,6 @@
 #include "src/ctx.h"

 #include <wordexp.h>
-#include <glib.h>

 #define PBSTR "========================================"
 #define PBWIDTH 40
@@ -125,7 +124,7 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
 }

+// Creates an empty GLib hash table with string keys (g_str_hash / g_str_equal).
+// Keys are released by the table with free(); values have no destroy function.
+// NOTE(review): presumably the table type expected by incremental_put() /
+// incremental_get(), which use malloc'd hex MD5 strings as keys - confirm.
 GHashTable *incremental_get_table() {
-    GHashTable *file_table = g_hash_table_new(g_direct_hash, g_direct_equal);
+    GHashTable *file_table = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
     return file_table;
 }

--- a/src/util.h
+++ b/src/util.h
@@ -10,6 +10,8 @@
 #include "third-party/utf8.h/utf8.h"
 #include "libscan/scan.h"

+#define MD5_STR_LENGTH 33
+

 char *abspath(const char *path);

@@ -21,25 +23,6 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size);

 GHashTable *incremental_get_table();

-__always_inline
-static void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
-    g_hash_table_insert(table, (gpointer) inode_no, GINT_TO_POINTER(mtime));
-}
-
-__always_inline
-static int incremental_get(GHashTable *table, unsigned long inode_no) {
-    if (table != NULL) {
-        return GPOINTER_TO_INT(g_hash_table_lookup(table, (gpointer) inode_no));
-    } else {
-        return 0;
-    }
-}
-
-__always_inline
-static int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no) {
-    return g_hash_table_insert(table, GINT_TO_POINTER(inode_no), GINT_TO_POINTER(1));
-}
-

 const char *find_file_in_paths(const char **paths, const char *filename);

@@ -48,4 +31,95 @@ void str_escape(char *dst, const char *str);

 void str_unescape(char *dst, const char *str);

+/**
+ * Decode `len` hexadecimal characters from `str` into `len / 2` bytes in `bytes`.
+ *
+ * Each character is translated through a 256-entry table indexed by its
+ * unsigned character code: '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to
+ * 10-15, and every other character maps to 0. The input is therefore not
+ * validated; non-hex characters silently decode as zero nibbles.
+ *
+ * `len` is expected to be even: characters are consumed in pairs, so with an
+ * odd `len` the last iteration also reads `str[len]`.
+ *
+ * Always returns TRUE.
+ */
+static int hex2buf(const char *str, int len, unsigned char *bytes) {
+    static const uint8_t hashmap[] = {
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // '0'-'7'
+            0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // '8'-'9'
+            0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, // 'A'-'F'
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, // 'a'-'f'
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+    };
+
+    for (int pos = 0; pos < len; pos += 2) {
+        int idx0 = (uint8_t) str[pos + 0];
+        int idx1 = (uint8_t) str[pos + 1];
+        // First character of the pair is the high nibble, second is the low nibble.
+        bytes[pos / 2] = (uint8_t) (hashmap[idx0] << 4) | hashmap[idx1];
+    }
+    return TRUE;
+}
+
+/**
+ * Hex-encode `buflen` bytes from `buf` into `hex_string` using lowercase digits.
+ *
+ * Writes 2 * buflen characters followed by a terminating NUL, so the
+ * destination must provide at least 2 * buflen + 1 bytes.
+ */
+__always_inline
+static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
+    static const char digits[] = "0123456789abcdef";
+
+    for (size_t n = 0; n < buflen; n++) {
+        const unsigned char byte = buf[n];
+
+        hex_string[2 * n] = digits[(byte >> 4) & 0x0f];
+        hex_string[2 * n + 1] = digits[byte & 0x0f];
+    }
+    hex_string[2 * buflen] = '\0';
+}
+
+
+/**
+ * Returns non-zero when all MD5_DIGEST_LENGTH bytes of `digest` are zero,
+ * and 0 otherwise.
+ *
+ * The digest is inspected byte by byte rather than through wider integer
+ * loads: a byte buffer carries no 8-byte alignment guarantee, and reading
+ * unsigned char storage through an int64_t lvalue violates C's effective-type
+ * (strict aliasing) rules.
+ */
+__always_inline
+static int md5_digest_is_null(const unsigned char digest[MD5_DIGEST_LENGTH]) {
+    unsigned char any_bit = 0;
+
+    for (size_t i = 0; i < MD5_DIGEST_LENGTH; i++) {
+        any_bit |= digest[i];
+    }
+    return any_bit == 0;
+}
+
+
+/**
+ * Record `mtime` for a file in `table`, keyed by the lowercase hex form of
+ * its path MD5.
+ *
+ * The key is a freshly malloc'd MD5_STR_LENGTH-byte string that is handed
+ * over to the table (tables built by incremental_get_table() release keys
+ * with free()). The mtime is packed into the value pointer itself.
+ */
+__always_inline
+static void incremental_put(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH], int mtime) {
+    char *hex_key = malloc(MD5_STR_LENGTH);
+    gpointer packed_mtime = GINT_TO_POINTER(mtime);
+
+    buf2hex(path_md5, MD5_DIGEST_LENGTH, hex_key);
+    g_hash_table_insert(table, hex_key, packed_mtime);
+}
+
+/**
+ * Look up the integer stored for `path_md5` in `table`.
+ *
+ * The digest is hex-encoded into a stack buffer and used as the string key.
+ * Returns 0 when `table` is NULL or when the key is absent (a missing entry
+ * yields a NULL value pointer, which converts to 0).
+ */
+__always_inline
+static int incremental_get(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH]) {
+    if (table == NULL) {
+        return 0;
+    }
+
+    char hex_key[MD5_STR_LENGTH];
+    buf2hex(path_md5, MD5_DIGEST_LENGTH, hex_key);
+
+    gpointer stored = g_hash_table_lookup(table, hex_key);
+    return GPOINTER_TO_INT(stored);
+}
+
+/**
+ * Flag the file identified by `path_md5` in `table` by inserting its hex MD5
+ * string as key with the value 1.
+ *
+ * The key is a freshly malloc'd MD5_STR_LENGTH-byte string handed over to the
+ * table. Returns the result of g_hash_table_insert().
+ */
+__always_inline
+static int incremental_mark_file_for_copy(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH]) {
+    char *hex_key = malloc(MD5_STR_LENGTH);
+
+    buf2hex(path_md5, MD5_DIGEST_LENGTH, hex_key);
+
+    gpointer marker = GINT_TO_POINTER(1);
+    return g_hash_table_insert(table, hex_key, marker);
+}
+
 #endif
--- a/src/web/serve.c
+++ b/src/web/serve.c
@@ -36,7 +36,7 @@ static void send_response_line(struct mg_connection *nc, int status_code, int le

 index_t *get_index_by_id(const char *index_id) {
    for (int i = WebCtx.index_count; i >= 0; i--) {
-        if (strcmp(index_id, WebCtx.indices[i].desc.uuid) == 0) {
+        if (strncmp(index_id, WebCtx.indices[i].desc.id, MD5_STR_LENGTH) == 0) {
            return &WebCtx.indices[i];
        }
    }
@@ -73,17 +73,17 @@ void stats(struct mg_connection *nc) {

 void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {

-    if (path->len != UUID_STR_LEN + 4) {
+    if (path->len != MD5_STR_LENGTH + 4) {
        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

-    char arg_uuid[UUID_STR_LEN];
-    memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
-    *(arg_uuid + UUID_STR_LEN - 1) = '\0';
+    char arg_md5[MD5_STR_LENGTH];
+    memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH);
+    *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';

-    index_t *index = get_index_by_id(arg_uuid);
+    index_t *index = get_index_by_id(arg_md5);
    if (index == NULL) {
        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
@@ -91,7 +91,7 @@ void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_st
    }

    const char *file;
-    switch (atoi(hm->uri.p + 3 + UUID_STR_LEN)) {
+    switch (atoi(hm->uri.p + 3 + MD5_STR_LENGTH)) {
        case 1:
            file = "treemap.csv";
            break;
@@ -179,29 +179,23 @@ void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) {

 void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {

-    if (path->len != UUID_STR_LEN * 2 + 2) {
+    if (path->len != 68) {
        LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p)
        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

-    char arg_uuid[UUID_STR_LEN];
-    char arg_index[UUID_STR_LEN];
+    char arg_file_md5[MD5_STR_LENGTH];
+    char arg_index[MD5_STR_LENGTH];

-    memcpy(arg_index, hm->uri.p + 3, UUID_STR_LEN);
-    *(arg_index + UUID_STR_LEN - 1) = '\0';
-    memcpy(arg_uuid, hm->uri.p + 3 + UUID_STR_LEN, UUID_STR_LEN);
-    *(arg_uuid + UUID_STR_LEN - 1) = '\0';
+    memcpy(arg_index, hm->uri.p + 3, MD5_STR_LENGTH);
+    *(arg_index + MD5_STR_LENGTH - 1) = '\0';
+    memcpy(arg_file_md5, hm->uri.p + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
+    *(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';

-    uuid_t uuid;
-    int ret = uuid_parse(arg_uuid, uuid);
-    if (ret != 0) {
-        LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid)
-        mg_http_send_error(nc, 404, NULL);
-        nc->flags |= MG_F_SEND_AND_CLOSE;
-        return;
-    }
+    unsigned char md5_buf[MD5_DIGEST_LENGTH];
+    hex2buf(arg_file_md5, MD5_STR_LENGTH - 1, md5_buf);

    store_t *store = get_store(arg_index);
    if (store == NULL) {
@@ -212,7 +206,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
    }

    size_t data_len = 0;
-    char *data = store_read(store, (char *) uuid, sizeof(uuid_t), &data_len);
+    char *data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
    if (data_len != 0) {
        send_response_line(nc, 200, data_len, "Content-Type: image/jpeg");
        mg_send(nc, data, data_len);
@@ -305,7 +299,7 @@ void index_info(struct mg_connection *nc) {
        cJSON *idx_json = cJSON_CreateObject();
        cJSON_AddStringToObject(idx_json, "name", idx->desc.name);
        cJSON_AddStringToObject(idx_json, "version", idx->desc.version);
-        cJSON_AddStringToObject(idx_json, "id", idx->desc.uuid);
+        cJSON_AddStringToObject(idx_json, "id", idx->desc.id);
        cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
        cJSON_AddItemToArray(arr, idx_json);
    }
@@ -323,18 +317,18 @@ void index_info(struct mg_connection *nc) {

 void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {

-    if (path->len != UUID_STR_LEN + 2) {
+    if (path->len != MD5_STR_LENGTH + 2) {
        LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p)
        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

-    char arg_uuid[UUID_STR_LEN];
-    memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
-    *(arg_uuid + UUID_STR_LEN - 1) = '\0';
+    char arg_md5[MD5_STR_LENGTH];
+    memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH);
+    *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';

-    cJSON *doc = elastic_get_document(arg_uuid);
+    cJSON *doc = elastic_get_document(arg_md5);
    cJSON *source = cJSON_GetObjectItem(doc, "_source");

    cJSON *index_id = cJSON_GetObjectItem(source, "index");
@@ -364,18 +358,18 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_

 void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {

-    if (path->len != UUID_STR_LEN + 2) {
+    if (path->len != MD5_STR_LENGTH + 2) {
        LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p)
        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

-    char arg_uuid[UUID_STR_LEN];
-    memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
-    *(arg_uuid + UUID_STR_LEN - 1) = '\0';
+    char arg_md5[MD5_STR_LENGTH];
+    memcpy(arg_md5, hm->uri.p + 3, MD5_STR_LENGTH);
+    *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';

-    const char *next = arg_uuid;
+    const char *next = arg_md5;
    cJSON *doc = NULL;
    cJSON *index_id = NULL;
    cJSON *source = NULL;
@@ -430,7 +424,7 @@ void status(struct mg_connection *nc) {
+// Parsed body of a tag request, filled in by parse_tag_request().
+// The string members point at the valuestring buffers of the parsed cJSON object.
 typedef struct {
     char *name;
     int delete;
-    char *relpath;
+    // Value of the request's "path_md5" field; parse_tag_request() only accepts
+    // strings of exactly MD5_STR_LENGTH - 1 characters.
+    char *path_md5_str;
     char *doc_id;
 } tag_req_t;

@@ -450,8 +444,9 @@ tag_req_t *parse_tag_request(cJSON *json) {
        return NULL;
    }

-    cJSON *arg_relpath = cJSON_GetObjectItem(json, "relpath");
-    if (arg_relpath == NULL || !cJSON_IsString(arg_relpath)) {
+    cJSON *arg_path_md5 = cJSON_GetObjectItem(json, "path_md5");
+    if (arg_path_md5 == NULL || !cJSON_IsString(arg_path_md5) ||
+        strlen(arg_path_md5->valuestring) != MD5_STR_LENGTH - 1) {
        return NULL;
    }

@@ -463,23 +458,23 @@ tag_req_t *parse_tag_request(cJSON *json) {
    tag_req_t *req = malloc(sizeof(tag_req_t));
    req->delete = arg_delete->valueint;
    req->name = arg_name->valuestring;
-    req->relpath = arg_relpath->valuestring;
+    req->path_md5_str = arg_path_md5->valuestring;
    req->doc_id = arg_doc_id->valuestring;

    return req;
 }

 void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
-    if (path->len != UUID_STR_LEN + 4) {
+    if (path->len != MD5_STR_LENGTH + 4) {
        LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) path->len, path->p)
        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

-    char arg_index[UUID_STR_LEN];
-    memcpy(arg_index, hm->uri.p + 5, UUID_STR_LEN);
-    *(arg_index + UUID_STR_LEN - 1) = '\0';
+    char arg_index[MD5_STR_LENGTH];
+    memcpy(arg_index, hm->uri.p + 5, MD5_STR_LENGTH);
+    *(arg_index + MD5_STR_LENGTH - 1) = '\0';

    if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
        LOG_DEBUG("serve.c", "Invalid tag request")
@@ -514,7 +509,7 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
    cJSON *arr = NULL;

    size_t data_len = 0;
-    const char *data = store_read(store, arg_req->relpath, strlen(arg_req->relpath), &data_len);
+    const char *data = store_read(store, arg_req->path_md5_str, MD5_STR_LENGTH, &data_len);
    if (data_len == 0) {
        arr = cJSON_CreateArray();
    } else {
@@ -574,7 +569,8 @@ void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path)
    }

    char *json_str = cJSON_PrintUnformatted(arr);
-    store_write(store, arg_req->relpath, strlen(arg_req->relpath) + 1, json_str, strlen(json_str) + 1);
+    store_write(store, arg_req->path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
+    store_flush(store);

    free(arg_req);
    free(json_str);
--- a/src/web/static_generated.c
+++ b/src/web/static_generated.c
--- a/tests/test_scan.py
+++ b/tests/test_scan.py
@@ -0,0 +1,75 @@
+import unittest
+import subprocess
+import shutil
+import json
+import os
+
+# Directory of sample documents that the tests below copy and scan.
+# The path is relative, so it is resolved against the current working directory.
+TEST_FILES = "third-party/libscan/libscan-test-files/test_files"
+
+
+def copy_files(files):
+    base = os.path.basename(files)
+    new_path = os.path.join("/tmp/sist2_test/", base)
+
+    shutil.rmtree(new_path, ignore_errors=True)
+    shutil.copytree(files, new_path)
+    return new_path
+
+
+def sist2(*args):
+    return subprocess.check_output(
+        args=["./sist2_debug", *args],
+    )
+
+
+def sist2_index(files, *args):
+    path = copy_files(files)
+
+    shutil.rmtree("i", ignore_errors=True)
+    sist2("scan", path, "-o", "i", *args)
+    return iter(sist2_index_to_dict("i"))
+
+
+def sist2_incremental_index(files, func=None, *args):
+    path = copy_files(files)
+
+    if func:
+        func(path)
+
+    shutil.rmtree("i_inc", ignore_errors=True)
+    sist2("scan", path, "-o", "i_inc", "--incremental", "i", *args)
+    return iter(sist2_index_to_dict("i_inc"))
+
+
+def sist2_index_to_dict(index):
+    res = subprocess.check_output(
+        args=["./sist2_debug", "index", "--print", index],
+    )
+
+    for line in res.splitlines():
+        if line:
+            yield json.loads(line)
+
+
+class ScanTest(unittest.TestCase):
+
+    def test_incremental1(self):
+        def remove_files(path):
+            os.remove(os.path.join(path, "msdoc/test1.doc"))
+            os.remove(os.path.join(path, "msdoc/test2.doc"))
+
+        def add_files(path):
+            with open(os.path.join(path, "newfile1"), "w"):
+                pass
+            with open(os.path.join(path, "newfile2"), "w"):
+                pass
+            with open(os.path.join(path, "newfile3"), "w"):
+                pass
+
+        file_count = sum(1 for _ in sist2_index(TEST_FILES))
+        self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
+        self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
+
+
+# Run the test suite when this file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/third-party/argparse
+++ b/third-party/argparse
--- a/third-party/libscan
+++ b/third-party/libscan
Author	SHA1	Message	Date
simon987	013c54daa0	Fix tag delete	2020-12-31 12:55:37 -05:00
simon987	54308ef5e2	Update tags tab automatically	2020-12-31 12:45:23 -05:00
simon987	638c2a5c1a	Update binary names (again)	2020-12-31 11:03:25 -05:00
simon987	9587caddd9	Don't build tests by default, fix enlarge button	2020-12-31 10:55:34 -05:00
simon987	f5bbe0dc97	Update binary names	2020-12-31 10:54:30 -05:00
simon987	f87eac1f90	Update submodules	2020-12-31 10:26:05 -05:00
simon987	ddafbab6a6	Update readme	2020-12-31 10:26:05 -05:00
simon987	b91d574756	Add md5 client-side lib	2020-12-31 10:26:05 -05:00
simon987	576140e542	fix submodules	2020-12-31 10:26:05 -05:00
simon987	050c1283a3	Remove UUID dep, fix incremental scan, use MD5(path) as unique id, version bump	2020-12-31 10:26:05 -05:00
simon987	c6e1ba03bc	Better support for .doc files	2020-12-31 10:26:05 -05:00
simon987	10e32f707f	Update README.md	2020-12-31 10:26:05 -05:00
simon987	86e83bafaf	Update README.md	2020-12-31 10:26:05 -05:00
simon987	51a40c8819	Add .doc support	2020-12-31 10:26:05 -05:00
acc557	36281a5108	Use relative path for loading csv in stats	2020-12-31 10:26:05 -05:00
acc557	76a0bda48b	Update search.html Fix relative stats URL	2020-12-31 10:26:05 -05:00
simon987	0cf29a660c	Fix relative image URL #122	2020-12-31 10:26:05 -05:00
simon987	6cd0741848	update build instructions	2020-12-31 10:26:05 -05:00
simon987	bc120f349d	Setup ARM CI builds	2020-12-23 10:26:26 -05:00
simon987	8cac8c98d7	Update dev builds template	2020-12-22 14:45:16 -05:00
simon987	30921ac52e	Setup drone ci	2020-12-22 14:09:45 -05:00
				`@@ -0,0 +1 @@`
				!function(n){"use strict";function d(n,t){var r=(65535&n)+(65535&t);return(n>>16)+(t>>16)+(r>>16)<<16|65535&r}function f(n,t,r,e,o,u){return d((c=d(d(t,n),d(e,u)))<<(f=o)|c>>>32-f,r);var c,f}function l(n,t,r,e,o,u,c){return f(t&r|~t&e,n,t,o,u,c)}function v(n,t,r,e,o,u,c){return f(t&e|r&~e,n,t,o,u,c)}function g(n,t,r,e,o,u,c){return f(t^r^e,n,t,o,u,c)}function m(n,t,r,e,o,u,c){return f(r^(t|~e),n,t,o,u,c)}function i(n,t){var r,e,o,u;n[t>>5]|=128<<t%32,n[14+(t+64>>>9<<4)]=t;for(var c=1732584193,f=-271733879,i=-1732584194,a=271733878,h=0;h<n.length;h+=16)c=l(r=c,e=f,o=i,u=a,n[h],7,-680876936),a=l(a,c,f,i,n[h+1],12,-389564586),i=l(i,a,c,f,n[h+2],17,606105819),f=l(f,i,a,c,n[h+3],22,-1044525330),c=l(c,f,i,a,n[h+4],7,-176418897),a=l(a,c,f,i,n[h+5],12,1200080426),i=l(i,a,c,f,n[h+6],17,-1473231341),f=l(f,i,a,c,n[h+7],22,-45705983),c=l(c,f,i,a,n[h+8],7,1770035416),a=l(a,c,f,i,n[h+9],12,-1958414417),i=l(i,a,c,f,n[h+10],17,-42063),f=l(f,i,a,c,n[h+11],22,-1990404162),c=l(c,f,i,a,n[h+12],7,1804603682),a=l(a,c,f,i,n[h+13],12,-40341101),i=l(i,a,c,f,n[h+14],17,-1502002290),c=v(c,f=l(f,i,a,c,n[h+15],22,1236535329),i,a,n[h+1],5,-165796510),a=v(a,c,f,i,n[h+6],9,-1069501632),i=v(i,a,c,f,n[h+11],14,643717713),f=v(f,i,a,c,n[h],20,-373897302),c=v(c,f,i,a,n[h+5],5,-701558691),a=v(a,c,f,i,n[h+10],9,38016083),i=v(i,a,c,f,n[h+15],14,-660478335),f=v(f,i,a,c,n[h+4],20,-405537848),c=v(c,f,i,a,n[h+9],5,568446438),a=v(a,c,f,i,n[h+14],9,-1019803690),i=v(i,a,c,f,n[h+3],14,-187363961),f=v(f,i,a,c,n[h+8],20,1163531501),c=v(c,f,i,a,n[h+13],5,-1444681467),a=v(a,c,f,i,n[h+2],9,-51403784),i=v(i,a,c,f,n[h+7],14,1735328473),c=g(c,f=v(f,i,a,c,n[h+12],20,-1926607734),i,a,n[h+5],4,-378558),a=g(a,c,f,i,n[h+8],11,-2022574463),i=g(i,a,c,f,n[h+11],16,1839030562),f=g(f,i,a,c,n[h+14],23,-35309556),c=g(c,f,i,a,n[h+1],4,-1530992060),a=g(a,c,f,i,n[h+4],11,1272893353),i=g(i,a,c,f,n[h+7],16,-155497632),f=g(f,i,a,c,n[h+10],23,-1094730640),c=g(c,f,i,a,n[h+13],4,681279174),a=g(a,c,f,i,n[h],11,-358537222),i=g(i,a,c,
f,n[h+3],16,-722521979),f=g(f,i,a,c,n[h+6],23,76029189),c=g(c,f,i,a,n[h+9],4,-640364487),a=g(a,c,f,i,n[h+12],11,-421815835),i=g(i,a,c,f,n[h+15],16,530742520),c=m(c,f=g(f,i,a,c,n[h+2],23,-995338651),i,a,n[h],6,-198630844),a=m(a,c,f,i,n[h+7],10,1126891415),i=m(i,a,c,f,n[h+14],15,-1416354905),f=m(f,i,a,c,n[h+5],21,-57434055),c=m(c,f,i,a,n[h+12],6,1700485571),a=m(a,c,f,i,n[h+3],10,-1894986606),i=m(i,a,c,f,n[h+10],15,-1051523),f=m(f,i,a,c,n[h+1],21,-2054922799),c=m(c,f,i,a,n[h+8],6,1873313359),a=m(a,c,f,i,n[h+15],10,-30611744),i=m(i,a,c,f,n[h+6],15,-1560198380),f=m(f,i,a,c,n[h+13],21,1309151649),c=m(c,f,i,a,n[h+4],6,-145523070),a=m(a,c,f,i,n[h+11],10,-1120210379),i=m(i,a,c,f,n[h+2],15,718787259),f=m(f,i,a,c,n[h+9],21,-343485551),c=d(c,r),f=d(f,e),i=d(i,o),a=d(a,u);return[c,f,i,a]}function a(n){for(var t="",r=32*n.length,e=0;e<r;e+=8)t+=String.fromCharCode(n[e>>5]>>>e%32&255);return t}function h(n){var t=[];for(t[(n.length>>2)-1]=void 0,e=0;e<t.length;e+=1)t[e]=0;for(var r=8*n.length,e=0;e<r;e+=8)t[e>>5]|=(255&n.charCodeAt(e/8))<<e%32;return t}function e(n){for(var t,r="0123456789abcdef",e="",o=0;o<n.length;o+=1)t=n.charCodeAt(o),e+=r.charAt(t>>>4&15)+r.charAt(15&t);return e}function r(n){return unescape(encodeURIComponent(n))}function o(n){return a(i(h(t=r(n)),8*t.length));var t}function u(n,t){return function(n,t){var r,e,o=h(n),u=[],c=[];for(u[15]=c[15]=void 0,16<o.length&&(o=i(o,8*n.length)),r=0;r<16;r+=1)u[r]=909522486^o[r],c[r]=1549556828^o[r];return e=i(u.concat(h(t)),512+8*t.length),a(i(c.concat(e),640))}(r(n),r(t))}function t(n,t,r){return t?r?u(t,n):e(u(t,n)):r?o(n):e(o(n))}"function"==typeof define&&define.amd?define(function(){return t}):"object"==typeof module&&module.exports?module.exports=t:n.md5=t}(this);