Merge pull request #284 from simon987/dev

v2.12.0
2025-11-03 01:06:52 +00:00 · 2022-04-17 13:38:42 -04:00 · 2022-04-17 13:38:42 -04:00 · fe56da95d5
commit fe56da95d5
parent 25ab883063 9f2ad58f78
56 changed files with 803 additions and 459 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -7,3 +7,6 @@
 [submodule "third-party/libscan/third-party/antiword"]
 	path = third-party/libscan/third-party/antiword
 	url = https://github.com/simon987/antiword
+[submodule "third-party/libscan/third-party/libmobi"]
+	path = third-party/libscan/third-party/libmobi
+	url = https://github.com/bfabiszewski/libmobi
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -4,6 +4,7 @@ set(CMAKE_C_STANDARD 11)
 project(sist2 C)

 option(SIST_DEBUG "Build a debug executable" on)
+option(SIST_FAST "Enable more optimisation flags" off)
 option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)

 add_compile_definitions(
@ -54,6 +55,10 @@ find_package(lmdb CONFIG REQUIRED)
 find_package(cJSON CONFIG REQUIRED)
 find_package(unofficial-mongoose CONFIG REQUIRED)
 find_package(CURL CONFIG REQUIRED)
+find_library(MAGIC_LIB
+        NAMES libmagic.so.1 magic
+        PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
+)


 target_include_directories(
@ -93,10 +98,22 @@ if (SIST_DEBUG)
            PROPERTIES
            OUTPUT_NAME sist2_debug
    )
+elseif (SIST_FAST)
+    target_compile_options(
+            sist2
+            PRIVATE
+
+            -Ofast
+            -march=native
+            -fno-stack-protector
+            -fomit-frame-pointer
+            -freciprocal-math
+    )
 else ()
    target_compile_options(
            sist2
            PRIVATE
+
            -Ofast
            -fno-stack-protector
            -fomit-frame-pointer
@ -121,11 +138,12 @@ target_link_libraries(
        CURL::libcurl

        pthread
-        magic

        c

        scan
+
+        ${MAGIC_LIB}
 )

 add_custom_target(
--- a/2
+++ b/2
@ -9,7 +9,7 @@ RUN strip sist2 || mv sist2_debug sist2

 FROM --platform="linux/amd64" ubuntu:21.10

-RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
+RUN apt update && apt install -y curl libasan5 libmagic1 && rm -rf /var/lib/apt/lists/*

 RUN mkdir -p /usr/share/tessdata && \
    cd /usr/share/tessdata/ && \
--- a/README.md
+++ b/README.md
@ -52,7 +52,7 @@ sist2 (Simple incremental search tool)
 Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
    2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
       recommended!)*
-    3. *(or)* `docker pull simon987/sist2:2.11.7-x64-linux`
+    3. *(or)* `docker pull simon987/sist2:2.12.0-x64-linux`

 1. See [Usage guide](docs/USAGE.md)

--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@ -103,7 +103,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
 * `--thumbnail-count`
    Maximum number of thumbnails to generate. When set to a value >= 2, thumbnails for video previews
    will be generated. The actual number of thumbnails generated depends on the length of the video (maximum 1 image 
-    every ~5s). Set to 0 to completely disable thumbnails.
+    every ~7s). Set to 0 to completely disable thumbnails.
 * `--content-size` 
    Number of bytes of text to be extracted from the content of files (plain text, PDFs etc.).
    Repeated whitespace and special characters do not count toward this limit.
--- a/schema/settings.json
+++ b/schema/settings.json
@ -3,7 +3,7 @@
    "refresh_interval": "30s",
    "codec": "best_compression",
    "number_of_replicas": 0,
-    "highlight.max_analyzed_offset": 10000000
+    "highlight.max_analyzed_offset": 1000000
  },
  "analysis": {
    "tokenizer": {
@ -55,5 +55,37 @@
        ]
      }
    }
+  },
+  "mappings": {
+    "dynamic_templates": [
+      {
+        "keyword_fields": {
+          "match_mapping_type": "string",
+          "match":   "kw_*",
+          "mapping": {
+            "type": "keyword"
+          }
+        }
+      },
+      {
+        "integer_fields": {
+          "match_mapping_type": "*",
+          "match":   "int_*",
+          "mapping": {
+            "type": "integer"
+          }
+        }
+      },
+      {
+        "meta_fields": {
+          "match_mapping_type": "*",
+          "match":   "mt_*",
+          "mapping": {
+            "type": "keyword",
+            "index": false
+          }
+        }
+      }
+    ]
  }
 }
--- a/scripts/start_dev_es.sh
+++ b/scripts/start_dev_es.sh
@ -1,2 +1,3 @@
-docker run --rm -it -p 9200:9200 -e "discovery.type=single-node" \
+docker run --rm -it --name "sist2-dev-es"\
+       	-p 9200:9200 -e "discovery.type=single-node" \
 	-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.14.0
--- a/sist2-vue/dist/css/index.css
+++ b/sist2-vue/dist/css/index.css
--- a/sist2-vue/dist/js/chunk-vendors.js
+++ b/sist2-vue/dist/js/chunk-vendors.js
--- a/sist2-vue/dist/js/index.js
+++ b/sist2-vue/dist/js/index.js
--- a/sist2-vue/package-lock.json
+++ b/sist2-vue/package-lock.json
@ -12,7 +12,6 @@
        "axios": "^0.25.0",
        "bootstrap-vue": "^2.21.2",
        "core-js": "^3.6.5",
-        "crypto-es": "^1.2.7",
        "d3": "^5.16.0",
        "date-fns": "^2.21.3",
        "dom-to-image": "^2.6.0",
@ -5261,11 +5260,6 @@
        "node": "*"
      }
    },
-    "node_modules/crypto-es": {
-      "version": "1.2.7",
-      "resolved": "https://registry.npmjs.org/crypto-es/-/crypto-es-1.2.7.tgz",
-      "integrity": "sha512-UUqiVJ2gUuZFmbFsKmud3uuLcNP2+Opt+5ysmljycFCyhA0+T16XJmo1ev/t5kMChMqWh7IEvURNCqsg+SjZGQ=="
-    },
    "node_modules/css-color-names": {
      "version": "0.0.4",
      "resolved": "https://registry.npmjs.org/css-color-names/-/css-color-names-0.0.4.tgz",
@ -9742,9 +9736,9 @@
      }
    },
    "node_modules/minimist": {
-      "version": "1.2.5",
-      "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
-      "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
+      "version": "1.2.6",
+      "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
+      "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
      "dev": true
    },
    "node_modules/minipass": {
@ -19621,11 +19615,6 @@
        "randomfill": "^1.0.3"
      }
    },
-    "crypto-es": {
-      "version": "1.2.7",
-      "resolved": "https://registry.npmjs.org/crypto-es/-/crypto-es-1.2.7.tgz",
-      "integrity": "sha512-UUqiVJ2gUuZFmbFsKmud3uuLcNP2+Opt+5ysmljycFCyhA0+T16XJmo1ev/t5kMChMqWh7IEvURNCqsg+SjZGQ=="
-    },
    "css-color-names": {
      "version": "0.0.4",
      "resolved": "https://registry.npmjs.org/css-color-names/-/css-color-names-0.0.4.tgz",
@ -23335,9 +23324,9 @@
      }
    },
    "minimist": {
-      "version": "1.2.5",
-      "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
-      "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
+      "version": "1.2.6",
+      "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
+      "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
      "dev": true
    },
    "minipass": {
--- a/sist2-vue/package.json
+++ b/sist2-vue/package.json
@ -11,7 +11,6 @@
    "axios": "^0.25.0",
    "bootstrap-vue": "^2.21.2",
    "core-js": "^3.6.5",
-    "crypto-es": "^1.2.7",
    "d3": "^5.16.0",
    "date-fns": "^2.21.3",
    "dom-to-image": "^2.6.0",
--- a/sist2-vue/src/Sist2Api.ts
+++ b/sist2-vue/src/Sist2Api.ts
@ -1,6 +1,5 @@
 import axios from "axios";
 import {ext, strUnescape, lum} from "./util";
-import CryptoES from 'crypto-es';

 export interface EsTag {
    id: string
@ -30,7 +29,6 @@ export interface EsHit {
    _index: string
    _id: string
    _score: number
-    _path_md5: string
    _type: string
    _tags: Tag[]
    _seq: number
@ -249,11 +247,6 @@ class Sist2Api {
                res.hits.hits.forEach((hit: EsHit) => {
                    hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
                    hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
-                    hit["_path_md5"] = CryptoES.MD5(
-                        hit["_source"]["path"] +
-                        (hit["_source"]["path"] ? "/" : "") +
-                        hit["_source"]["name"] + ext(hit)
-                    ).toString();

                    this.setHitProps(hit);
                    this.setHitTags(hit);
@ -343,10 +336,6 @@ class Sist2Api {
        };
    }

-    getDocInfo(docId: string) {
-        return axios.get(`${this.baseUrl}d/${docId}`);
-    }
-
    getTags() {
        return this.esQuery({
            aggs: {
@ -380,8 +369,7 @@ class Sist2Api {
        return axios.post(`${this.baseUrl}tag/` + hit["_source"]["index"], {
            delete: false,
            name: tag,
-            doc_id: hit["_id"],
-            path_md5: hit._path_md5
+            doc_id: hit["_id"]
        });
    }

@ -389,8 +377,7 @@ class Sist2Api {
        return axios.post(`${this.baseUrl}tag/` + hit["_source"]["index"], {
            delete: true,
            name: tag,
-            doc_id: hit["_id"],
-            path_md5: hit._path_md5
+            doc_id: hit["_id"]
        });
    }

--- a/sist2-vue/src/Sist2Query.ts
+++ b/sist2-vue/src/Sist2Query.ts
@ -69,7 +69,7 @@ interface SortMode {

 class Sist2Query {

-    searchQuery(): any {
+    searchQuery(blankSearch: boolean = false): any {

        const getters = store.getters;

@ -93,22 +93,6 @@ class Sist2Query {
            {terms: {index: selectedIndexIds}}
        ] as any[];

-        if (sizeMin && sizeMax) {
-            filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
-        } else if (sizeMin) {
-            filters.push({range: {size: {gte: sizeMin}}})
-        } else if (sizeMax) {
-            filters.push({range: {size: {lte: sizeMax}}})
-        }
-
-        if (dateMin && dateMax) {
-            filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
-        } else if (dateMin) {
-            filters.push({range: {mtime: {gte: dateMin}}})
-        } else if (dateMax) {
-            filters.push({range: {mtime: {lte: dateMax}}})
-        }
-
        const fields = [
            "name^8",
            "content^3",
@ -128,20 +112,39 @@ class Sist2Query {
            fields.push("name.nGram^3");
        }

-        const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
-        if (path !== "") {
-            filters.push({term: {path: path}})
-        }
+        if (!blankSearch) {
+            if (sizeMin && sizeMax) {
+                filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
+            } else if (sizeMin) {
+                filters.push({range: {size: {gte: sizeMin}}})
+            } else if (sizeMax) {
+                filters.push({range: {size: {lte: sizeMax}}})
+            }

-        if (selectedMimeTypes.length > 0) {
-            filters.push({terms: {"mime": selectedMimeTypes}});
-        }
+            if (dateMin && dateMax) {
+                filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
+            } else if (dateMin) {
+                filters.push({range: {mtime: {gte: dateMin}}})
+            } else if (dateMax) {
+                filters.push({range: {mtime: {lte: dateMax}}})
+            }

-        if (selectedTags.length > 0) {
-            if (getters.optTagOrOperator) {
-                filters.push({terms: {"tag": selectedTags}});
-            } else {
-                selectedTags.forEach((tag: string) => filters.push({term: {"tag": tag}}));
+            const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
+
+            if (path !== "") {
+                filters.push({term: {path: path}})
+            }
+
+            if (selectedMimeTypes.length > 0) {
+                filters.push({terms: {"mime": selectedMimeTypes}});
+            }
+
+            if (selectedTags.length > 0) {
+                if (getters.optTagOrOperator) {
+                    filters.push({terms: {"tag": selectedTags}});
+                } else {
+                    selectedTags.forEach((tag: string) => filters.push({term: {"tag": tag}}));
+                }
            }
        }

@ -182,7 +185,7 @@ class Sist2Query {
            size: size,
        } as any;

-        if (!empty) {
+        if (!empty && !blankSearch) {
            q.query.bool.must = query;
        }

@ -207,7 +210,7 @@ class Sist2Query {
            };

            if (!legacyES) {
-                q.highlight.max_analyzed_offset = 9_999_999;
+                q.highlight.max_analyzed_offset = 999_999;
            }

            if (getters.optSearchInPath) {
@ -237,7 +240,7 @@ class Sist2Query {
                }
            }

-            if (!empty) {
+            if (!empty && !blankSearch) {
                q.query.function_score.query.bool.must.push(query);
            }
        }
--- a/sist2-vue/src/components/InfoTable.vue
+++ b/sist2-vue/src/components/InfoTable.vue
@ -72,6 +72,12 @@ export default {
        }
      });

+      Object.keys(src).forEach(key => {
+        if (key.startsWith("mt_") || key.startsWith("int_")) {
+          items.push({key: key, value: src[key]});
+        }
+      });
+
      // Exif GPS
      if ("exif_gps_longitude_dec" in src) {
        items.push({
--- a/sist2-vue/src/components/LazyContentDiv.vue
+++ b/sist2-vue/src/components/LazyContentDiv.vue
@ -1,11 +1,13 @@
 <template>
  <Preloader v-if="loading"></Preloader>
-  <div v-else-if="content" class="content-div">{{ content }}</div>
+  <div v-else-if="content" class="content-div" v-html="content"></div>
 </template>

 <script>
 import Sist2Api from "@/Sist2Api";
 import Preloader from "@/components/Preloader";
+import Sist2Query from "@/Sist2Query";
+import store from "@/store";

 export default {
  name: "LazyContentDiv",
@ -18,10 +20,72 @@ export default {
    }
  },
  mounted() {
-    Sist2Api.getDocInfo(this.docId).then(src => {
-      this.content = src.data.content;
+    const query = Sist2Query.searchQuery();
+
+    if (this.$store.state.optHighlight) {
+
+      const fields = this.$store.state.fuzzy
+          ? {"content.nGram": {}}
+          : {content: {}};
+
+      query.highlight = {
+        encoder: "html", // escape document text: the fragment is rendered through v-html
+        pre_tags: ["<mark>"], post_tags: ["</mark>"],
+        number_of_fragments: 0,
+        fields,
+      };
+
+      if (!store.state.sist2Info.esVersionLegacy) {
+        query.highlight.max_analyzed_offset = 999_999;
+      }
+    }
+
+    if ("function_score" in query.query) {
+      query.query = query.query.function_score.query;
+    }
+
+    if (!("must" in query.query.bool)) {
+      query.query.bool.must = [];
+    } else if (!Array.isArray(query.query.bool.must)) {
+      query.query.bool.must = [query.query.bool.must];
+    }
+
+    query.query.bool.must.push({match: {_id: this.docId}});
+
+    delete query["sort"];
+    delete query["aggs"];
+    delete query["search_after"];
+    delete query.query["function_score"];
+
+    query._source = {
+      includes: ["content", "name", "path", "extension"]
+    }
+
+    query.size = 1;
+
+    Sist2Api.esQuery(query).then(resp => {
      this.loading = false;
-    })
+      if (resp.hits.hits.length === 1) {
+        this.content = this.getContent(resp.hits.hits[0]);
+      } else {
+        console.log("FIXME: could not get content")
+        console.log(resp)
+      }
+    });
+  },
+  methods: {
+    getContent(doc) {
+      // Highlight fragments take precedence: fuzzy (nGram) field first, then the plain one.
+      const hl = doc.highlight || {};
+      const fragments = hl["content.nGram"] || hl.content;
+      if (fragments) {
+        return fragments[0];
+      }
+      // No fragment: show the stored text, escaped because the template renders it with v-html.
+      return String(doc._source.content || "")
+          .replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;")
+          .replace(/"/g, "&quot;").replace(/'/g, "&#39;");
+    }
  }
 }
 </script>
--- a/sist2-vue/src/components/Lightbox.vue
+++ b/sist2-vue/src/components/Lightbox.vue
@ -1,6 +1,7 @@
 <template>
-  <div>
+  <div :class="{'disable-animations': $store.state.optSimpleLightbox}">
    <FsLightbox
+        ref="lightbox"
        :key="lightboxKey"
        :toggler="showLightbox"
        :sources="lightboxSources"
@ -10,7 +11,7 @@
        :source-index="lightboxSlide"
        :custom-toolbar-buttons="customButtons"
        :slideshow-time="$store.getters.optLightboxSlideDuration * 1000"
-        :zoom-increment="0.5"
+        :zoom-increment="0.25"
        :load-only-current-source="$store.getters.optLightboxLoadOnlyCurrent"
        :on-close="onClose"
        :on-open="onShow"
@ -29,6 +30,7 @@ export default {
  components: {FsLightbox},
  data() {
    return {
+      disableAnimations: true,
      customButtons: [
        {
          viewBox: "0 0 384.928 384.928",
@ -64,7 +66,84 @@ export default {
      return this.$store.getters["uiLightboxTypes"];
    }
  },
+  mounted() {
+    const listener = document.onkeydown;
+
+    document.onkeydown = (e) => {
+
+      const ret = this.keyDownListener(e)
+
+      if (listener && ret) {
+        return listener(e);
+      }
+    };
+  },
  methods: {
+    keyDownListener(e) {
+      // Lightbox keyboard shortcuts. Returns false when the key was handled here and true
+      // when it was not, so that the caller can forward the event to the previous handler.
+      const lightbox = this.$refs.lightbox;
+      const isLightboxClosed = lightbox === undefined || lightbox.$el.tagName === undefined;
+
+      if (isLightboxClosed) {
+        return true;
+      }
+
+      const lightboxStore = lightbox.fsLightboxStore.slice(-1)[0];
+
+      switch (e.key) {
+        case " ": {
+          e.preventDefault();
+          e.stopPropagation();
+          e.stopImmediatePropagation();
+
+          // Find video at current slide, toggle play/pause
+          [...document.getElementsByClassName("fslightbox-absoluted")].forEach(elem => {
+            if (elem.style.transform === "translate(0px)" || elem.style.transform === "translate(0px, 0px)") {
+              const vid = elem.getElementsByTagName("video")[0];
+
+              if (vid) {
+                if (vid.paused) {
+                  vid.play();
+                } else {
+                  vid.pause()
+                }
+              }
+            }
+          });
+
+          return false;
+        }
+        case "ArrowUp":
+        case "k": {
+          if (!lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
+            lightboxStore.core.thumbsToggler.toggleThumbs();
+          }
+          return false;
+        }
+        case "ArrowDown":
+        case "j": {
+          if (lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
+            lightboxStore.core.thumbsToggler.toggleThumbs();
+          }
+          return false;
+        }
+        case "h": {
+          if (lightboxStore.core.stageManager.getPreviousSlideIndex) {
+            lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getPreviousSlideIndex());
+          }
+          return false;
+        }
+        case "l": {
+          if (lightboxStore.core.stageManager.getNextSlideIndex) {
+            lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getNextSlideIndex());
+          }
+          return false;
+        }
+      }
+
+      return true;
+    },
    onDownloadClick() {
      const url = this.lightboxSources[this.lightboxSlide];

@ -125,4 +204,20 @@ export default {
 .fslightbox-toolbar-button:nth-child(7) {
  order: 7;
 }
+
+.disable-animations .fslightbox-container {
+  background: rgba(30,30,30,.9);
+}
+
+.disable-animations .fslightbox-transform-transition {
+  transition: none;
+}
+
+.disable-animations .fslightbox-fade-in-strong {
+  animation: none;
+}
+
+.fslightbox-container video, .fslightbox-container img {
+  cursor: unset !important;
+}
 </style>
--- a/sist2-vue/src/components/TagPicker.vue
+++ b/sist2-vue/src/components/TagPicker.vue
@ -1,5 +1,13 @@
 <template>
-  <div id="tagTree"></div>
+  <div>
+    <b-input-group v-if="showSearchBar" id="tag-picker-filter-bar">
+      <b-form-input :value="filter"
+                    :placeholder="$t('tagFilter')"
+                    @input="onFilter($event)"></b-form-input>
+    </b-input-group>
+
+    <div id="tagTree"></div>
+  </div>
 </template>

 <script>
@ -112,10 +120,12 @@ function addTag(map, tag, id, count) {

 export default {
  name: "TagPicker",
+  props: ["showSearchBar"],
  data() {
    return {
      tagTree: null,
      loadedFromArgs: false,
+      filter: ""
    }
  },
  mounted() {
@ -129,6 +139,10 @@ export default {
    });
  },
  methods: {
+    onFilter(value) {
+      this.filter = value;
+      if (this.tagTree) this.tagTree.search(value); // tagTree stays null until initializeTree() has run
+    },
    initializeTree() {
      const tagMap = [];
      this.tagTree = new InspireTree({
@ -163,7 +177,8 @@ export default {
      });
    },
    handleTreeClick(node, e) {
-      if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused") {
+      if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused"
+          || e === "matched" || e === "hidden") {
        return;
      }

@ -180,7 +195,15 @@ export default {
 }
 </style>
 <style>
-.inspire-tree .focused>.wholerow {
+.inspire-tree .focused > .wholerow {
  border: none;
 }
+
+#tag-picker-filter-bar {
+  padding: 10px 4px 4px;
+}
+
+.theme-black .inspire-tree .matched > .wholerow {
+  background: rgba(251, 191, 41, 0.25);
+}
 </style>
--- a/sist2-vue/src/i18n/messages.ts
+++ b/sist2-vue/src/i18n/messages.ts
@ -16,6 +16,7 @@ export default {
        pages: "pages",
        mimeTypes: "Media types",
        tags: "Tags",
+        tagFilter: "Filter tags",
        help: {
            simpleSearch: "Simple search",
            advancedSearch: "Advanced search",
@ -72,7 +73,9 @@ export default {
            hideLegacy: "Hide the 'legacyES' Elasticsearch notice",
            updateMimeMap: "Update the Media Types tree in real time",
            useDatePicker: "Use a Date Picker component rather than a slider",
-            vidPreviewInterval: "Video preview frame duration in ms"
+            vidPreviewInterval: "Video preview frame duration in ms",
+            simpleLightbox: "Disable animations in image viewer",
+            showTagPickerFilter: "Display the tag filter bar"
        },
        queryMode: {
            simple: "Simple",
@ -182,6 +185,7 @@ export default {
        pages: "pages",
        mimeTypes: "Types de médias",
        tags: "Tags",
+        tagFilter: "Filtrer les tags",
        help: {
            simpleSearch: "Recherche simple",
            advancedSearch: "Recherche avancée",
@ -239,7 +243,9 @@ export default {
            hideLegacy: "Masquer la notice 'legacyES' Elasticsearch",
            updateMimeMap: "Mettre à jour l'arbre de Types de médias en temps réel",
            useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider",
-            vidPreviewInterval: "Durée des images d'aperçu video en millisecondes"
+            vidPreviewInterval: "Durée des images d'aperçu video en millisecondes",
+            simpleLightbox: "Désactiver les animations du visualiseur d'images",
+            showTagPickerFilter: "Afficher le filtre dans l'onglet Tags"
        },
        queryMode: {
            simple: "Simple",
@ -350,6 +356,7 @@ export default {
        pages: "页",
        mimeTypes: "文件类型",
        tags: "标签",
+        tagFilter: "筛选标签",
        help: {
            simpleSearch: "简易搜索",
            advancedSearch: "高级搜索",
@ -406,7 +413,9 @@ export default {
            hideLegacy: "隐藏'legacyES' Elasticsearch 通知",
            updateMimeMap: "媒体类型树的实时更新",
            useDatePicker: "使用日期选择器组件而不是滑块",
-            vidPreviewInterval: "视频预览帧的持续时间，以毫秒为单位"
+            vidPreviewInterval: "视频预览帧的持续时间，以毫秒为单位",
+            simpleLightbox: "在图片查看器中，禁用动画",
+            showTagPickerFilter: "显示标签过滤栏"
        },
        queryMode: {
            simple: "简单",
--- a/sist2-vue/src/store/index.ts
+++ b/sist2-vue/src/store/index.ts
@ -4,6 +4,8 @@ import VueRouter, {Route} from "vue-router";
 import {EsHit, EsResult, EsTag, Index, Tag} from "@/Sist2Api";
 import {deserializeMimes, serializeMimes} from "@/util";

+const CONF_VERSION = 2;
+
 Vue.use(Vuex)

 export default new Vuex.Store({
@ -24,7 +26,6 @@ export default new Vuex.Store({
        sortMode: "score",

        fuzzy: false,
-        size: 60,

        optLang: "en",
        optLangIsDefault: true,
@ -32,6 +33,7 @@ export default new Vuex.Store({
        optTheme: "light",
        optDisplay: "grid",

+        optSize: 60,
        optHighlight: true,
        optTagOrOperator: false,
        optFuzzy: true,
@ -51,6 +53,8 @@ export default new Vuex.Store({
        optUpdateMimeMap: false,
        optUseDatePicker: false,
        optVidPreviewInterval: 700,
+        optSimpleLightbox: true,
+        optShowTagPickerFilter: true,

        _onLoadSelectedIndices: [] as string[],
        _onLoadSelectedMimeTypes: [] as string[],
@ -149,7 +153,7 @@ export default new Vuex.Store({
        setOptSuggestPath: (state, val) => state.optSuggestPath = val,
        setOptFragmentSize: (state, val) => state.optFragmentSize = val,
        setOptQueryMode: (state, val) => state.optQueryMode = val,
-        setOptResultSize: (state, val) => state.size = val,
+        setOptResultSize: (state, val) => state.optSize = val,
        setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,

        setOptTreemapType: (state, val) => state.optTreemapType = val,
@ -161,6 +165,8 @@ export default new Vuex.Store({
        setOptUpdateMimeMap: (state, val) => state.optUpdateMimeMap = val,
        setOptUseDatePicker: (state, val) => state.optUseDatePicker = val,
        setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
+        setOptSimpleLightbox: (state, val) => state.optSimpleLightbox = val,
+        setOptShowTagPickerFilter: (state, val) => state.optShowTagPickerFilter = val,

        setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
        setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
@ -239,6 +245,11 @@ export default new Vuex.Store({
            }
        },
        async updateArgs({state}, router: VueRouter) {
+
+            if (router.currentRoute.path !== "/") {
+                return;
+            }
+
            await router.push({
                query: {
                    q: state.searchText.trim() ? state.searchText.trim().replace(/\s+/g, " ") : undefined,
@ -267,6 +278,8 @@ export default new Vuex.Store({
                }
            });

+            conf["version"] = CONF_VERSION;
+
            localStorage.setItem("sist2_configuration", JSON.stringify(conf));
        },
        loadConfiguration({state}) {
@ -274,6 +287,11 @@ export default new Vuex.Store({
            if (confString) {
                const conf = JSON.parse(confString);

+                if (!("version" in conf) || conf["version"] != CONF_VERSION) {
+                    localStorage.removeItem("sist2_configuration");
+                    window.location.reload();
+                    return; // do not apply the outdated values while the page reloads
+                }
                Object.keys(state).forEach((key) => {
                    if (key.startsWith("opt")) {
                        (state as any)[key] = conf[key];
@ -335,7 +353,7 @@ export default new Vuex.Store({
        searchText: state => state.searchText,
        pathText: state => state.pathText,
        fuzzy: state => state.fuzzy,
-        size: state => state.size,
+        size: state => state.optSize,
        sortMode: state => state.sortMode,
        lastQueryResult: state => state.lastQueryResults,
        lastDoc: function (state): EsHit | null {
@ -373,10 +391,12 @@ export default new Vuex.Store({
        optTreemapColor: state => state.optTreemapColor,
        optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
        optLightboxSlideDuration: state => state.optLightboxSlideDuration,
-        optResultSize: state => state.size,
+        optResultSize: state => state.optSize,
        optHideLegacy: state => state.optHideLegacy,
        optUpdateMimeMap: state => state.optUpdateMimeMap,
        optUseDatePicker: state => state.optUseDatePicker,
        optVidPreviewInterval: state => state.optVidPreviewInterval,
+        optSimpleLightbox: state => state.optSimpleLightbox,
+        optShowTagPickerFilter: state => state.optShowTagPickerFilter,
    }
 })
--- a/sist2-vue/src/views/Configuration.vue
+++ b/sist2-vue/src/views/Configuration.vue
@ -45,6 +45,16 @@
          <b-form-checkbox :checked="optUseDatePicker" @input="setOptUseDatePicker">
            {{ $t("opt.useDatePicker") }}
          </b-form-checkbox>
+
+          <b-form-checkbox :checked="optSimpleLightbox" @input="setOptSimpleLightbox">{{
+              $t("opt.simpleLightbox")
+            }}
+          </b-form-checkbox>
+
+          <b-form-checkbox :checked="optShowTagPickerFilter" @input="setOptShowTagPickerFilter">{{
+              $t("opt.showTagPickerFilter")
+            }}
+          </b-form-checkbox>
        </b-card>

        <br/>
@ -239,6 +249,8 @@ export default {
      "optUpdateMimeMap",
      "optUseDatePicker",
      "optVidPreviewInterval",
+      "optSimpleLightbox",
+      "optShowTagPickerFilter",
    ]),
    clientWidth() {
      return window.innerWidth;
@ -285,6 +297,8 @@ export default {
      "setOptUpdateMimeMap",
      "setOptUseDatePicker",
      "setOptVidPreviewInterval",
+      "setOptSimpleLightbox",
+      "setOptShowTagPickerFilter",
    ]),
    onResetClick() {
      localStorage.removeItem("sist2_configuration");
--- a/sist2-vue/src/views/FilePage.vue
+++ b/sist2-vue/src/views/FilePage.vue
@ -56,6 +56,22 @@ export default Vue.extend({
    onThumbnailClick() {
      window.open(`/f/${this.doc._id}`, "_blank");
    },
+    /**
+     * Build the search request body (bool/must/match, size 1) that looks up
+     * a single document whose `field` matches `id`.
+     * @param field name of the document field to match against
+     * @param id value the field must match
+     */
+    findByCustomField(field, id) {
+      const matchClause = {match: {[field]: id}};
+
+      return {
+        query: {
+          bool: {must: [matchClause]}
+        },
+        size: 1
+      };
+    },
    findById(id) {
      return {
        query: {
@ -103,6 +119,8 @@ export default Vue.extend({
      query = this.findById(this.$route.query.byId);
    } else if (this.$route.query.byName) {
      query = this.findByName(this.$route.query.byName);
+    } else if (this.$route.query.by && this.$route.query.q) {
+      query = this.findByCustomField(this.$route.query.by, this.$route.query.q)
    }

    if (query) {
--- a/sist2-vue/src/views/SearchPage.vue
+++ b/sist2-vue/src/views/SearchPage.vue
@ -32,7 +32,7 @@
              <MimePicker></MimePicker>
            </b-tab>
            <b-tab :title="$t('tags')">
-              <TagPicker></TagPicker>
+              <TagPicker :show-search-bar="$store.state.optShowTagPickerFilter"></TagPicker>
            </b-tab>
          </b-tabs>
        </b-col>
@ -139,7 +139,9 @@ export default Vue.extend({
        this.setSist2Info(data);
        this.setIndices(data.indices);

-        Sist2Api.getMimeTypes(Sist2Query.searchQuery()).then(({mimeMap}) => {
+        const doBlankSearch = !this.$store.state.optUpdateMimeMap;
+
+        Sist2Api.getMimeTypes(Sist2Query.searchQuery(doBlankSearch)).then(({mimeMap}) => {
          this.$store.commit("setUiMimeMap", mimeMap);
          this.uiLoading = false;
          this.search(true);
@ -206,7 +208,7 @@ export default Vue.extend({
      this.$store.commit("setUiReachedScrollEnd", false);
    },
    async handleSearch(resp: EsResult) {
-      if (resp.hits.hits.length == 0) {
+      if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) {
        this.$store.commit("setUiReachedScrollEnd", true);
      }

@ -246,6 +248,8 @@ export default Vue.extend({
      this.$store.commit("setLastQueryResult", resp);

      this.docs.push(...resp.hits.hits);
+
+      resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
    },
    getDateRange(): Promise<{ min: number, max: number }> {
      return sist2.esQuery({
--- a/src/cli.c
+++ b/src/cli.c
@ -81,6 +81,11 @@ void web_args_destroy(web_args_t *args) {
 }

 void exec_args_destroy(exec_args_t *args) {
+
+    if (args->index_path != NULL) { // index_path is freed here, before the struct that holds it
+        free(args->index_path);
+    }
+
    free(args);
 }

@ -124,6 +129,9 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
        args->tn_count = DEFAULT_THUMBNAIL_COUNT;
    } else if (args->tn_count == OPTION_VALUE_DISABLE) {
        args->tn_count = 0;
+    } else if (args->tn_count > 1000) { // reject thumbnail counts above 1000
+        printf("Invalid value --thumbnail-count argument: %d. Must be <= 1000.\n", args->tn_count);
+        return 1;
    }

    if (args->content_size == OPTION_VALUE_UNSPECIFIED) {
--- a/src/cli.h
+++ b/src/cli.h
@ -85,7 +85,7 @@ typedef struct web_args {
 typedef struct exec_args {
    char *es_url;
    char *es_index;
-    const char *index_path;
+    char *index_path;
    const char *script_path;
    int async_script;
    char *script;
--- a/src/index/elastic.c
+++ b/src/index/elastic.c
@ -21,6 +21,8 @@ void free_queue(int max);

 void elastic_flush();

+void print_error(response_t *r);
+
 void destroy_indexer(es_indexer_t *indexer) {

    if (indexer == NULL) {
@ -45,7 +47,7 @@ void elastic_cleanup() {
    destroy_indexer(Indexer);
 }

-void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
+void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {

    cJSON *line = cJSON_CreateObject();

@ -72,19 +74,19 @@ void delete_document(const char* document_id_str, void* UNUSED(_data)) {
    bulk_line->type = ES_BULK_LINE_DELETE;
    bulk_line->next = NULL;

-    memcpy(bulk_line->path_md5_str, document_id_str, MD5_STR_LENGTH);
+    strcpy(bulk_line->doc_id, document_id_str);
    tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
 }


-void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
+void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
    char *json = cJSON_PrintUnformatted(document);

    size_t json_len = strlen(json);
    es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
    bulk_line->type = ES_BULK_LINE_INDEX;
    memcpy(bulk_line->line, json, json_len);
-    memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
+    strcpy(bulk_line->doc_id, doc_id);
    *(bulk_line->line + json_len) = '\n';
    *(bulk_line->line + json_len + 1) = '\0';
    bulk_line->next = NULL;
@ -93,7 +95,7 @@ void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
    tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
 }

-void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
+void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) {

    if (Indexer == NULL) {
        Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
@ -108,16 +110,16 @@ void execute_update_script(const char *script, int async, const char index_id[MD
    cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
    cJSON_AddStringToObject(term_obj, "index", index_id);

-    char *str = cJSON_Print(body);
+    char *str = cJSON_PrintUnformatted(body);

-    char bulk_url[4096];
+    char url[4096];
    if (async) {
-        snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
+        snprintf(url, sizeof(url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
                 Indexer->es_index);
    } else {
-        snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
+        snprintf(url, sizeof(url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
    }
-    response_t *r = web_post(bulk_url, str);
+    response_t *r = web_post(url, str);
    if (!async) {
        LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
    }
@ -137,6 +139,11 @@ void execute_update_script(const char *script, int async, const char index_id[MD

    if (async) {
        cJSON *task = cJSON_GetObjectItem(resp, "task");
+
+        if (task == NULL) {
+            LOG_FATALF("elastic.c", "FIXME: Could not get task id: %s", r->body);
+        }
+
        LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
    }

@ -167,7 +174,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
            snprintf(
                    action_str, sizeof(action_str),
                    "{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
-                    line->path_md5_str, Indexer->es_index
+                    line->doc_id, Indexer->es_index
            );

            size_t action_str_len = strlen(action_str);
@ -184,7 +191,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
            snprintf(
                    action_str, sizeof(action_str),
                    "{\"delete\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
-                    line->path_md5_str, Indexer->es_index
+                    line->doc_id, Indexer->es_index
            );

            size_t action_str_len = strlen(action_str);
@ -212,7 +219,13 @@ void print_errors(response_t *r) {
    *(tmp + r->size) = '\0';

    cJSON *ret_json = cJSON_Parse(tmp);
-    if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
+    cJSON *errors = cJSON_GetObjectItem(ret_json, "errors");
+
+    if (errors == NULL) {
+        char *str = cJSON_Print(ret_json);
+        LOG_ERRORF("elastic.c", "%s\n", str);
+        cJSON_free(str);
+    } else if (errors->valueint != 0) {
        cJSON *err;
        cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
            if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
@ -263,7 +276,7 @@ void _elastic_flush(int max) {
    if (r->status_code == 413) {

        if (max <= 1) {
-            LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
+            LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id)
            free_response(r);
            free(buf);
            free_queue(1);
@ -413,12 +426,22 @ es_version_t *elastic_get_version(const char *es_url) {
    *(tmp + r->size) = '\0';
    cJSON *response = cJSON_Parse(tmp);
    free(tmp);
-    free_response(r);

    if (response == NULL) {
+        free_response(r); // r is no longer released before this check, so free it on this exit too
        return NULL;
    }

+    if (cJSON_GetObjectItem(response, "error") != NULL) {
+        LOG_WARNING("elastic.c", "Could not get Elasticsearch version")
+        print_error(r);
+        free_response(r);
+        cJSON_Delete(response); // release the parsed body on this early return, as the version check below does
+        return NULL;
+    }
+
+    free_response(r);
+
    if (cJSON_GetObjectItem(response, "version") == NULL ||
        cJSON_GetObjectItem(cJSON_GetObjectItem(response, "version"), "number") == NULL) {
        cJSON_Delete(response);
--- a/src/index/elastic.h
+++ b/src/index/elastic.h
@ -8,7 +8,7 @@

 typedef struct es_bulk_line {
    struct es_bulk_line *next;
-    char path_md5_str[MD5_STR_LENGTH];
+    char doc_id[SIST_DOC_ID_LEN]; // document id; emitted as "_id" in the bulk index/delete action line
    int type;
    char line[0];
 } es_bulk_line_t;
@ -20,8 +20,8 @@ typedef struct {
 } es_version_t;

 #define VERSION_GE(version, maj, min) ((version)->major > (maj) || ((version)->major == (maj) && (version)->minor >= (min)))
-#define IS_SUPPORTED_ES_VERSION(es_version) VERSION_GE((es_version), 6, 8)
-#define USE_LEGACY_ES_SETTINGS(es_version) (!VERSION_GE((es_version), 7, 14))
+#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8))
+#define USE_LEGACY_ES_SETTINGS(es_version) ((es_version) != NULL && (!VERSION_GE((es_version), 7, 14)))

 __always_inline
 static const char *format_es_version(es_version_t *version) {
@ -40,9 +40,9 @@ typedef struct es_indexer es_indexer_t;

 void elastic_index_line(es_bulk_line_t *line);

-void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
+void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]); // same parameter name and bound as the definition in elastic.c

-void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
+void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]); // doc_id is copied into the bulk line's doc_id field

 void delete_document(const char *document_id_str, void* data);

@ -59,6 +59,6 @@ char *elastic_get_status();

 es_version_t *elastic_get_version(const char *es_url);

-void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
+void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]);

 #endif
--- a/src/index/static_generated.c
+++ b/src/index/static_generated.c
--- a/src/index/web.c
+++ b/src/index/web.c
@ -22,7 +22,7 @@ void free_response(response_t *resp) {
    free(resp);
 }

-void web_post_async_poll(subreq_ctx_t* req) {
+void web_post_async_poll(subreq_ctx_t *req) {
    fd_set fdread;
    fd_set fdwrite;
    fd_set fdexcep;
@ -34,7 +34,7 @@ void web_post_async_poll(subreq_ctx_t* req) {

    CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);

-    if(mc != CURLM_OK) {
+    if (mc != CURLM_OK) {
        req->done = TRUE;
        return;
    }
@ -47,7 +47,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
    struct timeval timeout = {1, 0};
    int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);

-    switch(rc) {
+    switch (rc) {
        case -1:
            req->done = TRUE;
            break;
@ -142,6 +142,9 @@ response_t *web_post(const char *url, const char *data) {
    curl_easy_setopt(curl, CURLOPT_POST, 1);
    curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");

+    char err_buffer[CURL_ERROR_SIZE + 1] = {};
+    curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
+
    struct curl_slist *headers = NULL;
    headers = curl_slist_append(headers, "Content-Type: application/json");
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
@ -151,12 +154,16 @@ response_t *web_post(const char *url, const char *data) {
    curl_easy_perform(curl);
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);

-    curl_easy_cleanup(curl);
-    curl_slist_free_all(headers);
-
    resp->body = buffer.buf;
    resp->size = buffer.cur;

+    if (resp->status_code == 0) {
+        LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
+    }
+
+    curl_easy_cleanup(curl);
+    curl_slist_free_all(headers);
+
    return resp;
 }

@ -175,7 +182,7 @@ response_t *web_put(const char *url, const char *data) {
    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
    curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
    curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
-    curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
+    curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);

    struct curl_slist *headers = NULL;
    headers = curl_slist_append(headers, "Content-Type: application/json");
--- a/src/io/serialize.c
+++ b/src/io/serialize.c
@ -124,9 +124,7 @@ char *build_json_string(document_t *doc) {
        cJSON_AddStringToObject(json, "path", "");
    }

-    char md5_str[MD5_STR_LENGTH];
-    buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, md5_str);
-    cJSON_AddStringToObject(json, "_id", md5_str);
+    cJSON_AddStringToObject(json, "_id", doc->doc_id);

    // Metadata
    meta_line_t *meta = doc->meta_head;
@ -452,32 +450,31 @@ void read_lines(const char *path, const line_processor_t processor) {

    dyn_buffer_destroy(&buf);
    fclose(file);
-
 }

-void read_index_ndjson(const char *line, void* _data) {
-    void** data = _data;
-    const char* index_id = data[0];
+void read_index_ndjson(const char *line, void *_data) { // per-line callback that read_index() hands to read_lines()
+    void **data = _data; // two-slot array built by read_index(): [0] = index id, [1] = index_func callback
+    const char *index_id = data[0];
    index_func func = data[1];
    read_index_bin_handle_line(line, index_id, func);
 }

-void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
+void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func func) { // only INDEX_TYPE_NDJSON is handled; any other type is a no-op
    if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
        read_lines(path, (line_processor_t) {
-            .data = (void*[2]){(void*)index_id, func} ,
-            .func = read_index_ndjson,
+                .data = (void *[2]) {(void *) index_id, func}, // packed as {index id, callback}; unpacked in read_index_ndjson()
+                .func = read_index_ndjson,
        });
    }
 }

 static __thread GHashTable *IncrementalReadTable = NULL;

-void json_put_incremental(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
+void json_put_incremental(cJSON *document, UNUSED(const char doc_id[SIST_DOC_ID_LEN])) { // the id is read from the document's "_id" field; the doc_id parameter is unused
    const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
    const int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;

-    incremental_put_str(IncrementalReadTable, path_md5_str, mtime);
+    incremental_put(IncrementalReadTable, path_md5_str, mtime); // records "_id" -> mtime in the thread-local IncrementalReadTable
 }

 void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc) {
@ -490,13 +487,11 @@ static __thread GHashTable *IncrementalNewTable = NULL;
 static __thread store_t *IncrementalCopySourceStore = NULL;
 static __thread store_t *IncrementalCopyDestinationStore = NULL;

-void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
+void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {

-    const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
-    unsigned char path_md5[MD5_DIGEST_LENGTH];
-    hex2buf(path_md5_str, MD5_STR_LENGTH - 1, path_md5);
+    const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;

-    if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get_str(IncrementalCopyTable, path_md5_str)) {
+    if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get(IncrementalCopyTable, doc_id)) {
        // Copy index line
        cJSON_DeleteItemFromObject(document, "index");
        char *json_str = cJSON_PrintUnformatted(document);
@ -510,9 +505,9 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_S

        // Copy tn store contents
        size_t buf_len;
-        char *buf = store_read(IncrementalCopySourceStore, (char *) path_md5, sizeof(path_md5), &buf_len);
+        char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
        if (buf_len != 0) {
-            store_write(IncrementalCopyDestinationStore, (char *) path_md5, sizeof(path_md5), buf, buf_len);
+            store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
            free(buf);
        }
    }
@ -536,24 +531,24 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
    read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
 }

-void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
+void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) { // writes the document's "_id" as one line when it is in neither the copy table nor the new table

-    char path_md5_n[MD5_STR_LENGTH + 1];
-    path_md5_n[MD5_STR_LENGTH] = '\0';
-    path_md5_n[MD5_STR_LENGTH - 1] = '\n';
-    const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
+    char doc_id_n[SIST_DOC_ID_LEN + 1]; // id characters followed by '\n' and a terminating NUL
+    doc_id_n[SIST_DOC_ID_LEN] = '\0';
+    doc_id_n[SIST_DOC_ID_LEN - 1] = '\n';
+    const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;

    // do not delete archive virtual entries
    if (cJSON_GetObjectItem(document, "parent") == NULL 
-        && !incremental_get_str(IncrementalCopyTable, path_md5_str)
-        && !incremental_get_str(IncrementalNewTable, path_md5_str)
+        && !incremental_get(IncrementalCopyTable, doc_id)
+        && !incremental_get(IncrementalNewTable, doc_id)
        ) {
-        memcpy(path_md5_n, path_md5_str, MD5_STR_LENGTH - 1);
-        zstd_write_string(path_md5_n, MD5_STR_LENGTH);
+        memcpy(doc_id_n, doc_id, SIST_DOC_ID_LEN - 1); // copy only the id characters; the '\n' and NUL were set above
+        zstd_write_string(doc_id_n, SIST_DOC_ID_LEN); // write the newline-terminated copy (id + '\n'), not the raw JSON string
    }
 }

-void incremental_delete(const char *del_filepath, const char* index_filepath, 
+void incremental_delete(const char *del_filepath, const char *index_filepath,
                        GHashTable *copy_table, GHashTable *new_table) {

    if (WriterCtx.out_file == NULL) {
--- a/src/io/serialize.h
+++ b/src/io/serialize.h
@ -12,7 +12,7 @@ typedef struct line_processor {
  void (*func)(const char*, void*);
 } line_processor_t;

-typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
+typedef void(*index_func)(cJSON *, const char[SIST_DOC_ID_LEN]);

 void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                      const char *dst_filepath, GHashTable *copy_table);
@ -24,7 +24,7 @@ void write_document(document_t *doc);

 void read_lines(const char *path, const line_processor_t processor);

-void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
+void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func);

 void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);

@ -42,13 +42,13 @@ index_descriptor_t read_index_descriptor(char *path);
 // caller ensures char file_path[PATH_MAX]
 #define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
    snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path);              \
-    if (0 == access(file_path, R_OK)) {                                                 \
+    if (access(file_path, R_OK) == 0) {                                                 \
        action_ok;                                                                      \
    } else {                                                                            \
        action_main_fail;                                                               \
    }                                                                                   \
    snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path);          \
-    if ((cond_original) && (0 == access(file_path, R_OK))) {                            \
+    if ((cond_original) && access(file_path, R_OK) == 0) {                              \
        action_ok;                                                                      \
    }                                                                                   \

--- a/src/io/store.c
+++ b/src/io/store.c
@ -52,22 +52,7 @@ void store_flush(store_t *store) {
 void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {

    if (LogCtx.very_verbose) {
-        if (key_len == MD5_DIGEST_LENGTH) {
-            char path_md5_str[MD5_STR_LENGTH];
-            buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
-
-            LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
-
-        } else if (key_len == MD5_DIGEST_LENGTH + sizeof(int)) {
-            char path_md5_str[MD5_STR_LENGTH];
-            buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
-
-            LOG_DEBUGF("store.c", "Store write {%s/%d} %lu bytes",
-                       path_md5_str, *(int *) (key + MD5_DIGEST_LENGTH), buf_len);
-
-        } else {
-            LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
-        }
+        LOG_DEBUGF("store.c", "Store write %s@{%s} %lu bytes", store->path, key, buf_len)
    }

 #if (SIST_FAKE_STORE != 1)
--- a/src/io/walk.c
+++ b/src/io/walk.c
@ -22,7 +22,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,

    job->vfile.info = *info;

-    memset(job->parent, 0, MD5_DIGEST_LENGTH);
+    job->parent[0] = '\0';

    job->vfile.filepath = job->filepath;
    job->vfile.read = fs_read;
--- a/src/main.c
+++ b/src/main.c
@ -38,8 +38,8 @@ static __sighandler_t sigabrt_handler = NULL;

 void sig_handler(int signum) {

-    LogCtx.verbose = 1;
-    LogCtx.very_verbose = 1;
+    LogCtx.verbose = TRUE;
+    LogCtx.very_verbose = TRUE;

    LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
    LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
@ -103,7 +103,7 @@ void sig_handler(int signum) {
    exit(-1);
 }

-void init_dir(const char *dirpath, scan_args_t* args) {
+void init_dir(const char *dirpath, scan_args_t *args) {
    char path[PATH_MAX];
    snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);

@ -112,16 +112,16 @@ void init_dir(const char *dirpath, scan_args_t* args) {
    strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);

    if (args->incremental != NULL) {
-      // copy old index id
-      char descriptor_path[PATH_MAX];
-      snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
-      index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
-      memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
+        // copy old index id
+        char descriptor_path[PATH_MAX];
+        snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
+        index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
+        memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
    } else {
-      // genreate new index id based on timestamp
-      unsigned char index_md5[MD5_DIGEST_LENGTH];
-      MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
-      buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
+        // generate new index id based on timestamp
+        unsigned char index_md5[MD5_DIGEST_LENGTH];
+        MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
+        buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
    }

    write_index_descriptor(path, &ScanCtx.index.desc);
@ -324,9 +324,13 @@ void load_incremental_index(const scan_args_t *args) {
        LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
    }

-    READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc),
-                 LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
-                 1);
+    READ_INDICES(
+            file_path,
+            args->incremental,
+            incremental_read(ScanCtx.original_table, file_path, &original_desc),
+            LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
+            TRUE
+    );

    LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
 }
@ -534,6 +538,7 @@ void sist2_exec_script(exec_args_t *args) {

    IndexCtx.es_url = args->es_url;
    IndexCtx.es_index = args->es_index;
+    IndexCtx.needs_es_connection = TRUE;

    LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)

@ -776,9 +781,8 @@ int main(int argc, const char *argv[]) {
        sist2_exec_script(exec_args);

    } else {
-        fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
        argparse_usage(&argparse);
-        goto end;
+        LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
    }
    printf("\n");

--- a/src/parsing/parse.c
+++ b/src/parsing/parse.c
@ -69,7 +69,7 @@ void parse(void *arg) {
    doc->base = (short) job->base;

    char *rel_path = doc->filepath + ScanCtx.index.desc.root_len;
-    MD5((unsigned char *) rel_path, strlen(rel_path), doc->path_md5);
+    generate_doc_id(rel_path, doc->doc_id);

    doc->meta_head = NULL;
    doc->meta_tail = NULL;
@ -77,10 +77,10 @@ void parse(void *arg) {
    doc->size = job->vfile.info.st_size;
    doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;

-    int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
+    int inc_ts = incremental_get(ScanCtx.original_table, doc->doc_id);
    if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
        pthread_mutex_lock(&ScanCtx.copy_table_mu);
-        incremental_mark_file(ScanCtx.copy_table, doc->path_md5);
+        incremental_mark_file(ScanCtx.copy_table, doc->doc_id);
        pthread_mutex_unlock(&ScanCtx.copy_table_mu);

        pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
@ -96,16 +96,14 @@ void parse(void *arg) {

    if (ScanCtx.new_table != NULL) {
        pthread_mutex_lock(&ScanCtx.copy_table_mu);
-        incremental_mark_file(ScanCtx.new_table, doc->path_md5);
+        incremental_mark_file(ScanCtx.new_table, doc->doc_id);
        pthread_mutex_unlock(&ScanCtx.copy_table_mu);
    }

    char *buf[MAGIC_BUF_SIZE];

    if (LogCtx.very_verbose) {
-        char path_md5_str[MD5_STR_LENGTH];
-        buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, path_md5_str);
-        LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
+        LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id)
    }

    if (job->vfile.info.st_size == 0) {
@ -218,10 +216,10 @@ void parse(void *arg) {
    abort:

    //Parent meta
-    if (!md5_digest_is_null(job->parent)) {
-        meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
+    if (job->parent[0] != '\0') {
+        meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
        meta_parent->key = MetaParent;
-        buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
+        strcpy(meta_parent->str_val, job->parent);
        APPEND_META((doc), meta_parent)

        doc->has_parent = TRUE;
--- a/src/parsing/sidecar.c
+++ b/src/parsing/sidecar.c
@ -23,16 +23,19 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
    }
    char *json_str = cJSON_PrintUnformatted(json);

-    unsigned char path_md5[MD5_DIGEST_LENGTH];
-    MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
-        path_md5);
+    char assoc_doc_id[SIST_DOC_ID_LEN];

-    char path_md5_str[MD5_STR_LENGTH];
-    buf2hex(path_md5, MD5_DIGEST_LENGTH, path_md5_str);
+    char rel_path[PATH_MAX];
+    size_t rel_path_len = doc->ext - 1 - ScanCtx.index.desc.root_len;
+    memcpy(rel_path, vfile->filepath + ScanCtx.index.desc.root_len, rel_path_len);
+    *(rel_path + rel_path_len) = '\0';

-    store_write(ScanCtx.index.meta_store, path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
+    generate_doc_id(rel_path, assoc_doc_id);
+
+    store_write(ScanCtx.index.meta_store, assoc_doc_id, sizeof(assoc_doc_id), json_str,
+                strlen(json_str) + 1);

    cJSON_Delete(json);
    free(json_str);
    free(buf);
-}
+}
--- a/src/sist.h
+++ b/src/sist.h
@ -27,10 +27,6 @@

 #define UNUSED(x) __attribute__((__unused__))  x

-#define MD5_STR_LENGTH 33
-#define SHA1_STR_LENGTH 41
-#define SHA1_DIGEST_LENGTH 20
-
 #include "util.h"
 #include "log.h"
 #include "types.h"
@ -53,14 +49,14 @@
 #include <ctype.h>
 #include "git_hash.h"

-#define VERSION "2.11.7"
+#define VERSION "2.12.0"
 static const char *const Version = VERSION;

 #ifndef SIST_PLATFORM
 #define SIST_PLATFORM unknown
 #endif

-#define EXPECTED_MONGOOSE_VERSION "7.3"
+#define EXPECTED_MONGOOSE_VERSION "7.6"

 #define Q(x) #x
 #define QUOTE(x) Q(x)
--- a/src/stats.c
+++ b/src/stats.c
@ -20,7 +20,7 @@ typedef struct {
    long count;
 } agg_t;

-void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
+void fill_tables(cJSON *document, UNUSED(const char index_id[SIST_INDEX_ID_LEN])) {

    if (cJSON_GetObjectItem(document, "parent") != NULL) {
        return;
--- a/src/types.h
+++ b/src/types.h
@ -4,7 +4,7 @@
 #define INDEX_TYPE_NDJSON "ndjson"

 typedef struct index_descriptor {
-    char id[MD5_STR_LENGTH];
+    char id[SIST_INDEX_ID_LEN];
    char version[64];
    long timestamp;
    char root[PATH_MAX];
--- a/src/util.h
+++ b/src/util.h
@ -10,8 +10,6 @@
 #include "third-party/utf8.h/utf8.h"
 #include "libscan/scan.h"

-#define MD5_STR_LENGTH 33
-

 char *abspath(const char *path);

@ -94,40 +92,24 @@ static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {


 __always_inline
-static int md5_digest_is_null(const unsigned char digest[MD5_DIGEST_LENGTH]) {
-    return (*(int64_t *) digest) == 0 && (*((int64_t *) digest + 1)) == 0;
+static void generate_doc_id(const char *rel_path, char *doc_id) { // derives a document id from rel_path and writes it to doc_id
+    unsigned char md[MD5_DIGEST_LENGTH];
+
+    MD5((unsigned char *) rel_path, strlen(rel_path), md); // digest of the path characters, terminator excluded
+    buf2hex(md, sizeof(md), doc_id); // hex form of the digest; presumably NUL-terminated by buf2hex, so doc_id needs SIST_DOC_ID_LEN bytes — TODO confirm
 }

-
 __always_inline
-static void incremental_put(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH], int mtime) {
-    char *ptr = malloc(MD5_STR_LENGTH);
-    buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
+static void incremental_put(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN], int mtime) { // stores mtime in table under a heap copy of doc_id
+    char *ptr = malloc(SIST_DOC_ID_LEN); // key copy handed to the hash table; the malloc result is not checked
+    strcpy(ptr, doc_id); // NOTE(review): unbounded copy — assumes doc_id plus its NUL fits in SIST_DOC_ID_LEN bytes
    g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
 }

 __always_inline
-static void incremental_put_str(GHashTable *table, const char *path_md5, int mtime) {
-    char *ptr = malloc(MD5_STR_LENGTH);
-    strcpy(ptr, path_md5);
-    g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
-}
-
-__always_inline
-static int incremental_get(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
+static int incremental_get(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
    if (table != NULL) {
-        char md5_str[MD5_STR_LENGTH];
-        buf2hex(path_md5, MD5_DIGEST_LENGTH, md5_str);
-        return GPOINTER_TO_INT(g_hash_table_lookup(table, md5_str));
-    } else {
-        return 0;
-    }
-}
-
-__always_inline
-static int incremental_get_str(GHashTable *table, const char *path_md5) {
-    if (table != NULL) {
-        return GPOINTER_TO_INT(g_hash_table_lookup(table, path_md5));
+        return GPOINTER_TO_INT(g_hash_table_lookup(table, doc_id));
    } else {
        return 0;
    }
@ -138,9 +120,9 @@ static int incremental_get_str(GHashTable *table, const char *path_md5) {
 * !!Not thread safe.
 */
 __always_inline
-static int incremental_mark_file(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
-    char *ptr = malloc(MD5_STR_LENGTH);
-    buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
+static int incremental_mark_file(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
+    char *ptr = malloc(SIST_DOC_ID_LEN);
+    strcpy(ptr, doc_id);
    return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
 }

--- a/src/web/serve.c
+++ b/src/web/serve.c
@ -12,6 +12,13 @@
 #define HTTP_TEXT_TYPE_HEADER "Content-Type: text/plain;charset=utf-8\r\n"
 #define HTTP_REPLY_NOT_FOUND mg_http_reply(nc, 404, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Not found");

+static struct mg_http_serve_opts DefaultServeOpts = { // options passed to mg_http_serve_file() for the dev-mode (WebCtx.dev) static files
+        .fs = NULL,
+        .ssi_pattern = NULL,
+        .root_dir = NULL,
+        .mime_types = "" // no extra extension-to-type mappings
+};
+

 static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
    mg_printf(
@ -29,7 +36,7 @@ static void send_response_line(struct mg_connection *nc, int status_code, size_t

 index_t *get_index_by_id(const char *index_id) {
    for (int i = WebCtx.index_count; i >= 0; i--) {
-        if (strncmp(index_id, WebCtx.indices[i].desc.id, MD5_STR_LENGTH) == 0) {
+        if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) {
            return &WebCtx.indices[i];
        }
    }
@ -54,7 +61,7 @@ store_t *get_tag_store(const char *index_id) {

 void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
    if (WebCtx.dev) {
-        mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", "text/html", NULL);
+        mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &DefaultServeOpts);
    } else {
        send_response_line(nc, 200, sizeof(index_html), "Content-Type: text/html");
        mg_send(nc, index_html, sizeof(index_html));
@ -63,23 +70,23 @@ void search_index(struct mg_connection *nc, struct mg_http_message *hm) {

 void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {

-    if (hm->uri.len != MD5_STR_LENGTH + 4) {
+    if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
        HTTP_REPLY_NOT_FOUND
        return;
    }

-    char arg_md5[MD5_STR_LENGTH];
-    memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
-    *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
+    char arg_index_id[SIST_INDEX_ID_LEN];
+    memcpy(arg_index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
+    *(arg_index_id + SIST_INDEX_ID_LEN - 1) = '\0';

-    index_t *index = get_index_by_id(arg_md5);
+    index_t *index = get_index_by_id(arg_index_id);
    if (index == NULL) {
        HTTP_REPLY_NOT_FOUND
        return;
    }

    const char *file;
-    switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) {
+    switch (atoi(hm->uri.ptr + 3 + SIST_INDEX_ID_LEN)) {
        case 1:
            file = "treemap.csv";
            break;
@ -104,12 +111,13 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
    strcpy(full_path, index->path);
    strcat(full_path, file);

-    mg_http_serve_file(nc, hm, full_path, "text/csv", disposition);
+    struct mg_http_serve_opts opts = {};
+    mg_http_serve_file(nc, hm, full_path, &opts);
 }

 void javascript(struct mg_connection *nc, struct mg_http_message *hm) {
    if (WebCtx.dev) {
-        mg_http_serve_file(nc, hm, "sist2-vue/dist/js/index.js", "application/javascript", NULL);
+        mg_http_serve_file(nc, hm, "sist2-vue/dist/js/index.js", &DefaultServeOpts);
    } else {
        send_response_line(nc, 200, sizeof(index_js), "Content-Type: application/javascript");
        mg_send(nc, index_js, sizeof(index_js));
@ -118,7 +126,7 @@ void javascript(struct mg_connection *nc, struct mg_http_message *hm) {

 void javascript_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
    if (WebCtx.dev) {
-        mg_http_serve_file(nc, hm, "sist2-vue/dist/js/chunk-vendors.js", "application/javascript", NULL);
+        mg_http_serve_file(nc, hm, "sist2-vue/dist/js/chunk-vendors.js", &DefaultServeOpts);
    } else {
        send_response_line(nc, 200, sizeof(chunk_vendors_js), "Content-Type: application/javascript");
        mg_send(nc, chunk_vendors_js, sizeof(chunk_vendors_js));
@ -142,28 +150,25 @@ void style_vendor(struct mg_connection *nc, struct mg_http_message *hm) {

 void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {

-    int parse_tn_num = FALSE;
+    int has_thumbnail_index = FALSE;

-    if (hm->uri.len != 68) {
+    if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {

-        if (hm->uri.len != 68 + 4) {
+        if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 4) {
            LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
            HTTP_REPLY_NOT_FOUND
            return;
        }
-        parse_tn_num = TRUE;
+        has_thumbnail_index = TRUE;
    }

-    char arg_file_md5[MD5_STR_LENGTH];
-    char arg_index[MD5_STR_LENGTH];
+    char arg_doc_id[SIST_DOC_ID_LEN];
+    char arg_index[SIST_INDEX_ID_LEN];

-    memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH);
-    *(arg_index + MD5_STR_LENGTH - 1) = '\0';
-    memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
-    *(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
-
-    unsigned char md5_buf[MD5_DIGEST_LENGTH];
-    hex2buf(arg_file_md5, MD5_STR_LENGTH - 1, md5_buf);
+    memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
+    *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
+    memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
+    *(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';

    store_t *store = get_store(arg_index);
    if (store == NULL) {
@ -175,16 +180,18 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
    char *data;
    size_t data_len = 0;

-    if (parse_tn_num) {
-        int tn_num = atoi(hm->uri.ptr + 68);
+    if (has_thumbnail_index) {
+        const char *tn_index = hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2;

-        char tn_key[sizeof(md5_buf) + sizeof(int)];
-        memcpy(tn_key, md5_buf, sizeof(md5_buf));
-        memcpy(tn_key + sizeof(md5_buf), &tn_num, sizeof(tn_num));
+        char tn_key[sizeof(arg_doc_id) + sizeof(char) * 4];
+
+        memcpy(tn_key, arg_doc_id, sizeof(arg_doc_id));
+        memcpy(tn_key + sizeof(arg_doc_id) - 1, tn_index, sizeof(char) * 4);
+        *(tn_key + sizeof(tn_key) - 1) = '\0';

        data = store_read(store, (char *) tn_key, sizeof(tn_key), &data_len);
    } else {
-        data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
+        data = store_read(store, (char *) arg_doc_id, sizeof(arg_doc_id), &data_len);
    }

    if (data_len != 0) {
@ -274,10 +281,18 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s

    char disposition[8192];
    snprintf(disposition, sizeof(disposition),
-             HTTP_SERVER_HEADER "Content-Disposition: inline; filename=\"%s%s%s\"\r\nAccept-Ranges: bytes\r\n",
+             HTTP_SERVER_HEADER "Content-Disposition: inline; filename=\"%s%s%s\"\r\n"
+             "Accept-Ranges: bytes\r\nCache-Control: no-store\r\n",
             name, strlen(ext) == 0 ? "" : ".", ext);

-    mg_http_serve_file(nc, hm, full_path, mime, disposition);
+    char mime_mapping[1024];
+    snprintf(mime_mapping, sizeof(mime_mapping), "%s=%s", ext, mime);
+
+    struct mg_http_serve_opts opts = {
+            .extra_headers = disposition,
+            .mime_types = mime_mapping
+    };
+    mg_http_serve_file(nc, hm, full_path, &opts);
 }

 void cache_es_version() {
@ -298,13 +313,18 @@ void index_info(struct mg_connection *nc) {

    cache_es_version();

+    const char *es_version = "0.0.0";
+    if (WebCtx.es_version != NULL) {
+        es_version = format_es_version(WebCtx.es_version);
+    }
+
    cJSON *json = cJSON_CreateObject();
    cJSON *arr = cJSON_AddArrayToObject(json, "indices");

    cJSON_AddStringToObject(json, "mongooseVersion", MG_VERSION);
    cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
    cJSON_AddStringToObject(json, "version", Version);
-    cJSON_AddStringToObject(json, "esVersion", format_es_version(WebCtx.es_version));
+    cJSON_AddStringToObject(json, "esVersion", es_version);
    cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
    cJSON_AddBoolToObject(json, "esVersionLegacy", USE_LEGACY_ES_SETTINGS(WebCtx.es_version));
    cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
@ -339,55 +359,19 @@ void index_info(struct mg_connection *nc) {
 }


-void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
-
-    if (hm->uri.len != MD5_STR_LENGTH + 2) {
-        LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
-        HTTP_REPLY_NOT_FOUND
-        return;
-    }
-
-    char arg_md5[MD5_STR_LENGTH];
-    memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
-    *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
-
-    cJSON *doc = elastic_get_document(arg_md5);
-    cJSON *source = cJSON_GetObjectItem(doc, "_source");
-
-    cJSON *index_id = cJSON_GetObjectItem(source, "index");
-    if (index_id == NULL) {
-        cJSON_Delete(doc);
-        HTTP_REPLY_NOT_FOUND
-        return;
-    }
-
-    index_t *idx = get_index_by_id(index_id->valuestring);
-    if (idx == NULL) {
-        cJSON_Delete(doc);
-        HTTP_REPLY_NOT_FOUND
-        return;
-    }
-
-    char *json_str = cJSON_PrintUnformatted(source);
-    send_response_line(nc, 200, (int) strlen(json_str), "Content-Type: application/json");
-    mg_send(nc, json_str, (int) strlen(json_str));
-    free(json_str);
-    cJSON_Delete(doc);
-}
-
 void file(struct mg_connection *nc, struct mg_http_message *hm) {

-    if (hm->uri.len != MD5_STR_LENGTH + 2) {
+    if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
        LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
        HTTP_REPLY_NOT_FOUND
        return;
    }

-    char arg_md5[MD5_STR_LENGTH];
-    memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
-    *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
+    char arg_doc_id[SIST_DOC_ID_LEN];
+    memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
+    *(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';

-    const char *next = arg_md5;
+    const char *next = arg_doc_id;
    cJSON *doc = NULL;
    cJSON *index_id = NULL;
    cJSON *source = NULL;
@ -438,7 +422,6 @@ void status(struct mg_connection *nc) {
 typedef struct {
    char *name;
    int delete;
-    char *path_md5_str;
    char *doc_id;
 } tag_req_t;

@ -458,12 +441,6 @@ tag_req_t *parse_tag_request(cJSON *json) {
        return NULL;
    }

-    cJSON *arg_path_md5 = cJSON_GetObjectItem(json, "path_md5");
-    if (arg_path_md5 == NULL || !cJSON_IsString(arg_path_md5) ||
-        strlen(arg_path_md5->valuestring) != MD5_STR_LENGTH - 1) {
-        return NULL;
-    }
-
    cJSON *arg_doc_id = cJSON_GetObjectItem(json, "doc_id");
    if (arg_doc_id == NULL || !cJSON_IsString(arg_doc_id)) {
        return NULL;
@ -472,22 +449,21 @@ tag_req_t *parse_tag_request(cJSON *json) {
    tag_req_t *req = malloc(sizeof(tag_req_t));
    req->delete = arg_delete->valueint;
    req->name = arg_name->valuestring;
-    req->path_md5_str = arg_path_md5->valuestring;
    req->doc_id = arg_doc_id->valuestring;

    return req;
 }

 void tag(struct mg_connection *nc, struct mg_http_message *hm) {
-    if (hm->uri.len != MD5_STR_LENGTH + 4) {
+    if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
        LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
        HTTP_REPLY_NOT_FOUND
        return;
    }

-    char arg_index[MD5_STR_LENGTH];
-    memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH);
-    *(arg_index + MD5_STR_LENGTH - 1) = '\0';
+    char arg_index[SIST_INDEX_ID_LEN];
+    memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
+    *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';

    if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
        LOG_DEBUG("serve.c", "Invalid tag request")
@ -519,7 +495,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
    cJSON *arr = NULL;

    size_t data_len = 0;
-    const char *data = store_read(store, arg_req->path_md5_str, MD5_STR_LENGTH, &data_len);
+    const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len);
    if (data_len == 0) {
        arr = cJSON_CreateArray();
    } else {
@ -579,7 +555,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
    }

    char *json_str = cJSON_PrintUnformatted(arr);
-    store_write(store, arg_req->path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
+    store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1);
    store_flush(store);

    free(arg_req);
@ -641,8 +617,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
                return;
            }
            tag(nc, hm);
-        } else if (mg_http_match_uri(hm, "/d/*")) {
-            document_info(nc, hm);
        } else {
            HTTP_REPLY_NOT_FOUND
        }
--- a/src/web/static_generated.c
+++ b/src/web/static_generated.c
--- a/tests/test_scan.py
+++ b/tests/test_scan.py
@ -35,10 +35,20 @@ def sist2_index(files, *args):
    path = copy_files(files)

    shutil.rmtree("test_i", ignore_errors=True)
-    sist2("scan", path, "-o", "test_i", *args)
+    sist2("scan", path, "-o", "test_i", "-t12", *args)
    return iter(sist2_index_to_dict("test_i"))


+def get_lmdb_contents(path):
+    import lmdb
+
+    env = lmdb.open(path)
+
+    txn = env.begin(write=False)
+
+    return dict((k, v) for k, v in txn.cursor())
+
+
 def sist2_incremental_index(files, func=None, incremental_index=False, *args):
    path = copy_files(files)

@ -46,7 +56,7 @@ def sist2_incremental_index(files, func=None, incremental_index=False, *args):
        func(path)

    shutil.rmtree("test_i_inc", ignore_errors=True)
-    sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
+    sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", "-t12", *args)
    return iter(sist2_index_to_dict("test_i_inc", incremental_index))


@ -76,9 +86,31 @@ class ScanTest(unittest.TestCase):
                pass

        file_count = sum(1 for _ in sist2_index(TEST_FILES))
-        self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
-        self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)), 3)
-        self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
+        lmdb_full = get_lmdb_contents("test_i/thumbs")
+
+        # Remove files
+        num_files_rm1 = len(list(sist2_incremental_index(TEST_FILES, remove_files)))
+        lmdb_rm1 = get_lmdb_contents("test_i_inc/thumbs")
+        self.assertEqual(num_files_rm1, file_count - 2)
+        self.assertEqual(len(set(lmdb_full.keys() - set(lmdb_rm1.keys()))), 2)
+
+        # add files (incremental_index=True)
+        num_files_add_inc = len(list(sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)))
+        lmdb_add_inc = get_lmdb_contents("test_i_inc/thumbs")
+        self.assertEqual(num_files_add_inc, 3)
+        self.assertEqual(set(lmdb_full.keys()), set(lmdb_add_inc.keys()))
+
+        # add files
+        num_files_add = len(list(sist2_incremental_index(TEST_FILES, add_files)))
+        lmdb_add = get_lmdb_contents("test_i_inc/thumbs")
+        self.assertEqual(num_files_add, file_count + 3)
+        self.assertEqual(set(lmdb_full.keys()), set(lmdb_add.keys()))
+
+        # (No action)
+        sist2_incremental_index(TEST_FILES)
+        lmdb_inc = get_lmdb_contents("test_i_inc/thumbs")
+
+        self.assertEqual(set(lmdb_full.keys()), set(lmdb_inc.keys()))


 if __name__ == "__main__":
--- a/third-party/libscan/CMakeLists.txt
+++ b/third-party/libscan/CMakeLists.txt
@ -6,26 +6,11 @@ set(CMAKE_C_STANDARD 11)
 option(BUILD_TESTS "Build tests" on)

 add_subdirectory(third-party/antiword)
-if (SIST_DEBUG)
-    add_compile_definitions(
-            antiword
-            DEBUG
-    )
-    target_compile_options(
-            antiword
-            PRIVATE
-            -g
-            -fstack-protector
-            -fno-omit-frame-pointer
-            -fsanitize=address
-            -fno-inline
-    )
-else()
-    add_compile_definitions(
-            antiword
-            NDEBUG
-    )
-endif()
+
+set(USE_LIBXML2 OFF CACHE BOOL "" FORCE)
+set(USE_XMLWRITER OFF CACHE BOOL "" FORCE)
+set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
+add_subdirectory(third-party/libmobi)

 add_library(
        scan
@ -48,6 +33,54 @@ add_library(
        libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h libscan/raw/raw.c libscan/raw/raw.h)
 set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)

+if (SIST_DEBUG)
+    add_compile_definitions(
+            antiword
+            DEBUG
+    )
+    target_compile_options(
+            antiword
+            PRIVATE
+            -g
+            -fstack-protector
+            -fno-omit-frame-pointer
+            -fsanitize=address
+            -fno-inline
+    )
+elseif (SIST_FAST)
+    add_compile_definitions(
+            antiword
+            NDEBUG
+    )
+
+    target_compile_options(
+            scan
+            PRIVATE
+
+            -Ofast
+            -march=native
+            -fno-stack-protector
+            -fomit-frame-pointer
+            -freciprocal-math
+    )
+else()
+    add_compile_definitions(
+            antiword
+            NDEBUG
+    )
+
+    target_compile_options(
+            scan
+            PRIVATE
+
+            -Ofast
+            #-march=native
+            -fno-stack-protector
+            -fomit-frame-pointer
+            #-freciprocal-math
+    )
+endif()
+
 set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib .so)

 find_package(cJSON CONFIG REQUIRED)
@ -85,35 +118,15 @@ target_compile_options(
        -g
 )

-include(ExternalProject)
-find_program(MAKE_EXE NAMES gmake nmake make)
-ExternalProject_Add(
-        libmobi
-        GIT_REPOSITORY https://github.com/simon987/libmobi.git
-        GIT_TAG "public"
-
-        UPDATE_COMMAND ""
-        PATCH_COMMAND ""
-        TEST_COMMAND ""
-        CONFIGURE_COMMAND ./autogen.sh && ./configure
-        INSTALL_COMMAND ""
-
-        PREFIX "third-party/ext_libmobi"
-        SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
-        BINARY_DIR "third-party/ext_libmobi/src/libmobi"
-
-        BUILD_COMMAND ${MAKE_EXE} -j 8 --silent
-)
-
-SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
-SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
-
 if (SIST_DEBUG)
    SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
 else()
    SET(FFMPEG_DEBUG "")
 endif()

+include(ExternalProject)
+find_program(MAKE_EXE NAMES gmake nmake make)
+
 ExternalProject_Add(
        ffmpeg
        GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
@ -159,10 +172,10 @@ SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwp

 add_dependencies(
        scan
-        libmobi
        ffmpeg
        antiword
        libwpd
+        mobi
 )

 target_link_libraries(
@ -180,8 +193,6 @@ target_link_libraries(
        ${MUPDF_LIB}
        openjp2

-        ${MOBI_LIB_DIR}/libmobi.a
-
        ${WPD_LIB_DIR}/libwpd-0.9.a
        ${WPD_LIB_DIR}/libwpd-stream-0.9.a

@ -218,6 +229,7 @@ target_link_libraries(
        ${GUMBO_LIB}
        dl
        antiword
+        mobi
        unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
 )

--- a/third-party/libscan/libscan/arc/arc.c
+++ b/third-party/libscan/libscan/arc/arc.c
@ -202,7 +202,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
        sub_job->vfile.logf = ctx->logf;
        sub_job->vfile.has_checksum = FALSE;
        sub_job->vfile.calculate_checksum = f->calculate_checksum;
-        memcpy(sub_job->parent, doc->path_md5, MD5_DIGEST_LENGTH);
+        strcpy(sub_job->parent, doc->doc_id);

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            sub_job->vfile.info = *archive_entry_stat(entry);
--- a/third-party/libscan/libscan/ebook/ebook.c
+++ b/third-party/libscan/libscan/ebook/ebook.c
@ -156,7 +156,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
    avcodec_receive_packet(jpeg_encoder, &jpeg_packet);

    APPEND_LONG_META(doc, MetaThumbnail, 1)
-    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
+    ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);

    free(samples);
    av_packet_unref(&jpeg_packet);
--- a/third-party/libscan/libscan/font/font.c
+++ b/third-party/libscan/libscan/font/font.c
@ -232,7 +232,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
    bmp_format(&bmp_data, dimensions, bitmap);

    APPEND_LONG_META(doc, MetaThumbnail, 1)
-    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);
+    ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) bmp_data.buf, bmp_data.cur);

    dyn_buffer_destroy(&bmp_data);
    free(bitmap);
--- a/third-party/libscan/libscan/macros.h
+++ b/third-party/libscan/libscan/macros.h
@ -20,8 +20,10 @@
 #undef ABS
 #define ABS(a) (((a) < 0) ? -(a) : (a))

-#define SHA1_STR_LENGTH 41
-#define SHA1_DIGEST_LENGTH 20
+#define SHA1_DIGEST_LENGTH SHA_DIGEST_LENGTH
+
+#define SHA1_STR_LENGTH (SHA1_DIGEST_LENGTH * 2 + 1)
+#define MD5_STR_LENGTH (MD5_DIGEST_LENGTH * 2 + 1)

 #define APPEND_STR_META(doc, keyname, value) \
    {meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \
--- a/third-party/libscan/libscan/media/media.c
+++ b/third-party/libscan/libscan/media/media.c
@ -4,7 +4,12 @@

 #define MIN_SIZE 32
 #define AVIO_BUF_SIZE 8192
-#define IS_VIDEO(fmt) ((fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0)
+#define IS_VIDEO(fmt) ( \
+    (fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0 \
+    && strcmp((fmt)->iformat->name, "jpeg_pipe") != 0 \
+    && strcmp((fmt)->iformat->name, "webp_pipe") != 0 \
+    && strcmp((fmt)->iformat->name, "png_pipe") != 0 \
+    )


 #define STORE_AS_IS ((void*)-1)
@ -279,18 +284,22 @@ static void
 append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int is_video) {

    if (is_video) {
-        meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
-        meta_duration->key = MetaMediaDuration;
-        meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
-        if (meta_duration->long_val > INT32_MAX) {
-            meta_duration->long_val = 0;
+        if (pFormatCtx->duration / AV_TIME_BASE != 0) {
+            meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
+            meta_duration->key = MetaMediaDuration;
+            meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
+            if (meta_duration->long_val > INT32_MAX) {
+                meta_duration->long_val = 0;
+            }
+            APPEND_META(doc, meta_duration)
        }
-        APPEND_META(doc, meta_duration)

-        meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
-        meta_bitrate->key = MetaMediaBitrate;
-        meta_bitrate->long_val = pFormatCtx->bit_rate;
-        APPEND_META(doc, meta_bitrate)
+        if (pFormatCtx->bit_rate != 0) {
+            meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
+            meta_bitrate->key = MetaMediaBitrate;
+            meta_bitrate->long_val = pFormatCtx->bit_rate;
+            APPEND_META(doc, meta_bitrate)
+        }
    }

    AVDictionaryEntry *tag = NULL;
@ -459,7 +468,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
    if (scaled_frame == STORE_AS_IS) {
        return_value = SAVE_THUMBNAIL_OK;

-        ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
+        ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
                   frame_and_packet->packet->size);
    } else {
        // Encode frame to jpeg
@ -473,7 +482,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor

        // Save thumbnail
        if (thumbnail_index == 0) {
-            ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
+            ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
            return_value = SAVE_THUMBNAIL_OK;

        } else if (thumbnail_index > 1) {
@ -482,9 +491,8 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
            //  I figure out a better fix.
            thumbnail_index -= 1;

-            char tn_key[sizeof(doc->path_md5) + sizeof(int)];
-            memcpy(tn_key, doc->path_md5, sizeof(doc->path_md5));
-            memcpy(tn_key + sizeof(doc->path_md5), &thumbnail_index, sizeof(thumbnail_index));
+            char tn_key[sizeof(doc->doc_id) + sizeof(char) * 4];
+            snprintf(tn_key, sizeof(tn_key), "%s%04d", doc->doc_id, thumbnail_index);

            ctx->store((char *) tn_key, sizeof(tn_key), (char *) jpeg_packet.data, jpeg_packet.size);
        } else {
@ -578,9 +586,10 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,

        int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE);

-        int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF && video_duration_in_seconds >= 15)
-                                     // Limit to ~1 thumbnail every 5s
-                                     ? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 5 + 1), 1) + 1
+        int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF &&
+                                      video_duration_in_seconds >= 15)
+                                     // Limit to ~1 thumbnail every 7s
+                                     ? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 7 + 1), 1) + 1
                                     : 1;

        const double seek_increment = thumbnails_to_generate == 1
@ -845,7 +854,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu

    if (scaled_frame == STORE_AS_IS) {
        APPEND_LONG_META(doc, MetaThumbnail, 1)
-        ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
+        ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
                   frame_and_packet->packet->size);
    } else {
        // Encode frame to jpeg
@ -859,7 +868,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu

        // Save thumbnail
        APPEND_LONG_META(doc, MetaThumbnail, 1)
-        ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
+        ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);

        av_packet_unref(&jpeg_packet);
        avcodec_free_context(&jpeg_encoder);
--- a/third-party/libscan/libscan/mobi/scan_mobi.c
+++ b/third-party/libscan/libscan/mobi/scan_mobi.c
@ -1,6 +1,6 @@
 #include "scan_mobi.h"

-#include <mobi.h>
+#include "../../third-party/libmobi/src/mobi.h"
 #include <errno.h>
 #include "stdlib.h"

--- a/third-party/libscan/libscan/ooxml/ooxml.c
+++ b/third-party/libscan/libscan/ooxml/ooxml.c
@ -191,7 +191,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
    archive_read_data(a, buf, entry_size);

    APPEND_LONG_META(doc, MetaThumbnail, 1)
-    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, entry_size);
+    ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), buf, entry_size);
    free(buf);
 }

--- a/third-party/libscan/libscan/raw/raw.c
+++ b/third-party/libscan/libscan/raw/raw.c
@ -7,8 +7,22 @@

 #define MIN_SIZE 32

-int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
-    return store_image_thumbnail((scan_media_ctx_t *) ctx, img->data, img->data_size, doc, "x.jpeg");
+int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_thumbnail_t img, document_t *doc) {
+
+    scan_media_ctx_t media_ctx = {
+            .read_subtitles = FALSE,
+            .tn_count = 1,
+            .max_media_buffer = 0,
+            .store = ctx->store,
+            .log = ctx->log,
+            .logf = ctx->logf,
+            .tn_size = ctx->tn_size,
+            .tn_qscale = ctx->tn_qscale,
+            .tesseract_lang = NULL,
+            .tesseract_path = NULL
+    };
+
+    return store_image_thumbnail(&media_ctx, img.thumb, img.tlength, doc, "x.jpeg");
 }

 int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
@ -70,7 +84,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
    avcodec_receive_packet(jpeg_encoder, &jpeg_packet);

    APPEND_LONG_META(doc, MetaThumbnail, 1)
-    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
+    ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);

    av_packet_unref(&jpeg_packet);
    av_free(*scaled_frame->data);
@ -171,25 +185,25 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
        return;
    }

-    int errc = 0;
-    libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc);
-    if (errc != 0) {
-        free(buf);
-        libraw_dcraw_clear_mem(thumb);
-        libraw_close(libraw_lib);
-        return;
-    }
-
    int tn_ok = 0;
+
    if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_JPEG) {
-        tn_ok = store_thumbnail_jpeg(ctx, thumb, doc);
+        tn_ok = store_thumbnail_jpeg(ctx, libraw_lib->thumbnail, doc);
    } else if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_BITMAP) {
        // TODO: technically this should work but is currently untested
+
+        int errc = 0;
+        libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc);
+        if (errc != 0) {
+            free(buf);
+            libraw_dcraw_clear_mem(thumb);
+            libraw_close(libraw_lib);
+            return;
+        }
+
        tn_ok = store_thumbnail_rgb24(ctx, thumb, doc);
    }

-    libraw_dcraw_clear_mem(thumb);
-
    if (tn_ok == TRUE) {
        free(buf);
        libraw_close(libraw_lib);
@ -206,7 +220,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {

    libraw_dcraw_process(libraw_lib);

-    errc = 0;
+    int errc = 0;
    libraw_processed_image_t *img = libraw_dcraw_make_mem_image(libraw_lib, &errc);
    if (errc != 0) {
        free(buf);
--- a/third-party/libscan/libscan/scan.h
+++ b/third-party/libscan/libscan/scan.h
@ -48,6 +48,9 @@ typedef int scan_code_t;
 #define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
 #define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);

+#define SIST_DOC_ID_LEN MD5_STR_LENGTH
+#define SIST_INDEX_ID_LEN MD5_STR_LENGTH
+
 enum metakey {
    // String
    MetaContent = 1,
@ -103,7 +106,7 @@ typedef struct meta_line {


 typedef struct document {
-    unsigned char path_md5[MD5_DIGEST_LENGTH];
+    char doc_id[SIST_DOC_ID_LEN];
    unsigned long size;
    unsigned int mime;
    int mtime;
@ -159,7 +162,7 @@ typedef struct parse_job_t {
    int base;
    int ext;
    struct vfile vfile;
-    unsigned char parent[MD5_DIGEST_LENGTH];
+    char parent[SIST_DOC_ID_LEN];
    char filepath[1];
 } parse_job_t;

--- a/third-party/libscan/test/main.cpp
+++ b/third-party/libscan/test/main.cpp
@ -923,7 +923,6 @@ TEST(Msdoc, Test1Pdf) {
    ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "October 2000") != nullptr);
    ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "INTERNATIONAL ORGANIZATION FOR STANDARDIZATION");
    ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "Oliver Morgan");
-    ASSERT_EQ(get_meta(&doc, MetaPages)->long_val, 57);
    ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
    ASSERT_NE(size_before, store_size);

@ -1030,6 +1029,23 @@ TEST(Msdoc, TestUtf8Text) {
    cleanup(&doc, &f);
 }

+TEST(Msdoc, Test5Pdf) {
+    vfile_t f;
+    document_t doc;
+    load_doc_file("libscan-test-files/test_files/msdoc/test5.doc", &f, &doc);
+
+    size_t size_before = store_size;
+
+    parse_msdoc(&msdoc_ctx, &f, &doc);
+
+    ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "орган Федеральной") != nullptr);
+    ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "uswo");
+    ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
+    ASSERT_NE(size_before, store_size);
+
+    cleanup(&doc, &f);
+}
+
 TEST(Msdoc, TestFuzz1) {
    vfile_t f;
    document_t doc;
@ -1189,4 +1205,7 @@ int main(int argc, char **argv) {
    av_log_set_level(AV_LOG_QUIET);
    ::testing::InitGoogleTest(&argc, argv);
    return RUN_ALL_TESTS();
-}
+}
+
+// 0x6130000d2580
+// "/mnt/Hatchery/m ain/downloads/qbittorrent/downloads/Roskomnadzor/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ.zip#/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ/Лопатин Ю.М/Секнин/2015 год/Обучение по ", <incomplete sequence \320>...
--- a/third-party/libscan/third-party/antiword
+++ b/third-party/libscan/third-party/antiword
@ -1 +1 @@
-Subproject commit 62ae66db99e9dd88dfa31999f516f71bb8bdc8b2
+Subproject commit ddb042143e72a8b789e06f09dbc897dfa9f15b82
--- a/third-party/libscan/third-party/libmobi
+++ b/third-party/libscan/third-party/libmobi
@ -0,0 +1 @@
+Subproject commit 395dbde361a80353a9ed8b65d01d6066554142b3
				`@ -0,0 +1 @@`
				`Subproject commit 395dbde361a80353a9ed8b65d01d6066554142b3`