mirror of
https://github.com/simon987/sist2.git
synced 2025-12-13 07:19:06 +00:00
Compare commits
89 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4e1109c528 | |||
| f87de89275 | |||
| 1205981a11 | |||
| 09613eaaf9 | |||
| a74726be55 | |||
|
|
cb228052d2 | ||
| fe56da95d5 | |||
| 9f2ad58f78 | |||
| 84d9bf4323 | |||
| 90aa90f3f3 | |||
| 3fad07360c | |||
|
|
00c3a640d0 | ||
| 730e495bde | |||
| 54df1dfcf7 | |||
| a75675ecea | |||
| 901035da15 | |||
| ceb7265639 | |||
| 036ed9ea1e | |||
| 779303a2f7 | |||
| 23aee14c07 | |||
| 50b9201be3 | |||
|
|
14cfb15661 | ||
| 125c85d9bb | |||
| 474eb95aff | |||
| acf7453057 | |||
| 9a949d2694 | |||
| dbdc75dcb8 | |||
| c575fca91d | |||
| 0bf4244683 | |||
| eea5ce75f3 | |||
| 9b81856353 | |||
| a10d6952ba | |||
| 2b639bd4ac | |||
| e9f92330fd | |||
| cb37a6e6c1 | |||
| b82c26f0fb | |||
| 16a4fb4874 | |||
| cdc4c0ad3d | |||
| d034851ecb | |||
| ea7dfe7c84 | |||
| 8bfd010f4b | |||
| 499eb2b2e4 | |||
| 25ab883063 | |||
|
|
6ab606203f | ||
| 6ec98046fa | |||
|
|
4fac81ca6a | ||
| 2882741926 | |||
| edba9b7917 | |||
| e89964d592 | |||
| 329afcbe4f | |||
| 2a2664a5cd | |||
| 0d18637e88 | |||
| 8ad9fc9e32 | |||
| f075b542fe | |||
| 3d4331b27d | |||
| a0db49e7d8 | |||
| 065146ff8a | |||
| d58fcbc788 | |||
| b483447b1c | |||
|
|
0d68d5fc7f | ||
|
|
1813bf505c | ||
|
|
9a6e7c7c47 | ||
|
|
68252b4e80 | ||
|
|
d1f13f2c84 | ||
|
|
6075c21a3a | ||
|
|
f3674ffa02 | ||
|
|
de187eff1c | ||
|
|
8e96174e1f | ||
| 8fa34da02f | |||
| 37919932de | |||
| 8ab8124370 | |||
| bfd080943d | |||
| c6820b6cc6 | |||
| 3c09c45694 | |||
|
|
bb5c17ec78 | ||
|
|
501064da10 | ||
|
|
8f7edf3190 | ||
|
|
e65905a165 | ||
|
|
2cb57f3634 | ||
|
|
679e12f786 | ||
|
|
291d307689 | ||
|
|
7d40b9e959 | ||
| cf56bdfb74 | |||
| b799a2e976 | |||
| 727b57b78a | |||
| 61cb845a0e | |||
| dad14fb66d | |||
| c98a09d264 | |||
| b978132ee0 |
@@ -27,4 +27,5 @@ sist2
|
||||
**/ext_libmobi
|
||||
**/ext_libwpd
|
||||
**/core
|
||||
*.a
|
||||
*.a
|
||||
tmp_scan/
|
||||
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -7,3 +7,6 @@
|
||||
[submodule "third-party/libscan/third-party/antiword"]
|
||||
path = third-party/libscan/third-party/antiword
|
||||
url = https://github.com/simon987/antiword
|
||||
[submodule "third-party/libscan/third-party/libmobi"]
|
||||
path = third-party/libscan/third-party/libmobi
|
||||
url = https://github.com/bfabiszewski/libmobi
|
||||
|
||||
@@ -4,6 +4,7 @@ set(CMAKE_C_STANDARD 11)
|
||||
project(sist2 C)
|
||||
|
||||
option(SIST_DEBUG "Build a debug executable" on)
|
||||
option(SIST_FAST "Enable more optimisation flags" off)
|
||||
option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)
|
||||
|
||||
add_compile_definitions(
|
||||
@@ -21,7 +22,6 @@ set(ARGPARSE_SHARED off)
|
||||
add_subdirectory(third-party/argparse)
|
||||
|
||||
add_executable(sist2
|
||||
|
||||
src/main.c
|
||||
src/sist.h
|
||||
src/io/walk.h src/io/walk.c
|
||||
@@ -55,6 +55,10 @@ find_package(lmdb CONFIG REQUIRED)
|
||||
find_package(cJSON CONFIG REQUIRED)
|
||||
find_package(unofficial-mongoose CONFIG REQUIRED)
|
||||
find_package(CURL CONFIG REQUIRED)
|
||||
find_library(MAGIC_LIB
|
||||
NAMES libmagic.so.1 magic
|
||||
PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
|
||||
)
|
||||
|
||||
|
||||
target_include_directories(
|
||||
@@ -94,10 +98,22 @@ if (SIST_DEBUG)
|
||||
PROPERTIES
|
||||
OUTPUT_NAME sist2_debug
|
||||
)
|
||||
elseif (SIST_FAST)
|
||||
target_compile_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
|
||||
-Ofast
|
||||
-march=native
|
||||
-fno-stack-protector
|
||||
-fomit-frame-pointer
|
||||
-freciprocal-math
|
||||
)
|
||||
else ()
|
||||
target_compile_options(
|
||||
sist2
|
||||
PRIVATE
|
||||
|
||||
-Ofast
|
||||
-fno-stack-protector
|
||||
-fomit-frame-pointer
|
||||
@@ -122,11 +138,12 @@ target_link_libraries(
|
||||
CURL::libcurl
|
||||
|
||||
pthread
|
||||
magic
|
||||
|
||||
c
|
||||
|
||||
scan
|
||||
|
||||
${MAGIC_LIB}
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
|
||||
@@ -5,11 +5,11 @@ WORKDIR /build/
|
||||
COPY . .
|
||||
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
|
||||
RUN make -j$(nproc)
|
||||
RUN strip sist2
|
||||
RUN strip sist2 || mv sist2_debug sist2
|
||||
|
||||
FROM ubuntu:21.10
|
||||
FROM --platform="linux/amd64" ubuntu:21.10
|
||||
|
||||
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
|
||||
RUN apt update && apt install -y curl libasan5 libmagic1 && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN mkdir -p /usr/share/tessdata && \
|
||||
cd /usr/share/tessdata/ && \
|
||||
|
||||
11
README.md
11
README.md
@@ -48,10 +48,11 @@ sist2 (Simple incremental search tool)
|
||||
- "ES_JAVA_OPTS=-Xms1G -Xmx2G"
|
||||
```
|
||||
1. Download sist2 executable
|
||||
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
|
||||
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
|
||||
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
|
||||
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
|
||||
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
|
||||
recommended!)*
|
||||
1. *(or)* `docker pull simon987/sist2:2.11.6-x64-linux`
|
||||
3. *(or)* `docker pull simon987/sist2:2.12.1-x64-linux`
|
||||
|
||||
1. See [Usage guide](docs/USAGE.md)
|
||||
|
||||
@@ -73,8 +74,8 @@ See [Usage guide](docs/USAGE.md) for more details
|
||||
| cbz,cbr | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | - | yes | - |
|
||||
| `audio/*` | ffmpeg | - | yes | ID3 tags |
|
||||
| `video/*` | ffmpeg | - | yes | title, comment, artist |
|
||||
| `image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
|
||||
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
|
||||
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
|
||||
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags |
|
||||
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
|
||||
| `text/plain` | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
|
||||
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
|
||||
|
||||
7
contrib/systemd/Makefile
Normal file
7
contrib/systemd/Makefile
Normal file
@@ -0,0 +1,7 @@
|
||||
install:
|
||||
install sist2-update-all.sh /usr/bin/sist2-update-all.sh
|
||||
install sist2-update-files.sh /usr/bin/sist2-update-files.sh
|
||||
install sist2-update-nextcloud.sh /usr/bin/sist2-update-nextcloud.sh
|
||||
install sist2-update.service /etc/systemd/system/sist2-update.service
|
||||
install sist2-update.timer /etc/systemd/system/sist2-update.timer
|
||||
systemctl daemon-reload
|
||||
31
contrib/systemd/README.md
Normal file
31
contrib/systemd/README.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# Systemd integration example
|
||||
|
||||
This example contains my (yatli) personal configuration for sist2 auto-updating.
|
||||
The following indices are involved in this configuration:
|
||||
|
||||
| Index | Path | Description |
|
||||
|-----------|------------------|--------------------------------------------|
|
||||
| files | /zpool/files | Main file repository |
|
||||
| nextcloud | /zpool/nextcloud | Externally synchronized to a cloud account |
|
||||
|
||||
The systemd integration achieves automatic sist2 scanning & indexing everyday at 3:00AM.
|
||||
|
||||
### Tailoring the configuration for yourself
|
||||
|
||||
`sist2-update-all.sh` calls update scripts for each sist2 index. Add or remove
|
||||
update scripts accordingly to suit your need. Each update script (e.g.
|
||||
`sist2-update-files.sh`) has important parameters laid down at the beginning so
|
||||
make sure to edit them to point to your files and index locations.
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# install the services and scripts
|
||||
sudo make install
|
||||
# enable & start the timer
|
||||
sudo systemctl enable sist2-update.timer
|
||||
sudo systemctl start sist2-update.timer
|
||||
# verify that the timer has been enabled
|
||||
systemctl list-timers --all
|
||||
```
|
||||
|
||||
9
contrib/systemd/sist2-update-all.sh
Executable file
9
contrib/systemd/sist2-update-all.sh
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
__dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
echo "Update index: Files"
|
||||
source ${__dir}/sist2-update-files.sh
|
||||
echo "Update index: Nextcloud"
|
||||
source ${__dir}/sist2-update-nextcloud.sh
|
||||
echo "Done. Restarting sist2."
|
||||
docker restart sist2-sist2-1
|
||||
34
contrib/systemd/sist2-update-files.sh
Executable file
34
contrib/systemd/sist2-update-files.sh
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
DATE=$(date +%Y_%m_%d)
|
||||
CONTENT=/zpool/files
|
||||
ORIG=/mnt/ssd/sist-index/files.idx
|
||||
NEW=/mnt/ssd/sist-index/files_$DATE.idx
|
||||
EXCLUDE='ZArchives|TorrentStore|TorrentDownload|624f0c59-1fef-44f6-95e9-7483296f2833|ubuntu-full-2021-12-07'
|
||||
NAME=Files
|
||||
#REWRITE_URL="http://localhost:33333/activate?collection=$NAME&path="
|
||||
REWRITE_URL=""
|
||||
|
||||
sist2 scan \
|
||||
--threads 14 \
|
||||
--mem-throttle 32768 \
|
||||
--quality 1.0 \
|
||||
--name $NAME \
|
||||
--ocr-lang=eng+chi_sim \
|
||||
--ocr-ebooks \
|
||||
--ocr-images \
|
||||
--exclude=$EXCLUDE \
|
||||
--rewrite-url=$REWRITE_URL \
|
||||
--incremental=$ORIG \
|
||||
--output=$NEW \
|
||||
$CONTENT
|
||||
echo ">>> Scan complete"
|
||||
rm -rf $ORIG
|
||||
mv $NEW $ORIG
|
||||
|
||||
unset http_proxy
|
||||
unset https_proxy
|
||||
unset HTTP_PROXY
|
||||
unset HTTPS_PROXY
|
||||
sist2 index $ORIG --incremental-index
|
||||
echo ">>> Index complete"
|
||||
33
contrib/systemd/sist2-update-nextcloud.sh
Executable file
33
contrib/systemd/sist2-update-nextcloud.sh
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
DATE=$(date +%Y_%m_%d)
|
||||
CONTENT=/zpool/nextcloud/v-yadli
|
||||
ORIG=/mnt/ssd/sist-index/nextcloud.idx
|
||||
NEW=/mnt/ssd/sist-index/nextcloud_$DATE.idx
|
||||
EXCLUDE='Yatao|.*263418493\\/Image\\/.*'
|
||||
NAME=NextCloud
|
||||
# REWRITE_URL="http://localhost:33333/activate?collection=$NAME&path="
|
||||
REWRITE_URL=""
|
||||
|
||||
sist2 scan \
|
||||
--threads 14 \
|
||||
--mem-throttle 32768 \
|
||||
--quality 1.0 \
|
||||
--name $NAME \
|
||||
--ocr-lang=eng+chi_sim \
|
||||
--ocr-ebooks \
|
||||
--ocr-images \
|
||||
--exclude=$EXCLUDE \
|
||||
--rewrite-url=$REWRITE_URL \
|
||||
--incremental=$ORIG \
|
||||
--output=$NEW \
|
||||
$CONTENT
|
||||
echo ">>> Scan complete"
|
||||
rm -rf $ORIG
|
||||
mv $NEW $ORIG
|
||||
|
||||
unset http_proxy
|
||||
unset https_proxy
|
||||
unset HTTP_PROXY
|
||||
unset HTTPS_PROXY
|
||||
sist2 index $ORIG --incremental-index
|
||||
6
contrib/systemd/sist2-update.service
Normal file
6
contrib/systemd/sist2-update.service
Normal file
@@ -0,0 +1,6 @@
|
||||
[Unit]
|
||||
Description=sist2-update
|
||||
|
||||
[Service]
|
||||
User=yatli
|
||||
ExecStart=/bin/bash /usr/bin/sist2-update-all.sh
|
||||
10
contrib/systemd/sist2-update.timer
Normal file
10
contrib/systemd/sist2-update.timer
Normal file
@@ -0,0 +1,10 @@
|
||||
[Unit]
|
||||
Description=sist2-update
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 3:00:00
|
||||
Persistent=true
|
||||
Unit=sist2-update.service
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
126
docs/USAGE.md
126
docs/USAGE.md
@@ -13,7 +13,6 @@
|
||||
* [options](#web-options)
|
||||
* [examples](#web-examples)
|
||||
* [rewrite_url](#rewrite_url)
|
||||
* [link to specific indices](#link-to-specific-indices)
|
||||
* [elasticsearch](#elasticsearch)
|
||||
* [exec-script](#exec-script)
|
||||
* [tagging](#tagging)
|
||||
@@ -26,58 +25,66 @@ Usage: sist2 scan [OPTION]... PATH
|
||||
or: sist2 exec-script [OPTION]... INDEX
|
||||
Lightning-fast file system indexer and search tool.
|
||||
|
||||
-h, --help show this help message and exit
|
||||
-v, --version Show version and exit
|
||||
--verbose Turn on logging
|
||||
--very-verbose Turn on debug messages
|
||||
-h, --help show this help message and exit
|
||||
-v, --version Show version and exit
|
||||
--verbose Turn on logging
|
||||
--very-verbose Turn on debug messages
|
||||
|
||||
Scan options
|
||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||
-q, --quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=3
|
||||
--size=<int> Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
|
||||
--content-size=<int> Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
|
||||
--incremental=<str> Reuse an existing index and only scan modified files.
|
||||
-o, --output=<str> Output directory. DEFAULT=index.sist2/
|
||||
--rewrite-url=<str> Serve files from this url instead of from disk.
|
||||
--name=<str> Index display name. DEFAULT: (name of the directory)
|
||||
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
|
||||
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
|
||||
--archive-passphrase=<str> Passphrase for encrypted archive files
|
||||
# TODO: add new --ocr-* options here
|
||||
-e, --exclude=<str> Files that match this regex will not be scanned
|
||||
--fast Only index file names & mime type
|
||||
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
|
||||
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
|
||||
--read-subtitles Read subtitles from media files.
|
||||
--fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata)
|
||||
--checksums Calculate file checksums when scanning.
|
||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||
--mem-throttle=<int> Total memory threshold in MiB for scan throttling. DEFAULT=0
|
||||
-q, --thumbnail-quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1
|
||||
--thumbnail-size=<int> Thumbnail size, in pixels. DEFAULT=500
|
||||
--thumbnail-count=<int> Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1
|
||||
--content-size=<int> Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768
|
||||
--incremental=<str> Reuse an existing index and only scan modified files.
|
||||
-o, --output=<str> Output directory. DEFAULT=index.sist2/
|
||||
--rewrite-url=<str> Serve files from this url instead of from disk.
|
||||
--name=<str> Index display name. DEFAULT: (name of the directory)
|
||||
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
|
||||
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
|
||||
--archive-passphrase=<str> Passphrase for encrypted archive files
|
||||
--ocr-lang=<str> Tesseract language (use 'tesseract --list-langs' to see which are installed on your machine)
|
||||
--ocr-images Enable OCR'ing of image files.
|
||||
--ocr-ebooks Enable OCR'ing of ebook files.
|
||||
-e, --exclude=<str> Files that match this regex will not be scanned
|
||||
--fast Only index file names & mime type
|
||||
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
|
||||
--mem-buffer=<int> Maximum memory buffer size per thread in MiB for files inside archives (see USAGE.md). DEFAULT: 2000
|
||||
--read-subtitles Read subtitles from media files.
|
||||
--fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata)
|
||||
--checksums Calculate file checksums when scanning.
|
||||
--list-file=<str> Specify a list of newline-delimited paths to be scanned instead of normal directory traversal. Use '-' to read from stdin.
|
||||
|
||||
Index options
|
||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
-p, --print Just print JSON documents to stdout.
|
||||
--script-file=<str> Path to user script.
|
||||
--mappings-file=<str> Path to Elasticsearch mappings.
|
||||
--settings-file=<str> Path to Elasticsearch settings.
|
||||
--async-script Execute user script asynchronously.
|
||||
--batch-size=<int> Index batch size. DEFAULT: 100
|
||||
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
|
||||
-t, --threads=<int> Number of threads. DEFAULT=1
|
||||
--es-url=<str> Elasticsearch url with port. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
-p, --print Just print JSON documents to stdout.
|
||||
--incremental-index Conduct incremental indexing, assumes that the old index is already digested by Elasticsearch.
|
||||
--script-file=<str> Path to user script.
|
||||
--mappings-file=<str> Path to Elasticsearch mappings.
|
||||
--settings-file=<str> Path to Elasticsearch settings.
|
||||
--async-script Execute user script asynchronously.
|
||||
--batch-size=<int> Index batch size. DEFAULT: 100
|
||||
-f, --force-reset Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
|
||||
|
||||
Web options
|
||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
--bind=<str> Listen on this address. DEFAULT=localhost:4090
|
||||
--auth=<str> Basic auth in user:password format
|
||||
--tag-auth=<str> Basic auth in user:password format for tagging
|
||||
--tagline=<str> Tagline in navbar
|
||||
--dev Serve html & js files from disk (for development)
|
||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
--bind=<str> Listen on this address. DEFAULT=localhost:4090
|
||||
--auth=<str> Basic auth in user:password format
|
||||
--tag-auth=<str> Basic auth in user:password format for tagging
|
||||
--tagline=<str> Tagline in navbar
|
||||
--dev Serve html & js files from disk (for development)
|
||||
--lang=<str> Default UI language. Can be changed by the user
|
||||
|
||||
Exec-script options
|
||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
--script-file=<str> Path to user script.
|
||||
--async-script Execute user script asynchronously.
|
||||
--es-url=<str> Elasticsearch url. DEFAULT=http://localhost:9200
|
||||
--es-index=<str> Elasticsearch index name. DEFAULT=sist2
|
||||
--script-file=<str> Path to user script.
|
||||
--async-script Execute user script asynchronously.
|
||||
Made by simon987 <me@simon987.net>. Released under GPL-3.0
|
||||
```
|
||||
|
||||
## Scan
|
||||
@@ -86,13 +93,21 @@ Exec-script options
|
||||
|
||||
* `-t, --threads`
|
||||
Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
|
||||
* `-q, --quality`
|
||||
* `--mem-throttle`
|
||||
Total memory threshold in MiB for scan throttling. Worker threads will not start a new parse job
|
||||
until the total memory usage of sist2 is below this threshold. Set to 0 to disable. DEFAULT=0
|
||||
* `-q, --thumbnail-quality`
|
||||
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best.
|
||||
* `--size`
|
||||
* `--thumbnail-size`
|
||||
Thumbnail size in pixels.
|
||||
* `--thumbnail-count`
|
||||
Maximum number of thumbnails to generate. When set to a value >= 2, thumbnails for video previews
|
||||
will be generated. The actual number of thumbnails generated depends on the length of the video (maximum 1 image
|
||||
every ~7s). Set to 0 to completely disable thumbnails.
|
||||
* `--content-size`
|
||||
Number of bytes of text to be extracted from the content of files (plain text and PDFs).
|
||||
Number of bytes of text to be extracted from the content of files (plain text, PDFs etc.).
|
||||
Repeated whitespace and special characters do not count toward this limit.
|
||||
Set to 0 to completely disable content parsing.
|
||||
* `--incremental`
|
||||
Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
|
||||
will be copied to the new index and will not be parsed again.
|
||||
@@ -105,7 +120,7 @@ Exec-script options
|
||||
* list: Only get file names as text
|
||||
* shallow: Don't parse archives inside archives.
|
||||
* recurse: Scan archives recursively (default)
|
||||
* `--ocr` See [OCR](../README.md#OCR)
|
||||
* `--ocr-lang`, `--ocr-ebooks`, `--ocr-images` See [OCR](../README.md#OCR)
|
||||
* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
|
||||
part of the full absolute path.
|
||||
|
||||
@@ -125,13 +140,13 @@ Exec-script options
|
||||
In effect, smaller `treemap-threshold` values will yield a more detailed
|
||||
(but also a more cluttered and harder to read) visualization.
|
||||
|
||||
* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files
|
||||
* `--mem-buffer` Maximum memory buffer size in MiB (per thread) for files inside archives. Media files
|
||||
larger than this number will be read sequentially and no *seek* operations will be supported.
|
||||
|
||||
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
|
||||
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
|
||||
* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
|
||||
* `--checksums` Calculate file checksums (sha1) when scanning files. This option does not cause any additional read
|
||||
* `--checksums` Calculate file checksums (SHA1) when scanning files. This option does not cause any additional read
|
||||
operations. Checksums are not calculated for all file types, unless the file is inside an archive. When enabled, duplicate
|
||||
files are hidden in the web UI (this behaviour can be toggled in the Configuration page).
|
||||
|
||||
@@ -201,6 +216,9 @@ and values are raw image bytes.
|
||||
Elasticsearch index name. DEFAULT=sist2
|
||||
* `-p, --print`
|
||||
Print index in JSON format to stdout.
|
||||
* `--incremental-index`
|
||||
Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch.
|
||||
Only the new changes since the last scan will be sent.
|
||||
* `--script-file`
|
||||
Path to user script. See [Scripting](scripting.md).
|
||||
* `--mappings-file`
|
||||
@@ -248,7 +266,9 @@ sist2 index --print ./my_index/ | jq | less
|
||||
`--auth` argument, but authentication is only applied the `/tag/` endpoint.
|
||||
* `--tagline=<str>` When specified, will replace the default tagline in the navbar.
|
||||
* `--dev` Serve html & js files from disk (for development, used to modify frontend files without having to recompile)
|
||||
|
||||
* `--lang=<str>` Set the default web UI language (See #180 for a list of supported languages, default
|
||||
is `en`). The user can change the language in the configuration page
|
||||
|
||||
### Web examples
|
||||
|
||||
**Single index**
|
||||
@@ -272,7 +292,7 @@ Both the `root` and `rewrite_url` fields are safe to manually modify from the
|
||||
|
||||
# Elasticsearch
|
||||
|
||||
Elasticsearch versions >=6.8.0, <8.0.0 are supported by sist2.
|
||||
Elasticsearch versions >=6.8.0, 7.X.X and 8.X.X are supported by sist2.
|
||||
|
||||
Using a version >=7.14.0 is recommended to enable the following features:
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@
|
||||
"index": false
|
||||
},
|
||||
"thumbnail": {
|
||||
"type": "keyword",
|
||||
"type": "integer",
|
||||
"index": false
|
||||
},
|
||||
"videoc": {
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"refresh_interval": "30s",
|
||||
"codec": "best_compression",
|
||||
"number_of_replicas": 0,
|
||||
"highlight.max_analyzed_offset": 10000000
|
||||
"highlight.max_analyzed_offset": 1000000
|
||||
},
|
||||
"analysis": {
|
||||
"tokenizer": {
|
||||
@@ -16,7 +16,7 @@
|
||||
"delimiter": "."
|
||||
},
|
||||
"my_nGram_tokenizer": {
|
||||
"type": "nGram",
|
||||
"type": "ngram",
|
||||
"min_gram": 3,
|
||||
"max_gram": 3
|
||||
}
|
||||
|
||||
@@ -5,5 +5,6 @@ rm -rf index.sist2/
|
||||
python3 scripts/mime.py > src/parsing/mime_generated.c
|
||||
python3 scripts/serve_static.py > src/web/static_generated.c
|
||||
python3 scripts/index_static.py > src/index/static_generated.c
|
||||
python3 scripts/magic_static.py > src/magic_generated.c
|
||||
|
||||
printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h
|
||||
8
scripts/magic_static.py
Normal file
8
scripts/magic_static.py
Normal file
@@ -0,0 +1,8 @@
|
||||
|
||||
try:
|
||||
with open("/usr/lib/file/magic.mgc", "rb") as f:
|
||||
data = f.read()
|
||||
except:
|
||||
data = bytes([])
|
||||
|
||||
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))
|
||||
3
scripts/start_dev_es.sh
Executable file
3
scripts/start_dev_es.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
docker run --rm -it --name "sist2-dev-es"\
|
||||
-p 9200:9200 -e "discovery.type=single-node" \
|
||||
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.14.0
|
||||
3
scripts/start_dev_es_6.sh
Executable file
3
scripts/start_dev_es_6.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
docker run --rm -it --name "sist2-dev-es-6"\
|
||||
-p 9202:9200 -e "discovery.type=single-node" \
|
||||
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:6.8.0
|
||||
3
scripts/start_dev_es_8.sh
Executable file
3
scripts/start_dev_es_8.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
docker run --rm -it --name "sist2-dev-es"\
|
||||
-p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
|
||||
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.1.2
|
||||
2
sist2-vue/dist/css/index.css
vendored
2
sist2-vue/dist/css/index.css
vendored
File diff suppressed because one or more lines are too long
38
sist2-vue/dist/js/chunk-vendors.js
vendored
38
sist2-vue/dist/js/chunk-vendors.js
vendored
File diff suppressed because one or more lines are too long
2
sist2-vue/dist/js/index.js
vendored
2
sist2-vue/dist/js/index.js
vendored
File diff suppressed because one or more lines are too long
4239
sist2-vue/package-lock.json
generated
4239
sist2-vue/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -8,10 +8,9 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@egjs/vue-infinitegrid": "3.3.0",
|
||||
"axios": "^0.21.1",
|
||||
"axios": "^0.25.0",
|
||||
"bootstrap-vue": "^2.21.2",
|
||||
"core-js": "^3.6.5",
|
||||
"crypto-es": "^1.2.7",
|
||||
"d3": "^5.16.0",
|
||||
"date-fns": "^2.21.3",
|
||||
"dom-to-image": "^2.6.0",
|
||||
|
||||
@@ -146,6 +146,7 @@ html, body {
|
||||
|
||||
.theme-black .nav-tabs .nav-link {
|
||||
color: #e0e0e0;
|
||||
border-radius: 0;
|
||||
}
|
||||
|
||||
.theme-black .nav-tabs .nav-item.show .nav-link, .theme-black .nav-tabs .nav-link.active {
|
||||
@@ -309,4 +310,8 @@ mark {
|
||||
display: inline-block;
|
||||
width: 40%;
|
||||
}
|
||||
|
||||
.pointer {
|
||||
cursor: pointer;
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import axios from "axios";
|
||||
import {ext, strUnescape, lum} from "./util";
|
||||
import CryptoES from 'crypto-es';
|
||||
|
||||
export interface EsTag {
|
||||
id: string
|
||||
@@ -30,7 +29,6 @@ export interface EsHit {
|
||||
_index: string
|
||||
_id: string
|
||||
_score: number
|
||||
_path_md5: string
|
||||
_type: string
|
||||
_tags: Tag[]
|
||||
_seq: number
|
||||
@@ -62,8 +60,9 @@ export interface EsHit {
|
||||
isPlayableImage: boolean
|
||||
isAudio: boolean
|
||||
hasThumbnail: boolean
|
||||
tnW: number
|
||||
tnH: number
|
||||
hasVidPreview: boolean
|
||||
/** Number of thumbnails available */
|
||||
tnNum: number
|
||||
}
|
||||
highlight: {
|
||||
name: string[] | undefined,
|
||||
@@ -134,8 +133,15 @@ class Sist2Api {
|
||||
|
||||
if ("thumbnail" in hit._source) {
|
||||
hit._props.hasThumbnail = true;
|
||||
hit._props.tnW = Number(hit._source.thumbnail.split(",")[0]);
|
||||
hit._props.tnH = Number(hit._source.thumbnail.split(",")[1]);
|
||||
|
||||
if (Number.isNaN(Number(hit._source.thumbnail))) {
|
||||
// Backwards compatibility
|
||||
hit._props.tnNum = 1;
|
||||
hit._props.hasVidPreview = false;
|
||||
} else {
|
||||
hit._props.tnNum = Number(hit._source.thumbnail);
|
||||
hit._props.hasVidPreview = hit._props.tnNum > 1;
|
||||
}
|
||||
}
|
||||
|
||||
switch (mimeCategory) {
|
||||
@@ -241,11 +247,6 @@ class Sist2Api {
|
||||
res.hits.hits.forEach((hit: EsHit) => {
|
||||
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
|
||||
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
|
||||
hit["_path_md5"] = CryptoES.MD5(
|
||||
hit["_source"]["path"] +
|
||||
(hit["_source"]["path"] ? "/" : "") +
|
||||
hit["_source"]["name"] + ext(hit)
|
||||
).toString();
|
||||
|
||||
this.setHitProps(hit);
|
||||
this.setHitTags(hit);
|
||||
@@ -335,10 +336,6 @@ class Sist2Api {
|
||||
};
|
||||
}
|
||||
|
||||
getDocInfo(docId: string) {
|
||||
return axios.get(`${this.baseUrl}d/${docId}`);
|
||||
}
|
||||
|
||||
getTags() {
|
||||
return this.esQuery({
|
||||
aggs: {
|
||||
@@ -372,8 +369,7 @@ class Sist2Api {
|
||||
return axios.post(`${this.baseUrl}tag/` + hit["_source"]["index"], {
|
||||
delete: false,
|
||||
name: tag,
|
||||
doc_id: hit["_id"],
|
||||
path_md5: hit._path_md5
|
||||
doc_id: hit["_id"]
|
||||
});
|
||||
}
|
||||
|
||||
@@ -381,8 +377,7 @@ class Sist2Api {
|
||||
return axios.post(`${this.baseUrl}tag/` + hit["_source"]["index"], {
|
||||
delete: true,
|
||||
name: tag,
|
||||
doc_id: hit["_id"],
|
||||
path_md5: hit._path_md5
|
||||
doc_id: hit["_id"]
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -69,7 +69,7 @@ interface SortMode {
|
||||
|
||||
class Sist2Query {
|
||||
|
||||
searchQuery(): any {
|
||||
searchQuery(blankSearch: boolean = false): any {
|
||||
|
||||
const getters = store.getters;
|
||||
|
||||
@@ -93,22 +93,6 @@ class Sist2Query {
|
||||
{terms: {index: selectedIndexIds}}
|
||||
] as any[];
|
||||
|
||||
if (sizeMin && sizeMax) {
|
||||
filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
|
||||
} else if (sizeMin) {
|
||||
filters.push({range: {size: {gte: sizeMin}}})
|
||||
} else if (sizeMax) {
|
||||
filters.push({range: {size: {lte: sizeMax}}})
|
||||
}
|
||||
|
||||
if (dateMin && dateMax) {
|
||||
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
|
||||
} else if (dateMin) {
|
||||
filters.push({range: {mtime: {gte: dateMin}}})
|
||||
} else if (dateMax) {
|
||||
filters.push({range: {mtime: {lte: dateMax}}})
|
||||
}
|
||||
|
||||
const fields = [
|
||||
"name^8",
|
||||
"content^3",
|
||||
@@ -128,20 +112,39 @@ class Sist2Query {
|
||||
fields.push("name.nGram^3");
|
||||
}
|
||||
|
||||
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
|
||||
if (path !== "") {
|
||||
filters.push({term: {path: path}})
|
||||
}
|
||||
if (!blankSearch) {
|
||||
if (sizeMin && sizeMax) {
|
||||
filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
|
||||
} else if (sizeMin) {
|
||||
filters.push({range: {size: {gte: sizeMin}}})
|
||||
} else if (sizeMax) {
|
||||
filters.push({range: {size: {lte: sizeMax}}})
|
||||
}
|
||||
|
||||
if (selectedMimeTypes.length > 0) {
|
||||
filters.push({terms: {"mime": selectedMimeTypes}});
|
||||
}
|
||||
if (dateMin && dateMax) {
|
||||
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
|
||||
} else if (dateMin) {
|
||||
filters.push({range: {mtime: {gte: dateMin}}})
|
||||
} else if (dateMax) {
|
||||
filters.push({range: {mtime: {lte: dateMax}}})
|
||||
}
|
||||
|
||||
if (selectedTags.length > 0) {
|
||||
if (getters.optTagOrOperator) {
|
||||
filters.push({terms: {"tag": selectedTags}});
|
||||
} else {
|
||||
selectedTags.forEach((tag: string) => filters.push({term: {"tag": tag}}));
|
||||
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
|
||||
|
||||
if (path !== "") {
|
||||
filters.push({term: {path: path}})
|
||||
}
|
||||
|
||||
if (selectedMimeTypes.length > 0) {
|
||||
filters.push({terms: {"mime": selectedMimeTypes}});
|
||||
}
|
||||
|
||||
if (selectedTags.length > 0) {
|
||||
if (getters.optTagOrOperator) {
|
||||
filters.push({terms: {"tag": selectedTags}});
|
||||
} else {
|
||||
selectedTags.forEach((tag: string) => filters.push({term: {"tag": tag}}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -182,7 +185,7 @@ class Sist2Query {
|
||||
size: size,
|
||||
} as any;
|
||||
|
||||
if (!empty) {
|
||||
if (!empty && !blankSearch) {
|
||||
q.query.bool.must = query;
|
||||
}
|
||||
|
||||
@@ -207,7 +210,7 @@ class Sist2Query {
|
||||
};
|
||||
|
||||
if (!legacyES) {
|
||||
q.highlight.max_analyzed_offset = 9_999_999;
|
||||
q.highlight.max_analyzed_offset = 999_999;
|
||||
}
|
||||
|
||||
if (getters.optSearchInPath) {
|
||||
@@ -237,7 +240,7 @@ class Sist2Query {
|
||||
}
|
||||
}
|
||||
|
||||
if (!empty) {
|
||||
if (!empty && !blankSearch) {
|
||||
q.query.function_score.query.bool.must.push(query);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,31 @@
|
||||
<template>
|
||||
<div id="dateSlider"></div>
|
||||
<div v-if="$store.state.optUseDatePicker">
|
||||
<b-row>
|
||||
<b-col sm="6">
|
||||
<b-form-datepicker
|
||||
value-as-date
|
||||
:date-format-options="{ year: 'numeric', month: '2-digit', day: '2-digit' }"
|
||||
:locale="$store.state.optLang"
|
||||
class="mb-2"
|
||||
:value="dateMin" @input="setDateMin"></b-form-datepicker>
|
||||
</b-col>
|
||||
<b-col sm="6">
|
||||
<b-form-datepicker
|
||||
value-as-date
|
||||
:date-format-options="{ year: 'numeric', month: '2-digit', day: '2-digit' }"
|
||||
:locale="$store.state.optLang"
|
||||
class="mb-2"
|
||||
:value="dateMax" @input="setDateMax"></b-form-datepicker>
|
||||
</b-col>
|
||||
</b-row>
|
||||
</div>
|
||||
<div v-else>
|
||||
<b-row>
|
||||
<b-col style="height: 70px;">
|
||||
<div id="dateSlider"></div>
|
||||
</b-col>
|
||||
</b-row>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
@@ -10,11 +36,36 @@ import {mergeTooltips} from "@/util-js";
|
||||
|
||||
export default {
|
||||
name: "DateSlider",
|
||||
methods: {
|
||||
setDateMin(val) {
|
||||
const epochDate = Math.ceil(+val / 1000);
|
||||
this.$store.commit("setDateMin", epochDate);
|
||||
},
|
||||
setDateMax(val) {
|
||||
const epochDate = Math.ceil(+val / 1000);
|
||||
this.$store.commit("setDateMax", epochDate);
|
||||
},
|
||||
},
|
||||
computed: {
|
||||
dateMin() {
|
||||
const dateMin = this.$store.state.dateMin ? this.$store.state.dateMin : this.$store.state.dateBoundsMin;
|
||||
return new Date(dateMin * 1000)
|
||||
},
|
||||
dateMax() {
|
||||
const dateMax = this.$store.state.dateMax ? this.$store.state.dateMax : this.$store.state.dateBoundsMax;
|
||||
return new Date(dateMax * 1000)
|
||||
}
|
||||
},
|
||||
mounted() {
|
||||
this.$store.subscribe((mutation) => {
|
||||
if (mutation.type === "setDateBoundsMax") {
|
||||
const elem = document.getElementById("dateSlider");
|
||||
|
||||
if (elem === null) {
|
||||
// Using b-form-datepicker, skip initialisation of slider
|
||||
return
|
||||
}
|
||||
|
||||
if (elem.children.length > 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ export default {
|
||||
{key: "esIndex", value: this.$store.state.sist2Info.esIndex},
|
||||
{key: "tagline", value: this.$store.state.sist2Info.tagline},
|
||||
{key: "dev", value: this.$store.state.sist2Info.dev},
|
||||
{key: "mongooseVersion", value: this.$store.state.sist2Info.mongooseVersion},
|
||||
{key: "esVersion", value: this.$store.state.sist2Info.esVersion},
|
||||
{key: "esVersionSupported", value: this.$store.state.sist2Info.esVersionSupported},
|
||||
{key: "esVersionLegacy", value: this.$store.state.sist2Info.esVersionLegacy},
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
<template>
|
||||
<div class="doc-card" :class="{'sub-document': doc._props.isSubDocument}" :style="`width: ${width}px`">
|
||||
<div class="doc-card" :class="{'sub-document': doc._props.isSubDocument}" :style="`width: ${width}px`"
|
||||
@click="$store.commit('busTnTouchStart', null)">
|
||||
<b-card
|
||||
no-body
|
||||
img-top
|
||||
@@ -10,39 +11,7 @@
|
||||
<ContentDiv :doc="doc"></ContentDiv>
|
||||
|
||||
<!-- Thumbnail-->
|
||||
<div v-if="doc._props.hasThumbnail" class="img-wrapper" @mouseenter="onTnEnter()" @mouseleave="onTnLeave()">
|
||||
<div v-if="doc._props.isAudio" class="card-img-overlay" :class="{'small-badge': smallBadge}">
|
||||
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
|
||||
</div>
|
||||
|
||||
<div
|
||||
v-if="doc._props.isImage && !hover && doc._props.tnW / doc._props.tnH < 5"
|
||||
class="card-img-overlay"
|
||||
:class="{'small-badge': smallBadge}">
|
||||
<span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span>
|
||||
</div>
|
||||
|
||||
<div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover"
|
||||
class="card-img-overlay"
|
||||
:class="{'small-badge': smallBadge}">
|
||||
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
|
||||
</div>
|
||||
|
||||
<div v-if="doc._props.isPlayableVideo" class="play">
|
||||
<svg viewBox="0 0 494.942 494.942" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="m35.353 0 424.236 247.471-424.236 247.471z"/>
|
||||
</svg>
|
||||
</div>
|
||||
|
||||
<img ref="tn"
|
||||
v-if="doc._props.isPlayableImage || doc._props.isPlayableVideo"
|
||||
:src="(doc._props.isGif && hover) ? `f/${doc._id}` : `t/${doc._source.index}/${doc._id}`"
|
||||
alt=""
|
||||
:style="{height: (doc._props.isGif && hover) ? `${tnHeight()}px` : undefined}"
|
||||
class="pointer fit card-img-top" @click="onThumbnailClick()">
|
||||
<img v-else :src="`t/${doc._source.index}/${doc._id}`" alt=""
|
||||
class="fit card-img-top">
|
||||
</div>
|
||||
<FullThumbnail :doc="doc" :small-badge="smallBadge" @onThumbnailClick="onThumbnailClick()"></FullThumbnail>
|
||||
|
||||
<!-- Audio player-->
|
||||
<audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls
|
||||
@@ -73,31 +42,19 @@ import TagContainer from "@/components/TagContainer.vue";
|
||||
import DocFileTitle from "@/components/DocFileTitle.vue";
|
||||
import DocInfoModal from "@/components/DocInfoModal.vue";
|
||||
import ContentDiv from "@/components/ContentDiv.vue";
|
||||
import FullThumbnail from "@/components/FullThumbnail";
|
||||
|
||||
|
||||
export default {
|
||||
components: {ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
|
||||
components: {FullThumbnail, ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
|
||||
props: ["doc", "width"],
|
||||
data() {
|
||||
return {
|
||||
ext: ext,
|
||||
showInfo: false,
|
||||
hover: false
|
||||
}
|
||||
},
|
||||
computed: {
|
||||
placeHolderStyle() {
|
||||
|
||||
const tokens = this.doc._source.thumbnail.split(",");
|
||||
const w = Number(tokens[0]);
|
||||
const h = Number(tokens[1]);
|
||||
|
||||
const MAX_HEIGHT = 400;
|
||||
|
||||
return {
|
||||
height: `${Math.min((h / w) * this.width, MAX_HEIGHT)}px`,
|
||||
}
|
||||
},
|
||||
smallBadge() {
|
||||
return this.width < 150;
|
||||
}
|
||||
@@ -119,31 +76,10 @@ export default {
|
||||
}
|
||||
});
|
||||
},
|
||||
onTnEnter() {
|
||||
this.hover = true;
|
||||
},
|
||||
onTnLeave() {
|
||||
this.hover = false;
|
||||
},
|
||||
tnHeight() {
|
||||
return this.$refs.tn.height;
|
||||
}
|
||||
},
|
||||
}
|
||||
</script>
|
||||
<style>
|
||||
.img-wrapper {
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.img-wrapper:hover svg {
|
||||
fill: rgba(0, 0, 0, 1);
|
||||
}
|
||||
|
||||
.pointer {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.fit {
|
||||
display: block;
|
||||
min-width: 64px;
|
||||
@@ -153,15 +89,17 @@ export default {
|
||||
width: auto;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
.audio-fit {
|
||||
height: 39px;
|
||||
vertical-align: bottom;
|
||||
display: inline;
|
||||
width: 100%;
|
||||
}
|
||||
</style>
|
||||
|
||||
<style scoped>
|
||||
|
||||
.card-img-top {
|
||||
border-top-left-radius: 0;
|
||||
border-top-right-radius: 0;
|
||||
}
|
||||
|
||||
.padding-03 {
|
||||
padding: 0.3rem;
|
||||
}
|
||||
@@ -179,55 +117,11 @@ export default {
|
||||
padding: 0.3rem;
|
||||
}
|
||||
|
||||
.thumbnail-placeholder {
|
||||
|
||||
}
|
||||
|
||||
.card-img-overlay {
|
||||
pointer-events: none;
|
||||
padding: 0.75rem;
|
||||
bottom: unset;
|
||||
top: 0;
|
||||
left: unset;
|
||||
right: unset;
|
||||
}
|
||||
|
||||
.badge-resolution {
|
||||
color: #212529;
|
||||
background-color: #FFC107;
|
||||
}
|
||||
|
||||
.play {
|
||||
position: absolute;
|
||||
width: 25px;
|
||||
height: 25px;
|
||||
left: 50%;
|
||||
top: 50%;
|
||||
transform: translate(-50%, -50%);
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
.play svg {
|
||||
fill: rgba(0, 0, 0, 0.7);
|
||||
}
|
||||
|
||||
.doc-card {
|
||||
padding-left: 3px;
|
||||
padding-right: 3px;
|
||||
}
|
||||
|
||||
.small-badge {
|
||||
padding: 1px 3px;
|
||||
font-size: 70%;
|
||||
}
|
||||
|
||||
.audio-fit {
|
||||
height: 39px;
|
||||
vertical-align: bottom;
|
||||
display: inline;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.sub-document .card {
|
||||
background: #AB47BC1F !important;
|
||||
}
|
||||
|
||||
@@ -2,7 +2,10 @@
|
||||
<b-modal :visible="show" size="lg" :hide-footer="true" static lazy @close="$emit('close')" @hide="$emit('close')"
|
||||
>
|
||||
<template #modal-title>
|
||||
<h5 class="modal-title" :title="doc._source.name + ext(doc)">{{ doc._source.name + ext(doc) }}</h5>
|
||||
<h5 class="modal-title" :title="doc._source.name + ext(doc)">
|
||||
{{ doc._source.name + ext(doc) }}
|
||||
<router-link :to="`/file?byId=${doc._id}`">#</router-link>
|
||||
</h5>
|
||||
</template>
|
||||
|
||||
<img v-if="doc._props.hasThumbnail" :src="`t/${doc._source.index}/${doc._id}`" alt="" class="fit card-img-top">
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
<template>
|
||||
<b-list-group-item class="flex-column align-items-start mb-2" :class="{'sub-document': doc._props.isSubDocument}"
|
||||
@mouseenter="onTnEnter()" @mouseleave="onTnLeave()" >
|
||||
@mouseenter="onTnEnter()" @mouseleave="onTnLeave()">
|
||||
|
||||
<!-- Info modal-->
|
||||
<DocInfoModal :show="showInfo" :doc="doc" @close="showInfo = false"></DocInfoModal>
|
||||
|
||||
<div class="media ml-2">
|
||||
|
||||
<!-- Thumbnail-->
|
||||
<div v-if="doc._props.hasThumbnail" class="align-self-start mr-2 wrapper-sm">
|
||||
<div class="img-wrapper">
|
||||
<div v-if="doc._props.isPlayableVideo" class="play">
|
||||
@@ -26,6 +28,7 @@
|
||||
<FileIcon></FileIcon>
|
||||
</div>
|
||||
|
||||
<!-- Doc line-->
|
||||
<div class="doc-line ml-3">
|
||||
<div style="display: flex">
|
||||
<span class="info-icon" @click="showInfo = true"></span>
|
||||
@@ -154,6 +157,7 @@ export default {
|
||||
.list-group-item .img-wrapper {
|
||||
width: 88px;
|
||||
height: 88px;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.fit-sm {
|
||||
|
||||
173
sist2-vue/src/components/FullThumbnail.vue
Normal file
173
sist2-vue/src/components/FullThumbnail.vue
Normal file
@@ -0,0 +1,173 @@
|
||||
<template>
|
||||
<div v-if="doc._props.hasThumbnail" class="img-wrapper" @mouseenter="onTnEnter()" @mouseleave="onTnLeave()"
|
||||
@touchstart="onTouchStart()">
|
||||
<div v-if="doc._props.isAudio" class="card-img-overlay" :class="{'small-badge': smallBadge}">
|
||||
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
|
||||
</div>
|
||||
|
||||
<div
|
||||
v-if="doc._props.isImage && !hover && doc._props.tnW / doc._props.tnH < 5"
|
||||
class="card-img-overlay"
|
||||
:class="{'small-badge': smallBadge}">
|
||||
<span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span>
|
||||
</div>
|
||||
|
||||
<div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover"
|
||||
class="card-img-overlay"
|
||||
:class="{'small-badge': smallBadge}">
|
||||
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
|
||||
</div>
|
||||
|
||||
<div v-if="doc._props.isPlayableVideo" class="play">
|
||||
<svg viewBox="0 0 494.942 494.942" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="m35.353 0 424.236 247.471-424.236 247.471z"/>
|
||||
</svg>
|
||||
</div>
|
||||
|
||||
<img ref="tn"
|
||||
v-if="doc._props.isPlayableImage || doc._props.isPlayableVideo"
|
||||
:src="tnSrc"
|
||||
alt=""
|
||||
:style="{height: (doc._props.isGif && hover) ? `${tnHeight()}px` : undefined}"
|
||||
class="pointer fit card-img-top" @click="onThumbnailClick()">
|
||||
<img v-else :src="tnSrc" alt=""
|
||||
class="fit card-img-top">
|
||||
|
||||
<ThumbnailProgressBar v-if="hover && doc._props.hasVidPreview"
|
||||
:progress="(currentThumbnailNum + 1) / (doc._props.tnNum)"
|
||||
></ThumbnailProgressBar>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
import {humanTime} from "@/util";
|
||||
import ThumbnailProgressBar from "@/components/ThumbnailProgressBar";
|
||||
|
||||
export default {
|
||||
name: "FullThumbnail",
|
||||
props: ["doc", "smallBadge"],
|
||||
components: {ThumbnailProgressBar},
|
||||
data() {
|
||||
return {
|
||||
hover: false,
|
||||
currentThumbnailNum: 0,
|
||||
timeoutId: null
|
||||
}
|
||||
},
|
||||
created() {
|
||||
this.$store.subscribe((mutation) => {
|
||||
if (mutation.type === "busTnTouchStart" && mutation.payload !== this.doc._id) {
|
||||
this.onTnLeave();
|
||||
}
|
||||
});
|
||||
},
|
||||
computed: {
|
||||
tnSrc() {
|
||||
const doc = this.doc;
|
||||
const props = doc._props;
|
||||
if (props.isGif && this.hover) {
|
||||
return `f/${doc._id}`;
|
||||
}
|
||||
return (this.currentThumbnailNum === 0)
|
||||
? `t/${doc._source.index}/${doc._id}`
|
||||
: `t/${doc._source.index}/${doc._id}${String(this.currentThumbnailNum).padStart(4, "0")}`;
|
||||
},
|
||||
},
|
||||
methods: {
|
||||
humanTime: humanTime,
|
||||
onThumbnailClick() {
|
||||
this.$emit("onThumbnailClick");
|
||||
},
|
||||
tnHeight() {
|
||||
return this.$refs.tn.height;
|
||||
},
|
||||
tnWidth() {
|
||||
return this.$refs.tn.width;
|
||||
},
|
||||
onTnEnter() {
|
||||
this.hover = true;
|
||||
if (this.doc._props.hasVidPreview) {
|
||||
this.currentThumbnailNum += 1;
|
||||
this.scheduleNextTnNum();
|
||||
}
|
||||
},
|
||||
onTnLeave() {
|
||||
this.currentThumbnailNum = 0;
|
||||
this.hover = false;
|
||||
if (this.timeoutId !== null) {
|
||||
window.clearTimeout(this.timeoutId);
|
||||
this.timeoutId = null;
|
||||
}
|
||||
},
|
||||
scheduleNextTnNum() {
|
||||
const INTERVAL = this.$store.state.optVidPreviewInterval ?? 700;
|
||||
this.timeoutId = window.setTimeout(() => {
|
||||
if (!this.hover) {
|
||||
return;
|
||||
}
|
||||
this.scheduleNextTnNum();
|
||||
if (this.currentThumbnailNum === this.doc._props.tnNum - 1) {
|
||||
this.currentThumbnailNum = 0;
|
||||
} else {
|
||||
this.currentThumbnailNum += 1;
|
||||
}
|
||||
}, INTERVAL);
|
||||
},
|
||||
onTouchStart() {
|
||||
this.$store.commit("busTnTouchStart", this.doc._id);
|
||||
if (!this.hover) {
|
||||
this.onTnEnter()
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.img-wrapper {
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.img-wrapper:hover svg {
|
||||
fill: rgba(0, 0, 0, 1);
|
||||
}
|
||||
|
||||
.card-img-top {
|
||||
border-top-left-radius: 0;
|
||||
border-top-right-radius: 0;
|
||||
}
|
||||
|
||||
.play {
|
||||
position: absolute;
|
||||
width: 25px;
|
||||
height: 25px;
|
||||
left: 50%;
|
||||
top: 50%;
|
||||
transform: translate(-50%, -50%);
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
.play svg {
|
||||
fill: rgba(0, 0, 0, 0.7);
|
||||
}
|
||||
|
||||
.badge-resolution {
|
||||
color: #212529;
|
||||
background-color: #FFC107;
|
||||
}
|
||||
|
||||
.card-img-overlay {
|
||||
pointer-events: none;
|
||||
padding: 0.75rem;
|
||||
bottom: unset;
|
||||
top: 0;
|
||||
left: unset;
|
||||
right: unset;
|
||||
}
|
||||
|
||||
.small-badge {
|
||||
padding: 1px 3px;
|
||||
font-size: 70%;
|
||||
}
|
||||
|
||||
</style>
|
||||
@@ -29,7 +29,7 @@
|
||||
:class="{active: lastClickIndex === idx}"
|
||||
>
|
||||
<div class="d-flex">
|
||||
<b-checkbox @change="toggleIndex(idx)" :checked="isSelected(idx)"></b-checkbox>
|
||||
<b-checkbox style="pointer-events: none" :checked="isSelected(idx)"></b-checkbox>
|
||||
{{ idx.name }}
|
||||
<span class="text-muted timestamp-text ml-2">{{ formatIdxDate(idx.timestamp) }}</span>
|
||||
</div>
|
||||
@@ -168,4 +168,24 @@ export default Vue.extend({
|
||||
background-color: inherit;
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.theme-black .list-group-item {
|
||||
border: 1px solid rgba(255,255,255, 0.1);
|
||||
}
|
||||
|
||||
.theme-black .list-group-item:first-child {
|
||||
border: 1px solid rgba(255,255,255, 0.05);
|
||||
}
|
||||
|
||||
.theme-black .list-group-item.active {
|
||||
z-index: 2;
|
||||
background-color: inherit;
|
||||
color: inherit;
|
||||
border: 1px solid rgba(255,255,255, 0.3);
|
||||
border-radius: 0;
|
||||
}
|
||||
|
||||
.theme-black .list-group {
|
||||
border-radius: 0;
|
||||
}
|
||||
</style>
|
||||
@@ -1,6 +1,5 @@
|
||||
<template>
|
||||
<b-table :items="tableItems" small borderless responsive="md" thead-class="hidden" class="mb-0 mt-4">
|
||||
|
||||
<template #cell(value)="data">
|
||||
<span v-if="'html' in data.item" v-html="data.item.html"></span>
|
||||
<span v-else>{{ data.value }}</span>
|
||||
@@ -33,12 +32,18 @@ function dmsToDecimal(dms, ref) {
|
||||
export default {
|
||||
name: "InfoTable",
|
||||
props: ["doc"],
|
||||
data() {
|
||||
return {
|
||||
indexName: "loading..."
|
||||
}
|
||||
},
|
||||
computed: {
|
||||
tableItems() {
|
||||
this.indexName;
|
||||
const src = this.doc._source;
|
||||
|
||||
const items = [
|
||||
{key: "index", value: `[${this.$store.getters.indexMap[src.index].name}]`},
|
||||
{key: "index", value: `[${this.indexName}]`},
|
||||
{key: "mtime", value: humanDate(src.mtime)},
|
||||
{key: "mime", value: src.mime},
|
||||
{key: "size", value: humanFileSize(src.size)},
|
||||
@@ -67,6 +72,12 @@ export default {
|
||||
}
|
||||
});
|
||||
|
||||
Object.keys(src).forEach(key => {
|
||||
if (key.startsWith("mt_") || key.startsWith("int_")) {
|
||||
items.push({key: key, value: src[key]});
|
||||
}
|
||||
});
|
||||
|
||||
// Exif GPS
|
||||
if ("exif_gps_longitude_dec" in src) {
|
||||
items.push({
|
||||
@@ -85,7 +96,16 @@ export default {
|
||||
|
||||
return items;
|
||||
}
|
||||
}
|
||||
},
|
||||
mounted() {
|
||||
if (this.$store.getters.indexMap[this.doc.index]) {
|
||||
this.indexName = this.$store.getters.indexMap[this.doc._source.index].name
|
||||
}
|
||||
|
||||
window.setTimeout(() => {
|
||||
this.indexName = this.$store.getters.indexMap[this.doc._source.index].name
|
||||
}, 500)
|
||||
},
|
||||
}
|
||||
</script>
|
||||
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
<template>
|
||||
<Preloader v-if="loading"></Preloader>
|
||||
<div v-else-if="content" class="content-div">{{ content }}</div>
|
||||
<div v-else-if="content" class="content-div" v-html="content"></div>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
import Sist2Api from "@/Sist2Api";
|
||||
import Preloader from "@/components/Preloader";
|
||||
import Sist2Query from "@/Sist2Query";
|
||||
import store from "@/store";
|
||||
|
||||
export default {
|
||||
name: "LazyContentDiv",
|
||||
@@ -18,10 +20,72 @@ export default {
|
||||
}
|
||||
},
|
||||
mounted() {
|
||||
Sist2Api.getDocInfo(this.docId).then(src => {
|
||||
this.content = src.data.content;
|
||||
const query = Sist2Query.searchQuery();
|
||||
|
||||
if (this.$store.state.optHighlight) {
|
||||
|
||||
const fields = this.$store.state.fuzzy
|
||||
? {"content.nGram": {}}
|
||||
: {content: {}};
|
||||
|
||||
query.highlight = {
|
||||
pre_tags: ["<mark>"],
|
||||
post_tags: ["</mark>"],
|
||||
number_of_fragments: 0,
|
||||
fields,
|
||||
};
|
||||
|
||||
if (!store.state.sist2Info.esVersionLegacy) {
|
||||
query.highlight.max_analyzed_offset = 999_999;
|
||||
}
|
||||
}
|
||||
|
||||
if ("function_score" in query.query) {
|
||||
query.query = query.query.function_score.query;
|
||||
}
|
||||
|
||||
if (!("must" in query.query.bool)) {
|
||||
query.query.bool.must = [];
|
||||
} else if (!Array.isArray(query.query.bool.must)) {
|
||||
query.query.bool.must = [query.query.bool.must];
|
||||
}
|
||||
|
||||
query.query.bool.must.push({match: {_id: this.docId}});
|
||||
|
||||
delete query["sort"];
|
||||
delete query["aggs"];
|
||||
delete query["search_after"];
|
||||
delete query.query["function_score"];
|
||||
|
||||
query._source = {
|
||||
includes: ["content", "name", "path", "extension"]
|
||||
}
|
||||
|
||||
query.size = 1;
|
||||
|
||||
Sist2Api.esQuery(query).then(resp => {
|
||||
this.loading = false;
|
||||
})
|
||||
if (resp.hits.hits.length === 1) {
|
||||
this.content = this.getContent(resp.hits.hits[0]);
|
||||
} else {
|
||||
console.log("FIXME: could not get content")
|
||||
console.log(resp)
|
||||
}
|
||||
});
|
||||
},
|
||||
methods: {
|
||||
getContent(doc) {
|
||||
if (!doc.highlight) {
|
||||
return doc._source.content;
|
||||
}
|
||||
|
||||
if (doc.highlight["content.nGram"]) {
|
||||
return doc.highlight["content.nGram"][0];
|
||||
}
|
||||
if (doc.highlight.content) {
|
||||
return doc.highlight.content[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
<template>
|
||||
<div>
|
||||
<div :class="{'disable-animations': $store.state.optSimpleLightbox}">
|
||||
<FsLightbox
|
||||
ref="lightbox"
|
||||
:key="lightboxKey"
|
||||
:toggler="showLightbox"
|
||||
:sources="lightboxSources"
|
||||
@@ -10,7 +11,7 @@
|
||||
:source-index="lightboxSlide"
|
||||
:custom-toolbar-buttons="customButtons"
|
||||
:slideshow-time="$store.getters.optLightboxSlideDuration * 1000"
|
||||
:zoom-increment="0.5"
|
||||
:zoom-increment="0.25"
|
||||
:load-only-current-source="$store.getters.optLightboxLoadOnlyCurrent"
|
||||
:on-close="onClose"
|
||||
:on-open="onShow"
|
||||
@@ -29,6 +30,7 @@ export default {
|
||||
components: {FsLightbox},
|
||||
data() {
|
||||
return {
|
||||
disableAnimations: true,
|
||||
customButtons: [
|
||||
{
|
||||
viewBox: "0 0 384.928 384.928",
|
||||
@@ -64,7 +66,84 @@ export default {
|
||||
return this.$store.getters["uiLightboxTypes"];
|
||||
}
|
||||
},
|
||||
mounted() {
|
||||
const listener = document.onkeydown;
|
||||
|
||||
document.onkeydown = (e) => {
|
||||
|
||||
const ret = this.keyDownListener(e)
|
||||
|
||||
if (listener && ret) {
|
||||
return listener(e);
|
||||
}
|
||||
};
|
||||
},
|
||||
methods: {
|
||||
keyDownListener(e) {
|
||||
|
||||
const isLightboxOpen = this.$refs.lightbox === undefined || this.$refs.lightbox.$el.tagName === undefined;
|
||||
|
||||
if (isLightboxOpen) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const lightboxStore = this.$refs.lightbox.fsLightboxStore.slice(-1)[0];
|
||||
|
||||
switch (e.key) {
|
||||
case " ": {
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
e.stopImmediatePropagation();
|
||||
|
||||
// Find video at current slide, toggle play/pause
|
||||
[...document.getElementsByClassName("fslightbox-absoluted")].forEach(elem => {
|
||||
if (elem.style.transform === "translate(0px)" || elem.style.transform === "translate(0px, 0px)") {
|
||||
const vid = elem.getElementsByTagName("video")[0];
|
||||
|
||||
if (vid) {
|
||||
if (vid.paused) {
|
||||
vid.play();
|
||||
} else {
|
||||
vid.pause()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
});
|
||||
|
||||
return false;
|
||||
}
|
||||
case "ArrowUp":
|
||||
case "k": {
|
||||
if (!lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
|
||||
lightboxStore.core.thumbsToggler.toggleThumbs();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
case "ArrowDown":
|
||||
case "j": {
|
||||
if (lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
|
||||
lightboxStore.core.thumbsToggler.toggleThumbs();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
case "h": {
|
||||
if (lightboxStore.core.stageManager.getPreviousSlideIndex) {
|
||||
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getPreviousSlideIndex());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
case "l": {
|
||||
if (lightboxStore.core.stageManager.getNextSlideIndex) {
|
||||
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getNextSlideIndex());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
},
|
||||
onDownloadClick() {
|
||||
const url = this.lightboxSources[this.lightboxSlide];
|
||||
|
||||
@@ -125,4 +204,20 @@ export default {
|
||||
.fslightbox-toolbar-button:nth-child(7) {
|
||||
order: 7;
|
||||
}
|
||||
|
||||
.disable-animations .fslightbox-container {
|
||||
background: rgba(30,30,30,.9);
|
||||
}
|
||||
|
||||
.disable-animations .fslightbox-transform-transition {
|
||||
transition: none;
|
||||
}
|
||||
|
||||
.disable-animations .fslightbox-fade-in-strong {
|
||||
animation: none;
|
||||
}
|
||||
|
||||
.fslightbox-container video, .fslightbox-container img {
|
||||
cursor: unset !important;
|
||||
}
|
||||
</style>
|
||||
@@ -3,7 +3,7 @@
|
||||
<p>
|
||||
<b>{{
|
||||
`[${$store.getters.indices.find(i => i.id === hit._source.index).name}]`
|
||||
}}</b>{{ `/${hit._source.path}/${hit._source.name}${ext(hit)}` }}
|
||||
}}</b>{{ `${hit._source.path === '' ? '' : '/'}${hit._source.path}/${hit._source.name}${ext(hit)}` }}
|
||||
</p>
|
||||
<p style="margin-top: -1em">
|
||||
<span v-if="hit._source.width">{{ `${hit._source.width}x${hit._source.height}`}}</span>
|
||||
|
||||
@@ -16,7 +16,8 @@ export default {
|
||||
data() {
|
||||
return {
|
||||
mimeTree: null,
|
||||
stashedMimeTreeAttributes: null
|
||||
stashedMimeTreeAttributes: null,
|
||||
updateBusy: false
|
||||
}
|
||||
},
|
||||
mounted() {
|
||||
@@ -34,6 +35,10 @@ export default {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.updateBusy) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.$store.commit("setSelectedMimeTypes", getSelectedTreeNodes(this.mimeTree));
|
||||
},
|
||||
updateTree() {
|
||||
@@ -42,6 +47,11 @@ export default {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.updateBusy) {
|
||||
return
|
||||
}
|
||||
this.updateBusy = true;
|
||||
|
||||
if (this.stashedMimeTreeAttributes === null) {
|
||||
this.stashedMimeTreeAttributes = getTreeNodeAttributes(this.mimeTree);
|
||||
}
|
||||
@@ -78,6 +88,7 @@ export default {
|
||||
}
|
||||
});
|
||||
this.stashedMimeTreeAttributes = null;
|
||||
this.updateBusy = false;
|
||||
});
|
||||
},
|
||||
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
<template>
|
||||
<div id="tagTree"></div>
|
||||
<div>
|
||||
<b-input-group v-if="showSearchBar" id="tag-picker-filter-bar">
|
||||
<b-form-input :value="filter"
|
||||
:placeholder="$t('tagFilter')"
|
||||
@input="onFilter($event)"></b-form-input>
|
||||
</b-input-group>
|
||||
|
||||
<div id="tagTree"></div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
@@ -112,10 +120,12 @@ function addTag(map, tag, id, count) {
|
||||
|
||||
export default {
|
||||
name: "TagPicker",
|
||||
props: ["showSearchBar"],
|
||||
data() {
|
||||
return {
|
||||
tagTree: null,
|
||||
loadedFromArgs: false,
|
||||
filter: ""
|
||||
}
|
||||
},
|
||||
mounted() {
|
||||
@@ -129,6 +139,10 @@ export default {
|
||||
});
|
||||
},
|
||||
methods: {
|
||||
onFilter(value) {
|
||||
this.filter = value;
|
||||
this.tagTree.search(value);
|
||||
},
|
||||
initializeTree() {
|
||||
const tagMap = [];
|
||||
this.tagTree = new InspireTree({
|
||||
@@ -163,7 +177,8 @@ export default {
|
||||
});
|
||||
},
|
||||
handleTreeClick(node, e) {
|
||||
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused") {
|
||||
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused"
|
||||
|| e === "matched" || e === "hidden") {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -180,7 +195,15 @@ export default {
|
||||
}
|
||||
</style>
|
||||
<style>
|
||||
.inspire-tree .focused>.wholerow {
|
||||
.inspire-tree .focused > .wholerow {
|
||||
border: none;
|
||||
}
|
||||
|
||||
#tag-picker-filter-bar {
|
||||
padding: 10px 4px 4px;
|
||||
}
|
||||
|
||||
.theme-black .inspire-tree .matched > .wholerow {
|
||||
background: rgba(251, 191, 41, 0.25);
|
||||
}
|
||||
</style>
|
||||
sist2-vue/src/components/ThumbnailProgressBar.vue (new file, 40 lines)
@@ -0,0 +1,40 @@
<template>
  <div class="thumbnail-progress-bar" :style="{width: `${percentProgress}%`}"></div>
</template>

<script>
export default {
  name: "ThumbnailProgressBar",
  props: ["doc", "progress"],
  computed: {
    percentProgress() {
      return Math.min(Math.max(this.progress * 100, 0), 100);
    }
  }
}
</script>

<style scoped>
.thumbnail-progress-bar {
  position: absolute;
  left: 0;
  bottom: 0;

  height: 4px;
  background: #2196f3AA;

  z-index: 9;
}

.theme-black .thumbnail-progress-bar {
  background: rgba(0, 188, 212, 0.95);
}

.sub-document .thumbnail-progress-bar {
  max-width: calc(100% - 8px);
  left: 4px;
}
</style>

@@ -1,5 +1,8 @@
|
||||
export default {
|
||||
en: {
|
||||
filePage: {
|
||||
notFound: "Not found"
|
||||
},
|
||||
searchBar: {
|
||||
simple: "Search",
|
||||
advanced: "Advanced search",
|
||||
@@ -13,6 +16,7 @@ export default {
|
||||
pages: "pages",
|
||||
mimeTypes: "Media types",
|
||||
tags: "Tags",
|
||||
tagFilter: "Filter tags",
|
||||
help: {
|
||||
simpleSearch: "Simple search",
|
||||
advancedSearch: "Advanced search",
|
||||
@@ -67,7 +71,11 @@ export default {
|
||||
tagOrOperator: "Use OR operator when specifying multiple tags.",
|
||||
hideDuplicates: "Hide duplicate results based on checksum",
|
||||
hideLegacy: "Hide the 'legacyES' Elasticsearch notice",
|
||||
updateMimeMap: "Update the Media Types tree in real time"
|
||||
updateMimeMap: "Update the Media Types tree in real time",
|
||||
useDatePicker: "Use a Date Picker component rather than a slider",
|
||||
vidPreviewInterval: "Video preview frame duration in ms",
|
||||
simpleLightbox: "Disable animations in image viewer",
|
||||
showTagPickerFilter: "Display the tag filter bar"
|
||||
},
|
||||
queryMode: {
|
||||
simple: "Simple",
|
||||
@@ -161,6 +169,9 @@ export default {
|
||||
},
|
||||
},
|
||||
fr: {
|
||||
filePage: {
|
||||
notFound: "Ficher introuvable"
|
||||
},
|
||||
searchBar: {
|
||||
simple: "Recherche",
|
||||
advanced: "Recherche avancée",
|
||||
@@ -174,6 +185,7 @@ export default {
|
||||
pages: "pages",
|
||||
mimeTypes: "Types de médias",
|
||||
tags: "Tags",
|
||||
tagFilter: "Filtrer les tags",
|
||||
help: {
|
||||
simpleSearch: "Recherche simple",
|
||||
advancedSearch: "Recherche avancée",
|
||||
@@ -229,7 +241,11 @@ export default {
|
||||
tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags",
|
||||
hideDuplicates: "Masquer les résultats en double",
|
||||
hideLegacy: "Masquer la notice 'legacyES' Elasticsearch",
|
||||
updateMimeMap: "Mettre à jour l'arbre de Types de médias en temps réel"
|
||||
updateMimeMap: "Mettre à jour l'arbre de Types de médias en temps réel",
|
||||
useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider",
|
||||
vidPreviewInterval: "Durée des images d'aperçu video en millisecondes",
|
||||
simpleLightbox: "Désactiver les animations du visualiseur d'images",
|
||||
showTagPickerFilter: "Afficher le filtre dans l'onglet Tags"
|
||||
},
|
||||
queryMode: {
|
||||
simple: "Simple",
|
||||
@@ -324,6 +340,9 @@ export default {
|
||||
},
|
||||
},
|
||||
"zh-CN": {
|
||||
filePage: {
|
||||
notFound: "未找到"
|
||||
},
|
||||
searchBar: {
|
||||
simple: "搜索",
|
||||
advanced: "高级搜索",
|
||||
@@ -337,6 +356,7 @@ export default {
|
||||
pages: "页",
|
||||
mimeTypes: "文件类型",
|
||||
tags: "标签",
|
||||
tagFilter: "筛选标签",
|
||||
help: {
|
||||
simpleSearch: "简易搜索",
|
||||
advancedSearch: "高级搜索",
|
||||
@@ -391,7 +411,11 @@ export default {
|
||||
tagOrOperator: "使用或操作(OR)匹配多个标签。",
|
||||
hideDuplicates: "使用校验码隐藏重复结果",
|
||||
hideLegacy: "隐藏'legacyES' Elasticsearch 通知",
|
||||
updateMimeMap: "媒体类型树的实时更新"
|
||||
updateMimeMap: "媒体类型树的实时更新",
|
||||
useDatePicker: "使用日期选择器组件而不是滑块",
|
||||
vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位",
|
||||
simpleLightbox: "在图片查看器中,禁用动画",
|
||||
showTagPickerFilter: "显示标签过滤栏"
|
||||
},
|
||||
queryMode: {
|
||||
simple: "简单",
|
||||
|
||||
@@ -3,6 +3,7 @@ import VueRouter, {RouteConfig} from "vue-router"
|
||||
import StatsPage from "../views/StatsPage.vue"
|
||||
import Configuration from "../views/Configuration.vue"
|
||||
import SearchPage from "@/views/SearchPage.vue";
|
||||
import FilePage from "@/views/FilePage.vue";
|
||||
|
||||
Vue.use(VueRouter)
|
||||
|
||||
@@ -21,6 +22,11 @@ const routes: Array<RouteConfig> = [
|
||||
path: "/config",
|
||||
name: "Configuration",
|
||||
component: Configuration
|
||||
},
|
||||
{
|
||||
path: "/file",
|
||||
name: "File",
|
||||
component: FilePage
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@@ -4,6 +4,8 @@ import VueRouter, {Route} from "vue-router";
|
||||
import {EsHit, EsResult, EsTag, Index, Tag} from "@/Sist2Api";
|
||||
import {deserializeMimes, serializeMimes} from "@/util";
|
||||
|
||||
const CONF_VERSION = 2;
|
||||
|
||||
Vue.use(Vuex)
|
||||
|
||||
export default new Vuex.Store({
|
||||
@@ -24,7 +26,6 @@ export default new Vuex.Store({
|
||||
sortMode: "score",
|
||||
|
||||
fuzzy: false,
|
||||
size: 60,
|
||||
|
||||
optLang: "en",
|
||||
optLangIsDefault: true,
|
||||
@@ -32,6 +33,7 @@ export default new Vuex.Store({
|
||||
optTheme: "light",
|
||||
optDisplay: "grid",
|
||||
|
||||
optSize: 60,
|
||||
optHighlight: true,
|
||||
optTagOrOperator: false,
|
||||
optFuzzy: true,
|
||||
@@ -48,7 +50,11 @@ export default new Vuex.Store({
|
||||
optLightboxLoadOnlyCurrent: false,
|
||||
optLightboxSlideDuration: 15,
|
||||
optHideLegacy: false,
|
||||
optUpdateMimeMap: true,
|
||||
optUpdateMimeMap: false,
|
||||
optUseDatePicker: false,
|
||||
optVidPreviewInterval: 700,
|
||||
optSimpleLightbox: true,
|
||||
optShowTagPickerFilter: true,
|
||||
|
||||
_onLoadSelectedIndices: [] as string[],
|
||||
_onLoadSelectedMimeTypes: [] as string[],
|
||||
@@ -147,7 +153,7 @@ export default new Vuex.Store({
|
||||
setOptSuggestPath: (state, val) => state.optSuggestPath = val,
|
||||
setOptFragmentSize: (state, val) => state.optFragmentSize = val,
|
||||
setOptQueryMode: (state, val) => state.optQueryMode = val,
|
||||
setOptResultSize: (state, val) => state.size = val,
|
||||
setOptResultSize: (state, val) => state.optSize = val,
|
||||
setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
|
||||
|
||||
setOptTreemapType: (state, val) => state.optTreemapType = val,
|
||||
@@ -157,6 +163,10 @@ export default new Vuex.Store({
|
||||
setOptTreemapColor: (state, val) => state.optTreemapColor = val,
|
||||
setOptHideLegacy: (state, val) => state.optHideLegacy = val,
|
||||
setOptUpdateMimeMap: (state, val) => state.optUpdateMimeMap = val,
|
||||
setOptUseDatePicker: (state, val) => state.optUseDatePicker = val,
|
||||
setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
|
||||
setOptSimpleLightbox: (state, val) => state.optSimpleLightbox = val,
|
||||
setOptShowTagPickerFilter: (state, val) => state.optShowTagPickerFilter = val,
|
||||
|
||||
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
|
||||
setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
|
||||
@@ -172,6 +182,12 @@ export default new Vuex.Store({
|
||||
busSearch: () => {
|
||||
// noop
|
||||
},
|
||||
busTouchEnd: () => {
|
||||
// noop
|
||||
},
|
||||
busTnTouchStart: (doc_id) => {
|
||||
// noop
|
||||
},
|
||||
},
|
||||
actions: {
|
||||
setSist2Info: (store, val) => {
|
||||
@@ -229,6 +245,11 @@ export default new Vuex.Store({
|
||||
}
|
||||
},
|
||||
async updateArgs({state}, router: VueRouter) {
|
||||
|
||||
if (router.currentRoute.path !== "/") {
|
||||
return;
|
||||
}
|
||||
|
||||
await router.push({
|
||||
query: {
|
||||
q: state.searchText.trim() ? state.searchText.trim().replace(/\s+/g, " ") : undefined,
|
||||
@@ -257,6 +278,8 @@ export default new Vuex.Store({
|
||||
}
|
||||
});
|
||||
|
||||
conf["version"] = CONF_VERSION;
|
||||
|
||||
localStorage.setItem("sist2_configuration", JSON.stringify(conf));
|
||||
},
|
||||
loadConfiguration({state}) {
|
||||
@@ -264,6 +287,11 @@ export default new Vuex.Store({
|
||||
if (confString) {
|
||||
const conf = JSON.parse(confString);
|
||||
|
||||
if (!("version" in conf) || conf["version"] != CONF_VERSION) {
|
||||
localStorage.removeItem("sist2_configuration");
|
||||
window.location.reload();
|
||||
}
|
||||
|
||||
Object.keys(state).forEach((key) => {
|
||||
if (key.startsWith("opt")) {
|
||||
(state as any)[key] = conf[key];
|
||||
@@ -325,7 +353,7 @@ export default new Vuex.Store({
|
||||
searchText: state => state.searchText,
|
||||
pathText: state => state.pathText,
|
||||
fuzzy: state => state.fuzzy,
|
||||
size: state => state.size,
|
||||
size: state => state.optSize,
|
||||
sortMode: state => state.sortMode,
|
||||
lastQueryResult: state => state.lastQueryResults,
|
||||
lastDoc: function (state): EsHit | null {
|
||||
@@ -363,8 +391,12 @@ export default new Vuex.Store({
|
||||
optTreemapColor: state => state.optTreemapColor,
|
||||
optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
|
||||
optLightboxSlideDuration: state => state.optLightboxSlideDuration,
|
||||
optResultSize: state => state.size,
|
||||
optResultSize: state => state.optSize,
|
||||
optHideLegacy: state => state.optHideLegacy,
|
||||
optUpdateMimeMap: state => state.optUpdateMimeMap,
|
||||
optUseDatePicker: state => state.optUseDatePicker,
|
||||
optVidPreviewInterval: state => state.optVidPreviewInterval,
|
||||
optSimpleLightbox: state => state.optSimpleLightbox,
|
||||
optShowTagPickerFilter: state => state.optShowTagPickerFilter,
|
||||
}
|
||||
})
|
||||
@@ -1,8 +1,12 @@
|
||||
import {EsHit} from "@/Sist2Api";
|
||||
|
||||
export function ext(hit: EsHit) {
|
||||
return Object.prototype.hasOwnProperty.call(hit._source, "extension")
|
||||
&& hit["_source"]["extension"] !== "" ? "." + hit["_source"]["extension"] : "";
|
||||
return srcExt(hit._source)
|
||||
}
|
||||
|
||||
export function srcExt(src) {
|
||||
return Object.prototype.hasOwnProperty.call(src, "extension")
|
||||
&& src["extension"] !== "" ? "." + src["extension"] : "";
|
||||
}
|
||||
|
||||
export function strUnescape(str: string): string {
|
||||
|
||||
@@ -41,6 +41,20 @@
|
||||
<b-form-checkbox :checked="optUpdateMimeMap" @input="setOptUpdateMimeMap">
|
||||
{{ $t("opt.updateMimeMap") }}
|
||||
</b-form-checkbox>
|
||||
|
||||
<b-form-checkbox :checked="optUseDatePicker" @input="setOptUseDatePicker">
|
||||
{{ $t("opt.useDatePicker") }}
|
||||
</b-form-checkbox>
|
||||
|
||||
<b-form-checkbox :checked="optSimpleLightbox" @input="setOptSimpleLightbox">{{
|
||||
$t("opt.simpleLightbox")
|
||||
}}
|
||||
</b-form-checkbox>
|
||||
|
||||
<b-form-checkbox :checked="optShowTagPickerFilter" @input="setOptShowTagPickerFilter">{{
|
||||
$t("opt.showTagPickerFilter")
|
||||
}}
|
||||
</b-form-checkbox>
|
||||
</b-card>
|
||||
|
||||
<br/>
|
||||
@@ -81,6 +95,10 @@
|
||||
<label>{{ $t("opt.slideDuration") }}</label>
|
||||
<b-form-input :value="optLightboxSlideDuration" type="number" min="1"
|
||||
@input="setOptLightboxSlideDuration"></b-form-input>
|
||||
|
||||
<label>{{ $t("opt.vidPreviewInterval") }}</label>
|
||||
<b-form-input :value="optVidPreviewInterval" type="number" min="50"
|
||||
@input="setOptVidPreviewInterval"></b-form-input>
|
||||
</b-card>
|
||||
|
||||
<h4 class="mt-3">{{ $t("treemapOptions") }}</h4>
|
||||
@@ -229,6 +247,10 @@ export default {
|
||||
"optHideDuplicates",
|
||||
"optHideLegacy",
|
||||
"optUpdateMimeMap",
|
||||
"optUseDatePicker",
|
||||
"optVidPreviewInterval",
|
||||
"optSimpleLightbox",
|
||||
"optShowTagPickerFilter",
|
||||
]),
|
||||
clientWidth() {
|
||||
return window.innerWidth;
|
||||
@@ -272,7 +294,11 @@ export default {
|
||||
"setOptLang",
|
||||
"setOptHideDuplicates",
|
||||
"setOptHideLegacy",
|
||||
"setOptUpdateMimeMap"
|
||||
"setOptUpdateMimeMap",
|
||||
"setOptUseDatePicker",
|
||||
"setOptVidPreviewInterval",
|
||||
"setOptSimpleLightbox",
|
||||
"setOptShowTagPickerFilter",
|
||||
]),
|
||||
onResetClick() {
|
||||
localStorage.removeItem("sist2_configuration");
|
||||
|
||||
sist2-vue/src/views/FilePage.vue (new file, 149 lines)
@@ -0,0 +1,149 @@
|
||||
<template>
|
||||
<div style="margin-left: auto; margin-right: auto;" class="container">
|
||||
<Preloader v-if="loading"></Preloader>
|
||||
<b-card v-else-if="!loading && found">
|
||||
<b-card-title :title="doc._source.name + ext(doc)">
|
||||
{{ doc._source.name + ext(doc) }}
|
||||
</b-card-title>
|
||||
|
||||
<!-- Thumbnail-->
|
||||
<div style="position: relative; margin-left: auto; margin-right: auto; text-align: center">
|
||||
<FullThumbnail :doc="doc" :small-badge="false" @onThumbnailClick="onThumbnailClick()"></FullThumbnail>
|
||||
</div>
|
||||
|
||||
<!-- Audio player-->
|
||||
<audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls
|
||||
:type="doc._source.mime"
|
||||
:src="`f/${doc._id}`"></audio>
|
||||
|
||||
<InfoTable :doc="doc" v-if="doc"></InfoTable>
|
||||
|
||||
<div v-if="doc._source.content" class="content-div">{{ doc._source.content }}</div>
|
||||
</b-card>
|
||||
<div v-else>
|
||||
<b-card>
|
||||
<b-card-title>{{ $t("filePage.notFound") }}</b-card-title>
|
||||
</b-card>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
import Preloader from "@/components/Preloader.vue";
|
||||
import InfoTable from "@/components/InfoTable.vue";
|
||||
import Sist2Api from "@/Sist2Api";
|
||||
import {ext} from "@/util";
|
||||
import Vue from "vue";
|
||||
import sist2 from "@/Sist2Api";
|
||||
import FullThumbnail from "@/components/FullThumbnail";
|
||||
|
||||
export default Vue.extend({
|
||||
name: "FilePage",
|
||||
components: {
|
||||
FullThumbnail,
|
||||
Preloader,
|
||||
InfoTable
|
||||
},
|
||||
data() {
|
||||
return {
|
||||
loading: true,
|
||||
found: false,
|
||||
doc: null
|
||||
}
|
||||
},
|
||||
methods: {
|
||||
ext: ext,
|
||||
onThumbnailClick() {
|
||||
window.open(`/f/${this.doc._id}`, "_blank");
|
||||
},
|
||||
findByCustomField(field, id) {
|
||||
return {
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
[field]: id
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
size: 1
|
||||
}
|
||||
},
|
||||
findById(id) {
|
||||
return {
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
"_id": id
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
size: 1
|
||||
}
|
||||
},
|
||||
findByName(name) {
|
||||
return {
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{
|
||||
match: {
|
||||
"name": name
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
size: 1
|
||||
}
|
||||
}
|
||||
|
||||
},
|
||||
mounted() {
|
||||
if (this.$store.state.sist2Info === null) {
|
||||
sist2.getSist2Info().then(data => {
|
||||
this.$store.dispatch("setSist2Info", data);
|
||||
this.$store.commit("setIndices", data.indices);
|
||||
});
|
||||
}
|
||||
|
||||
let query = null;
|
||||
if (this.$route.query.byId) {
|
||||
query = this.findById(this.$route.query.byId);
|
||||
} else if (this.$route.query.byName) {
|
||||
query = this.findByName(this.$route.query.byName);
|
||||
} else if (this.$route.query.by && this.$route.query.q) {
|
||||
query = this.findByCustomField(this.$route.query.by, this.$route.query.q)
|
||||
}
|
||||
|
||||
if (query) {
|
||||
Sist2Api.esQuery(query).then(result => {
|
||||
if (result.hits.hits.length === 0) {
|
||||
this.found = false;
|
||||
} else {
|
||||
this.doc = result.hits.hits[0];
|
||||
this.found = true;
|
||||
}
|
||||
|
||||
this.loading = false;
|
||||
});
|
||||
} else {
|
||||
this.loading = false;
|
||||
this.found = false;
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.img-wrapper {
|
||||
display: inline-block;
|
||||
}
|
||||
</style>
|
||||
@@ -19,11 +19,7 @@
|
||||
</b-row>
|
||||
<b-row>
|
||||
<b-col sm="6">
|
||||
<b-row>
|
||||
<b-col style="height: 70px;">
|
||||
<DateSlider></DateSlider>
|
||||
</b-col>
|
||||
</b-row>
|
||||
<DateSlider></DateSlider>
|
||||
<b-row>
|
||||
<b-col>
|
||||
<IndexPicker></IndexPicker>
|
||||
@@ -36,7 +32,7 @@
|
||||
<MimePicker></MimePicker>
|
||||
</b-tab>
|
||||
<b-tab :title="$t('tags')">
|
||||
<TagPicker></TagPicker>
|
||||
<TagPicker :show-search-bar="$store.state.optShowTagPickerFilter"></TagPicker>
|
||||
</b-tab>
|
||||
</b-tabs>
|
||||
</b-col>
|
||||
@@ -104,6 +100,10 @@ export default Vue.extend({
|
||||
...mapGetters(["indices", "optDisplay"]),
|
||||
},
|
||||
mounted() {
|
||||
// Handle touch events
|
||||
window.ontouchend = () => this.$store.commit("busTouchEnd");
|
||||
window.ontouchcancel = this.$store.commit("busTouchEnd");
|
||||
|
||||
this.search = _debounce(async (clear: boolean) => {
|
||||
if (clear) {
|
||||
await this.clearResults();
|
||||
@@ -139,7 +139,9 @@ export default Vue.extend({
|
||||
this.setSist2Info(data);
|
||||
this.setIndices(data.indices);
|
||||
|
||||
Sist2Api.getMimeTypes(Sist2Query.searchQuery()).then(({mimeMap}) => {
|
||||
const doBlankSearch = !this.$store.state.optUpdateMimeMap;
|
||||
|
||||
Sist2Api.getMimeTypes(Sist2Query.searchQuery(doBlankSearch)).then(({mimeMap}) => {
|
||||
this.$store.commit("setUiMimeMap", mimeMap);
|
||||
this.uiLoading = false;
|
||||
this.search(true);
|
||||
@@ -206,7 +208,7 @@ export default Vue.extend({
|
||||
this.$store.commit("setUiReachedScrollEnd", false);
|
||||
},
|
||||
async handleSearch(resp: EsResult) {
|
||||
if (resp.hits.hits.length == 0) {
|
||||
if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) {
|
||||
this.$store.commit("setUiReachedScrollEnd", true);
|
||||
}
|
||||
|
||||
@@ -246,6 +248,8 @@ export default Vue.extend({
|
||||
this.$store.commit("setLastQueryResult", resp);
|
||||
|
||||
this.docs.push(...resp.hits.hits);
|
||||
|
||||
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
|
||||
},
|
||||
getDateRange(): Promise<{ min: number, max: number }> {
|
||||
return sist2.esQuery({
|
||||
|
||||
src/cli.c (120 changed lines)
@@ -5,7 +5,8 @@
|
||||
#define DEFAULT_OUTPUT "index.sist2/"
|
||||
#define DEFAULT_CONTENT_SIZE 32768
|
||||
#define DEFAULT_QUALITY 1
|
||||
#define DEFAULT_SIZE 300
|
||||
#define DEFAULT_THUMBNAIL_SIZE 500
|
||||
#define DEFAULT_THUMBNAIL_COUNT 1
|
||||
#define DEFAULT_REWRITE_URL ""
|
||||
|
||||
#define DEFAULT_ES_URL "http://localhost:9200"
|
||||
@@ -19,6 +20,8 @@
|
||||
|
||||
#define DEFAULT_MAX_MEM_BUFFER 2000
|
||||
|
||||
#define DEFAULT_THROTTLE_MEMORY_THRESHOLD 0
|
||||
|
||||
const char *TESS_DATAPATHS[] = {
|
||||
"/usr/share/tessdata/",
|
||||
"/usr/share/tesseract-ocr/tessdata/",
|
||||
@@ -65,6 +68,10 @@ void index_args_destroy(index_args_t *args) {
|
||||
if (args->es_settings_path) {
|
||||
free(args->es_settings);
|
||||
}
|
||||
|
||||
if (args->index_path != NULL) {
|
||||
free(args->index_path);
|
||||
}
|
||||
free(args);
|
||||
}
|
||||
|
||||
@@ -74,6 +81,11 @@ void web_args_destroy(web_args_t *args) {
|
||||
}
|
||||
|
||||
void exec_args_destroy(exec_args_t *args) {
|
||||
|
||||
if (args->index_path != NULL) {
|
||||
free(args->index_path);
|
||||
}
|
||||
|
||||
free(args);
|
||||
}
|
||||
|
||||
@@ -85,13 +97,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
|
||||
char *abs_path = abspath(argv[1]);
|
||||
if (abs_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s\n", argv[1]);
|
||||
return 1;
|
||||
LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1])
|
||||
} else {
|
||||
args->path = abs_path;
|
||||
}
|
||||
|
||||
if (args->incremental != NULL) {
|
||||
if (args->incremental != OPTION_VALUE_UNSPECIFIED) {
|
||||
args->incremental = abspath(args->incremental);
|
||||
if (abs_path == NULL) {
|
||||
sist_log("main.c", LOG_SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
|
||||
@@ -99,32 +110,42 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
}
|
||||
|
||||
if (args->quality == 0) {
|
||||
args->quality = DEFAULT_QUALITY;
|
||||
} else if (args->quality < 1 || args->quality > 31) {
|
||||
fprintf(stderr, "Invalid quality: %f\n", args->quality);
|
||||
if (args->tn_quality == OPTION_VALUE_UNSPECIFIED) {
|
||||
args->tn_quality = DEFAULT_QUALITY;
|
||||
} else if (args->tn_quality < 1.0f || args->tn_quality > 31.0f) {
|
||||
fprintf(stderr, "Invalid value for --thumbnail-quality argument: %f. Must be within [1.0, 31.0].\n",
|
||||
args->tn_quality);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->size == 0) {
|
||||
args->size = DEFAULT_SIZE;
|
||||
} else if (args->size > 0 && args->size < 32) {
|
||||
printf("Invalid size: %d\n", args->content_size);
|
||||
if (args->tn_size == OPTION_VALUE_UNSPECIFIED) {
|
||||
args->tn_size = DEFAULT_THUMBNAIL_SIZE;
|
||||
} else if (args->tn_size < 32) {
|
||||
printf("Invalid value --thumbnail-size argument: %d. Must be greater than 32 pixels.\n", args->tn_size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->content_size == 0) {
|
||||
if (args->tn_count == OPTION_VALUE_UNSPECIFIED) {
|
||||
args->tn_count = DEFAULT_THUMBNAIL_COUNT;
|
||||
} else if (args->tn_count == OPTION_VALUE_DISABLE) {
|
||||
args->tn_count = 0;
|
||||
} else if (args->tn_count > 1000) {
|
||||
printf("Invalid value --thumbnail-count argument: %d. Must be <= 1000.\n", args->tn_size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->content_size == OPTION_VALUE_UNSPECIFIED) {
|
||||
args->content_size = DEFAULT_CONTENT_SIZE;
|
||||
}
|
||||
|
||||
if (args->threads == 0) {
|
||||
args->threads = 1;
|
||||
} else if (args->threads < 0) {
|
||||
fprintf(stderr, "Invalid threads: %d\n", args->threads);
|
||||
fprintf(stderr, "Invalid value for --threads: %d. Must be a positive number\n", args->threads);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->output == NULL) {
|
||||
if (args->output == OPTION_VALUE_UNSPECIFIED) {
|
||||
args->output = malloc(strlen(DEFAULT_OUTPUT) + 1);
|
||||
strcpy(args->output, DEFAULT_OUTPUT);
|
||||
} else {
|
||||
@@ -143,7 +164,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
args->depth += 1;
|
||||
}
|
||||
|
||||
if (args->name == NULL) {
|
||||
if (args->name == OPTION_VALUE_UNSPECIFIED) {
|
||||
args->name = g_path_get_basename(args->output);
|
||||
} else {
|
||||
char *tmp = malloc(strlen(args->name) + 1);
|
||||
@@ -151,11 +172,11 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
args->name = tmp;
|
||||
}
|
||||
|
||||
if (args->rewrite_url == NULL) {
|
||||
if (args->rewrite_url == OPTION_VALUE_UNSPECIFIED) {
|
||||
args->rewrite_url = DEFAULT_REWRITE_URL;
|
||||
}
|
||||
|
||||
if (args->archive == NULL || strcmp(args->archive, "recurse") == 0) {
|
||||
if (args->archive == OPTION_VALUE_UNSPECIFIED || strcmp(args->archive, "recurse") == 0) {
|
||||
args->archive_mode = ARC_MODE_RECURSE;
|
||||
} else if (strcmp(args->archive, "list") == 0) {
|
||||
args->archive_mode = ARC_MODE_LIST;
|
||||
@@ -168,17 +189,17 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->ocr_images && args->tesseract_lang == NULL) {
|
||||
if (args->ocr_images && args->tesseract_lang == OPTION_VALUE_UNSPECIFIED) {
|
||||
fprintf(stderr, "You must specify --ocr-lang <LANG> to use --ocr-images");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->ocr_ebooks && args->tesseract_lang == NULL) {
|
||||
if (args->ocr_ebooks && args->tesseract_lang == OPTION_VALUE_UNSPECIFIED) {
|
||||
fprintf(stderr, "You must specify --ocr-lang <LANG> to use --ocr-ebooks");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->tesseract_lang != NULL) {
|
||||
if (args->tesseract_lang != OPTION_VALUE_UNSPECIFIED) {
|
||||
|
||||
if (!args->ocr_ebooks && !args->ocr_images) {
|
||||
fprintf(stderr, "You must specify at least one of --ocr-ebooks, --ocr-images");
|
||||
@@ -222,7 +243,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
args->tesseract_path = trained_data_path;
|
||||
}
|
||||
|
||||
if (args->exclude_regex != NULL) {
|
||||
if (args->exclude_regex != OPTION_VALUE_UNSPECIFIED) {
|
||||
const char *error;
|
||||
int error_offset;
|
||||
|
||||
@@ -242,17 +263,21 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
ScanCtx.exclude = NULL;
|
||||
}
|
||||
|
||||
if (args->treemap_threshold_str == 0) {
|
||||
if (args->treemap_threshold_str == OPTION_VALUE_UNSPECIFIED) {
|
||||
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
|
||||
} else {
|
||||
args->treemap_threshold = atof(args->treemap_threshold_str);
|
||||
}
|
||||
|
||||
if (args->max_memory_buffer == 0) {
|
||||
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
|
||||
if (args->max_memory_buffer_mib == OPTION_VALUE_UNSPECIFIED) {
|
||||
args->max_memory_buffer_mib = DEFAULT_MAX_MEM_BUFFER;
|
||||
}
|
||||
|
||||
if (args->list_path != NULL) {
|
||||
if (args->scan_mem_limit_mib == OPTION_VALUE_UNSPECIFIED || args->scan_mem_limit_mib == OPTION_VALUE_DISABLE) {
|
||||
args->scan_mem_limit_mib = DEFAULT_THROTTLE_MEMORY_THRESHOLD;
|
||||
}
|
||||
|
||||
if (args->list_path != OPTION_VALUE_UNSPECIFIED) {
|
||||
if (strcmp(args->list_path, "-") == 0) {
|
||||
args->list_file = stdin;
|
||||
LOG_DEBUG("cli.c", "Using stdin as list file")
|
||||
@@ -265,8 +290,9 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
|
||||
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
|
||||
LOG_DEBUGF("cli.c", "arg tn_quality=%f", args->tn_quality)
|
||||
LOG_DEBUGF("cli.c", "arg tn_size=%d", args->tn_size)
|
||||
LOG_DEBUGF("cli.c", "arg tn_count=%d", args->tn_count)
|
||||
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
|
||||
LOG_DEBUGF("cli.c", "arg threads=%d", args->threads)
|
||||
LOG_DEBUGF("cli.c", "arg incremental=%s", args->incremental)
|
||||
@@ -283,7 +309,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
|
||||
LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub)
|
||||
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
|
||||
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
|
||||
LOG_DEBUGF("cli.c", "arg max_memory_buffer_mib=%d", args->max_memory_buffer_mib)
|
||||
LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path)
|
||||
|
||||
return 0;
|
||||
@@ -335,11 +361,9 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
|
||||
char *index_path = abspath(argv[1]);
|
||||
if (index_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s\n", argv[1]);
|
||||
return 1;
|
||||
LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1])
|
||||
} else {
|
||||
args->index_path = argv[1];
|
||||
free(index_path);
|
||||
args->index_path = index_path;
|
||||
}
|
||||
|
||||
if (args->es_url == NULL) {
|
||||
@@ -374,10 +398,19 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
|
||||
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
|
||||
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
|
||||
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
|
||||
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
|
||||
LOG_DEBUGF("cli.c", "arg async_script=%s", args->async_script)
|
||||
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
|
||||
LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script)
|
||||
|
||||
if (args->script) {
|
||||
char log_buf[5000];
|
||||
|
||||
strncpy(log_buf, args->script, sizeof(log_buf));
|
||||
*(log_buf + sizeof(log_buf) - 1) = '\0';
|
||||
LOG_DEBUGF("cli.c", "arg script=%s", log_buf)
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg print=%d", args->print)
|
||||
LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path)
|
||||
LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings)
|
||||
@@ -474,13 +507,13 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
||||
for (int i = 0; i < args->index_count; i++) {
|
||||
char *abs_path = abspath(args->indices[i]);
|
||||
if (abs_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s\n", args->indices[i]);
|
||||
return 1;
|
||||
LOG_FATALF("cli.c", "Index not found: %s", args->indices[i])
|
||||
}
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
|
||||
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
|
||||
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
|
||||
LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline)
|
||||
LOG_DEBUGF("cli.c", "arg dev=%d", args->dev)
|
||||
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
|
||||
@@ -515,11 +548,9 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
|
||||
|
||||
char *index_path = abspath(argv[1]);
|
||||
if (index_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s\n", argv[1]);
|
||||
return 1;
|
||||
LOG_FATALF("cli.c", "Invalid index PATH argument. File not found: %s", argv[1])
|
||||
} else {
|
||||
args->index_path = argv[1];
|
||||
free(index_path);
|
||||
args->index_path = index_path;
|
||||
}
|
||||
|
||||
if (args->es_url == NULL) {
|
||||
@@ -539,6 +570,11 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
|
||||
}
|
||||
|
||||
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
|
||||
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
|
||||
|
||||
char log_buf[5000];
|
||||
strncpy(log_buf, args->script, sizeof(log_buf));
|
||||
*(log_buf + sizeof(log_buf) - 1) = '\0';
|
||||
LOG_DEBUGF("cli.c", "arg script=%s", log_buf)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
src/cli.h (20 changed lines)
@@ -5,11 +5,15 @@
|
||||
|
||||
#include "libscan/arc/arc.h"
|
||||
|
||||
#define OPTION_VALUE_DISABLE (-1)
|
||||
#define OPTION_VALUE_UNSPECIFIED (0)
|
||||
|
||||
typedef struct scan_args {
|
||||
float quality;
|
||||
int size;
|
||||
float tn_quality;
|
||||
int tn_size;
|
||||
int content_size;
|
||||
int threads;
|
||||
int scan_mem_limit_mib;
|
||||
char *incremental;
|
||||
char *output;
|
||||
char *rewrite_url;
|
||||
@@ -27,8 +31,10 @@ typedef struct scan_args {
|
||||
int fast;
|
||||
const char* treemap_threshold_str;
|
||||
double treemap_threshold;
|
||||
int max_memory_buffer;
|
||||
int max_memory_buffer_mib;
|
||||
int read_subtitles;
|
||||
/** Number of thumbnails to generate */
|
||||
int tn_count;
|
||||
int fast_epub;
|
||||
int calculate_checksums;
|
||||
char *list_path;
|
||||
@@ -44,7 +50,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
|
||||
typedef struct index_args {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
const char *index_path;
|
||||
int es_insecure_ssl;
|
||||
char *index_path;
|
||||
const char *script_path;
|
||||
char *script;
|
||||
const char *es_settings_path;
|
||||
@@ -56,11 +63,13 @@ typedef struct index_args {
|
||||
int async_script;
|
||||
int force_reset;
|
||||
int threads;
|
||||
int incremental;
|
||||
} index_args_t;
|
||||
|
||||
typedef struct web_args {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
int es_insecure_ssl;
|
||||
char *listen_address;
|
||||
char *credentials;
|
||||
char *tag_credentials;
|
||||
@@ -78,7 +87,8 @@ typedef struct web_args {
|
||||
typedef struct exec_args {
|
||||
char *es_url;
|
||||
char *es_index;
|
||||
const char *index_path;
|
||||
int es_insecure_ssl;
|
||||
char *index_path;
|
||||
const char *script_path;
|
||||
int async_script;
|
||||
char *script;
|
||||
|
||||
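A side note on the sentinel values introduced at the top of this header: options that used to be compared against NULL or 0 are now compared against OPTION_VALUE_UNSPECIFIED and OPTION_VALUE_DISABLE, which is what the scan_args_validate() changes in cli.c rely on. A minimal, self-contained sketch of the pattern; the resolve_tn_count() helper and the exact mapping of a user-supplied value onto OPTION_VALUE_DISABLE are assumptions for illustration, not code from this commit:

    #include <stdio.h>

    #define OPTION_VALUE_DISABLE (-1)
    #define OPTION_VALUE_UNSPECIFIED (0)
    #define DEFAULT_THUMBNAIL_COUNT 1

    /* Resolve the raw option value into an effective thumbnail count,
     * mirroring the tn_count handling in scan_args_validate(). */
    static int resolve_tn_count(int raw) {
        if (raw == OPTION_VALUE_UNSPECIFIED) {
            return DEFAULT_THUMBNAIL_COUNT; /* option was not passed at all */
        }
        if (raw == OPTION_VALUE_DISABLE) {
            return 0;                       /* thumbnails explicitly disabled */
        }
        return raw;                         /* explicit value from the user */
    }

    int main(void) {
        printf("%d %d %d\n",
               resolve_tn_count(OPTION_VALUE_UNSPECIFIED),  /* 1  */
               resolve_tn_count(OPTION_VALUE_DISABLE),      /* 0  */
               resolve_tn_count(12));                       /* 12 */
        return 0;
    }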
@@ -35,12 +35,14 @@ typedef struct {
|
||||
int threads;
|
||||
int depth;
|
||||
int calculate_checksums;
|
||||
size_t mem_limit;
|
||||
|
||||
size_t stat_tn_size;
|
||||
size_t stat_index_size;
|
||||
|
||||
GHashTable *original_table;
|
||||
GHashTable *copy_table;
|
||||
GHashTable *new_table;
|
||||
pthread_mutex_t copy_table_mu;
|
||||
|
||||
pcre *exclude;
|
||||
@@ -77,6 +79,7 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
char *es_url;
|
||||
int es_insecure_ssl;
|
||||
es_version_t *es_version;
|
||||
char *es_index;
|
||||
int batch_size;
|
||||
@@ -85,12 +88,17 @@ typedef struct {
|
||||
GHashTable *tags;
|
||||
store_t *meta_store;
|
||||
GHashTable *meta;
|
||||
/**
|
||||
* Set to false when using --print
|
||||
*/
|
||||
int needs_es_connection;
|
||||
} IndexCtx_t;
|
||||
|
||||
typedef struct {
|
||||
char *es_url;
|
||||
es_version_t *es_version;
|
||||
char *es_index;
|
||||
int es_insecure_ssl;
|
||||
int index_count;
|
||||
char *auth_user;
|
||||
char *auth_pass;
|
||||
|
||||
@@ -15,28 +15,45 @@ typedef struct es_indexer {
|
||||
} es_indexer_t;
|
||||
|
||||
|
||||
static __thread es_indexer_t *Indexer;
|
||||
static __thread es_indexer_t *Indexer = NULL;
|
||||
|
||||
void delete_queue(int max);
|
||||
void free_queue(int max);
|
||||
|
||||
void elastic_flush();
|
||||
|
||||
void elastic_cleanup() {
|
||||
elastic_flush();
|
||||
if (Indexer != NULL) {
|
||||
free(Indexer->es_index);
|
||||
free(Indexer->es_url);
|
||||
free(Indexer);
|
||||
void print_error(response_t *r);
|
||||
|
||||
void destroy_indexer(es_indexer_t *indexer) {
|
||||
|
||||
if (indexer == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_DEBUG("elastic.c", "Destroying indexer")
|
||||
|
||||
if (indexer->es_url != NULL) {
|
||||
free(indexer->es_url);
|
||||
free(indexer->es_index);
|
||||
}
|
||||
|
||||
free(indexer);
|
||||
}
|
||||
|
||||
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
|
||||
void elastic_cleanup() {
|
||||
if (IndexCtx.needs_es_connection) {
|
||||
elastic_flush();
|
||||
}
|
||||
|
||||
destroy_indexer(Indexer);
|
||||
}
|
||||
|
||||
void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
|
||||
|
||||
cJSON *line = cJSON_CreateObject();
|
||||
|
||||
cJSON_AddStringToObject(line, "_id", id_str);
|
||||
cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
|
||||
cJSON_AddStringToObject(line, "_type", "_doc");
|
||||
// cJSON_AddStringToObject(line, "_type", "_doc");
|
||||
cJSON_AddItemReferenceToObject(line, "_source", document);
|
||||
|
||||
char *json = cJSON_PrintUnformatted(line);
|
||||
@@ -52,13 +69,24 @@ void index_json_func(void *arg) {
|
||||
elastic_index_line(line);
|
||||
}
|
||||
|
||||
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
|
||||
void delete_document(const char* document_id_str, void* UNUSED(_data)) {
|
||||
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t));
|
||||
bulk_line->type = ES_BULK_LINE_DELETE;
|
||||
bulk_line->next = NULL;
|
||||
|
||||
strcpy(bulk_line->doc_id, document_id_str);
|
||||
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
|
||||
}
|
||||
|
||||
|
||||
void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
|
||||
char *json = cJSON_PrintUnformatted(document);
|
||||
|
||||
size_t json_len = strlen(json);
|
||||
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
|
||||
bulk_line->type = ES_BULK_LINE_INDEX;
|
||||
memcpy(bulk_line->line, json, json_len);
|
||||
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
|
||||
strcpy(bulk_line->doc_id, doc_id);
|
||||
*(bulk_line->line + json_len) = '\n';
|
||||
*(bulk_line->line + json_len + 1) = '\0';
|
||||
bulk_line->next = NULL;
|
||||
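The new delete_document() above queues a deletion-only bulk line through the same thread pool path as index_json(); the caller only supplies the document id string, and the id shown anywhere in this note would be illustrative rather than taken from this commit.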
@@ -67,7 +95,7 @@ void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
|
||||
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
|
||||
}
|
||||
|
||||
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
|
||||
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||
@@ -82,16 +110,16 @@ void execute_update_script(const char *script, int async, const char index_id[MD
|
||||
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
|
||||
cJSON_AddStringToObject(term_obj, "index", index_id);
|
||||
|
||||
char *str = cJSON_Print(body);
|
||||
char *str = cJSON_PrintUnformatted(body);
|
||||
|
||||
char bulk_url[4096];
|
||||
char url[4096];
|
||||
if (async) {
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
|
||||
snprintf(url, sizeof(url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
|
||||
Indexer->es_index);
|
||||
} else {
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
|
||||
snprintf(url, sizeof(url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
|
||||
}
|
||||
response_t *r = web_post(bulk_url, str);
|
||||
response_t *r = web_post(url, str, IndexCtx.es_insecure_ssl);
|
||||
if (!async) {
|
||||
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
|
||||
}
|
||||
@@ -111,13 +139,18 @@ void execute_update_script(const char *script, int async, const char index_id[MD
|
||||
|
||||
if (async) {
|
||||
cJSON *task = cJSON_GetObjectItem(resp, "task");
|
||||
|
||||
if (task == NULL) {
|
||||
LOG_FATALF("elastic.c", "FIXME: Could not get task id: %s", r->body);
|
||||
}
|
||||
|
||||
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
|
||||
}
|
||||
|
||||
cJSON_Delete(resp);
|
||||
}
|
||||
|
||||
void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
|
||||
void *create_bulk_buffer(int max, int *count, size_t *buf_len, int legacy) {
|
||||
es_bulk_line_t *line = Indexer->line_head;
|
||||
*count = 0;
|
||||
|
||||
@@ -125,30 +158,56 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
|
||||
size_t buf_cur = 0;
|
||||
char *buf = malloc(8192);
|
||||
size_t buf_capacity = 8192;
|
||||
#define GROW_BUF(delta) \
|
||||
while (buf_size + (delta) > buf_capacity) { \
|
||||
buf_capacity *= 2; \
|
||||
buf = realloc(buf, buf_capacity); \
|
||||
} \
|
||||
buf_size += (delta); \
|
||||
|
||||
// see: https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html
|
||||
// ES_BULK_LINE_INDEX: two lines, 1st action, 2nd content
|
||||
// ES_BULK_LINE_DELETE: one line
|
||||
while (line != NULL && *count < max) {
|
||||
char action_str[256];
|
||||
snprintf(
|
||||
action_str, sizeof(action_str),
|
||||
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
|
||||
line->path_md5_str, Indexer->es_index
|
||||
);
|
||||
if (line->type == ES_BULK_LINE_INDEX) {
|
||||
|
||||
size_t action_str_len = strlen(action_str);
|
||||
size_t line_len = strlen(line->line);
|
||||
if (legacy) {
|
||||
snprintf(
|
||||
action_str, sizeof(action_str),
|
||||
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
|
||||
line->doc_id, Indexer->es_index
|
||||
);
|
||||
} else {
|
||||
snprintf(
|
||||
action_str, sizeof(action_str),
|
||||
"{\"index\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
|
||||
line->doc_id, Indexer->es_index
|
||||
);
|
||||
}
|
||||
|
||||
while (buf_size + line_len + action_str_len > buf_capacity) {
|
||||
buf_capacity *= 2;
|
||||
buf = realloc(buf, buf_capacity);
|
||||
size_t action_str_len = strlen(action_str);
|
||||
size_t line_len = strlen(line->line);
|
||||
|
||||
GROW_BUF(action_str_len + line_len);
|
||||
|
||||
memcpy(buf + buf_cur, action_str, action_str_len);
|
||||
buf_cur += action_str_len;
|
||||
memcpy(buf + buf_cur, line->line, line_len);
|
||||
buf_cur += line_len;
|
||||
|
||||
} else if (line->type == ES_BULK_LINE_DELETE) {
|
||||
snprintf(
|
||||
action_str, sizeof(action_str),
|
||||
"{\"delete\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
|
||||
line->doc_id, Indexer->es_index
|
||||
);
|
||||
|
||||
size_t action_str_len = strlen(action_str);
|
||||
GROW_BUF(action_str_len);
|
||||
memcpy(buf + buf_cur, action_str, action_str_len);
|
||||
buf_cur += action_str_len;
|
||||
}
|
||||
|
||||
buf_size += line_len + action_str_len;
|
||||
|
||||
memcpy(buf + buf_cur, action_str, action_str_len);
|
||||
buf_cur += action_str_len;
|
||||
memcpy(buf + buf_cur, line->line, line_len);
|
||||
buf_cur += line_len;
|
||||
|
||||
line = line->next;
|
||||
(*count)++;
|
||||
}
|
||||
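For context, the rewritten create_bulk_buffer() serializes the queue into the NDJSON body expected by the Elasticsearch _bulk API (posted to <es_url>/<index>/_bulk?pipeline=tie in _elastic_flush() below): an ES_BULK_LINE_INDEX entry contributes an action line followed by the document source, while an ES_BULK_LINE_DELETE entry contributes a single action line. A sketch of such a payload, with made-up ids, field names and index name; on legacy clusters (IS_LEGACY_VERSION) the index action line additionally carries "_type":"_doc":

    {"index":{"_id":"6f1ed002ab5595859014ebf0951522d9","_index":"sist2"}}
    {"name":"report","extension":"pdf","size":102400}
    {"delete":{"_id":"d41d8cd98f00b204e9800998ecf8427e","_index":"sist2"}}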
@@ -169,7 +228,13 @@ void print_errors(response_t *r) {
|
||||
*(tmp + r->size) = '\0';
|
||||
|
||||
cJSON *ret_json = cJSON_Parse(tmp);
|
||||
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
|
||||
cJSON *errors = cJSON_GetObjectItem(ret_json, "errors");
|
||||
|
||||
if (errors == NULL) {
|
||||
char *str = cJSON_Print(ret_json);
|
||||
LOG_ERRORF("elastic.c", "%s\n", str);
|
||||
cJSON_free(str);
|
||||
} else if (errors->valueint != 0) {
|
||||
cJSON *err;
|
||||
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
|
||||
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
|
||||
@@ -207,11 +272,11 @@ void _elastic_flush(int max) {
|
||||
|
||||
size_t buf_len;
|
||||
int count;
|
||||
void *buf = create_bulk_buffer(max, &count, &buf_len);
|
||||
void *buf = create_bulk_buffer(max, &count, &buf_len, IS_LEGACY_VERSION(IndexCtx.es_version));
|
||||
|
||||
char bulk_url[4096];
|
||||
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
|
||||
response_t *r = web_post(bulk_url, buf);
|
||||
response_t *r = web_post(bulk_url, buf, IndexCtx.es_insecure_ssl);
|
||||
|
||||
if (r->status_code == 0) {
|
||||
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
|
||||
@@ -220,10 +285,10 @@ void _elastic_flush(int max) {
|
||||
if (r->status_code == 413) {
|
||||
|
||||
if (max <= 1) {
|
||||
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
|
||||
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id)
|
||||
free_response(r);
|
||||
free(buf);
|
||||
delete_queue(1);
|
||||
free_queue(1);
|
||||
if (Indexer->queued != 0) {
|
||||
elastic_flush();
|
||||
}
|
||||
@@ -248,13 +313,13 @@ void _elastic_flush(int max) {
|
||||
|
||||
} else if (r->status_code != 200) {
|
||||
print_errors(r);
|
||||
delete_queue(Indexer->queued);
|
||||
free_queue(Indexer->queued);
|
||||
|
||||
} else {
|
||||
|
||||
print_errors(r);
|
||||
LOG_DEBUGF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
|
||||
delete_queue(max);
|
||||
free_queue(max);
|
||||
|
||||
if (Indexer->queued != 0) {
|
||||
elastic_flush();
|
||||
@@ -265,7 +330,7 @@ void _elastic_flush(int max) {
|
||||
free(buf);
|
||||
}
|
||||
|
||||
void delete_queue(int max) {
|
||||
void free_queue(int max) {
|
||||
for (int i = 0; i < max; i++) {
|
||||
es_bulk_line_t *tmp = Indexer->line_head;
|
||||
Indexer->line_head = tmp->next;
|
||||
@@ -309,16 +374,22 @@ void elastic_index_line(es_bulk_line_t *line) {
|
||||
|
||||
es_indexer_t *create_indexer(const char *url, const char *index) {
|
||||
|
||||
char *es_url = malloc(strlen(url) + 1);
|
||||
strcpy(es_url, url);
|
||||
|
||||
char *es_index = malloc(strlen(index) + 1);
|
||||
strcpy(es_index, index);
|
||||
|
||||
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
|
||||
|
||||
indexer->es_url = es_url;
|
||||
indexer->es_index = es_index;
|
||||
if (IndexCtx.needs_es_connection) {
|
||||
char *es_url = malloc(strlen(url) + 1);
|
||||
strcpy(es_url, url);
|
||||
|
||||
char *es_index = malloc(strlen(index) + 1);
|
||||
strcpy(es_index, index);
|
||||
|
||||
indexer->es_url = es_url;
|
||||
indexer->es_index = es_index;
|
||||
} else {
|
||||
indexer->es_url = NULL;
|
||||
indexer->es_index = NULL;
|
||||
}
|
||||
|
||||
indexer->queued = 0;
|
||||
indexer->line_head = NULL;
|
||||
indexer->line_tail = NULL;
|
||||
@@ -331,7 +402,7 @@ void finish_indexer(char *script, int async_script, char *index_id) {
|
||||
char url[4096];
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
|
||||
response_t *r = web_post(url, "");
|
||||
response_t *r = web_post(url, "", IndexCtx.es_insecure_ssl);
|
||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
@@ -340,36 +411,44 @@ void finish_indexer(char *script, int async_script, char *index_id) {
|
||||
free(script);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
r = web_post(url, "", IndexCtx.es_insecure_ssl);
|
||||
LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
}
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
r = web_post(url, "", IndexCtx.es_insecure_ssl);
|
||||
LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
|
||||
r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}", IndexCtx.es_insecure_ssl);
|
||||
LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
|
||||
free_response(r);
|
||||
}
|
||||
|
||||
es_version_t *elastic_get_version(const char *es_url) {
|
||||
response_t *r = web_get(es_url, 30);
|
||||
es_version_t *elastic_get_version(const char *es_url, int insecure) {
|
||||
response_t *r = web_get(es_url, 30, insecure);
|
||||
|
||||
char *tmp = malloc(r->size + 1);
|
||||
memcpy(tmp, r->body, r->size);
|
||||
*(tmp + r->size) = '\0';
|
||||
cJSON *response = cJSON_Parse(tmp);
|
||||
free(tmp);
|
||||
free_response(r);
|
||||
|
||||
if (response == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (cJSON_GetObjectItem(response, "error") != NULL) {
|
||||
LOG_WARNING("elastic.c", "Could not get Elasticsearch version")
|
||||
print_error(r);
|
||||
free_response(r);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
free_response(r);
|
||||
|
||||
if (cJSON_GetObjectItem(response, "version") == NULL ||
|
||||
cJSON_GetObjectItem(cJSON_GetObjectItem(response, "version"), "number") == NULL) {
|
||||
cJSON_Delete(response);
|
||||
@@ -394,7 +473,7 @@ es_version_t *elastic_get_version(const char *es_url) {
|
||||
|
||||
void elastic_init(int force_reset, const char *user_mappings, const char *user_settings) {
|
||||
|
||||
es_version_t *es_version = elastic_get_version(IndexCtx.es_url);
|
||||
es_version_t *es_version = elastic_get_version(IndexCtx.es_url, IndexCtx.es_insecure_ssl);
|
||||
IndexCtx.es_version = es_version;
|
||||
|
||||
if (es_version == NULL) {
|
||||
@@ -403,33 +482,33 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
|
||||
|
||||
LOG_INFOF("elastic.c",
|
||||
"Elasticsearch version is %s (supported=%d, legacy=%d)",
|
||||
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), USE_LEGACY_ES_SETTINGS(es_version));
|
||||
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), IS_LEGACY_VERSION(es_version));
|
||||
|
||||
if (!IS_SUPPORTED_ES_VERSION(es_version)) {
|
||||
LOG_FATAL("elastic.c", "sist2 only supports Elasticsearch v6.8 or newer")
|
||||
LOG_FATAL("elastic.c", "This elasticsearch version is not supported!")
|
||||
}
|
||||
|
||||
char *settings = NULL;
|
||||
if (USE_LEGACY_ES_SETTINGS(es_version)) {
|
||||
settings = settings_json;
|
||||
} else {
|
||||
if (IS_LEGACY_VERSION(es_version)) {
|
||||
settings = settings_legacy_json;
|
||||
} else {
|
||||
settings = settings_json;
|
||||
}
|
||||
|
||||
// Check if index exists
|
||||
char url[4096];
|
||||
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
|
||||
response_t *r = web_get(url, 30);
|
||||
response_t *r = web_get(url, 30, IndexCtx.es_insecure_ssl);
|
||||
int index_exists = r->status_code == 200;
|
||||
free_response(r);
|
||||
|
||||
if (!index_exists || force_reset) {
|
||||
r = web_delete(url);
|
||||
r = web_delete(url, IndexCtx.es_insecure_ssl);
|
||||
LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, "");
|
||||
r = web_put(url, "", IndexCtx.es_insecure_ssl);
|
||||
|
||||
if (r->status_code != 200) {
|
||||
print_error(r);
|
||||
@@ -440,17 +519,17 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
r = web_post(url, "", IndexCtx.es_insecure_ssl);
|
||||
LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
|
||||
r = web_put(url, pipeline_json);
|
||||
r = web_put(url, pipeline_json, IndexCtx.es_insecure_ssl);
|
||||
LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, user_settings ? user_settings : settings);
|
||||
r = web_put(url, user_settings ? user_settings : settings, IndexCtx.es_insecure_ssl);
|
||||
LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code);
|
||||
if (r->status_code != 200) {
|
||||
print_error(r);
|
||||
@@ -458,8 +537,13 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
|
||||
}
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_put(url, user_mappings ? user_mappings : mappings_json);
|
||||
if (IS_LEGACY_VERSION(es_version)) {
|
||||
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
|
||||
} else {
|
||||
snprintf(url, sizeof(url), "%s/%s/_mappings", IndexCtx.es_url, IndexCtx.es_index);
|
||||
}
|
||||
|
||||
r = web_put(url, user_mappings ? user_mappings : mappings_json, IndexCtx.es_insecure_ssl);
|
||||
LOG_INFOF("elastic.c", "Update ES mappings <%d>", r->status_code);
|
||||
if (r->status_code != 200) {
|
||||
print_error(r);
|
||||
@@ -468,7 +552,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
|
||||
r = web_post(url, "");
|
||||
r = web_post(url, "", IndexCtx.es_insecure_ssl);
|
||||
LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
|
||||
free_response(r);
|
||||
}
|
||||
@@ -478,7 +562,7 @@ cJSON *elastic_get_document(const char *id_str) {
|
||||
char url[4096];
|
||||
snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);
|
||||
|
||||
response_t *r = web_get(url, 3);
|
||||
response_t *r = web_get(url, 3, WebCtx.es_insecure_ssl);
|
||||
cJSON *json = NULL;
|
||||
if (r->status_code == 200) {
|
||||
char *tmp = malloc(r->size + 1);
|
||||
@@ -496,7 +580,7 @@ char *elastic_get_status() {
|
||||
snprintf(url, sizeof(url),
|
||||
"%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);
|
||||
|
||||
response_t *r = web_get(url, 30);
|
||||
response_t *r = web_get(url, 30, IndexCtx.es_insecure_ssl);
|
||||
cJSON *json = NULL;
|
||||
char *status = malloc(128 * sizeof(char));
|
||||
status[0] = '\0';
|
||||
|
||||
@@ -3,9 +3,13 @@

#include "src/sist.h"

#define ES_BULK_LINE_INDEX 0
#define ES_BULK_LINE_DELETE 1

typedef struct es_bulk_line {
    struct es_bulk_line *next;
    char path_md5_str[MD5_STR_LENGTH];
    char doc_id[SIST_DOC_ID_LEN];
    int type;
    char line[0];
} es_bulk_line_t;

@@ -16,8 +20,10 @@ typedef struct {
} es_version_t;

#define VERSION_GE(version, maj, min) ((version)->major > (maj) || ((version)->major == (maj) && (version)->minor >= (min)))
#define IS_SUPPORTED_ES_VERSION(es_version) VERSION_GE((es_version), 6, 8)
#define USE_LEGACY_ES_SETTINGS(es_version) (!VERSION_GE((es_version), 7, 14))
#define VERSION_LT(version, maj, min) (!VERSION_GE(version, maj, min))

#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8) && VERSION_LT((es_version), 9, 0))
#define IS_LEGACY_VERSION(es_version) ((es_version) != NULL && VERSION_LT((es_version), 7, 14))
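For illustration (not part of this changeset), a minimal sketch of how the new guards behave, assuming only the major/minor fields that VERSION_GE dereferences:

    es_version_t es7 = {.major = 7, .minor = 10};
    es_version_t es9 = {.major = 9, .minor = 1};

    // IS_SUPPORTED_ES_VERSION(&es7)  -> true  (6.8 <= 7.10 < 9.0)
    // IS_LEGACY_VERSION(&es7)        -> true  (7.10 < 7.14, so the legacy _doc mappings URL is used)
    // IS_SUPPORTED_ES_VERSION(&es9)  -> false (9.x is now rejected)
    // IS_SUPPORTED_ES_VERSION(NULL)  -> false (NULL guard added by this change)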
__always_inline
static const char *format_es_version(es_version_t *version) {

@@ -36,9 +42,11 @@ typedef struct es_indexer es_indexer_t;

void elastic_index_line(es_bulk_line_t *line);

void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void print_json(cJSON *document, const char index_id_str[SIST_INDEX_ID_LEN]);

void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void index_json(cJSON *document, const char doc_id[SIST_INDEX_ID_LEN]);

void delete_document(const char *document_id_str, void* data);

es_indexer_t *create_indexer(const char *url, const char *index);

@@ -51,8 +59,8 @@ cJSON *elastic_get_document(const char *id_str);

char *elastic_get_status();

es_version_t *elastic_get_version(const char *es_url);
es_version_t *elastic_get_version(const char *es_url, int insecure);

void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]);

#endif
src/index/static_generated.c (vendored, 4 lines changed)
File diff suppressed because one or more lines are too long
@@ -22,7 +22,7 @@ void free_response(response_t *resp) {
|
||||
free(resp);
|
||||
}
|
||||
|
||||
void web_post_async_poll(subreq_ctx_t* req) {
|
||||
void web_post_async_poll(subreq_ctx_t *req) {
|
||||
fd_set fdread;
|
||||
fd_set fdwrite;
|
||||
fd_set fdexcep;
|
||||
@@ -34,7 +34,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
|
||||
|
||||
CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
|
||||
|
||||
if(mc != CURLM_OK) {
|
||||
if (mc != CURLM_OK) {
|
||||
req->done = TRUE;
|
||||
return;
|
||||
}
|
||||
@@ -47,7 +47,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
|
||||
struct timeval timeout = {1, 0};
|
||||
int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
|
||||
|
||||
switch(rc) {
|
||||
switch (rc) {
|
||||
case -1:
|
||||
req->done = TRUE;
|
||||
break;
|
||||
@@ -64,6 +64,10 @@ void web_post_async_poll(subreq_ctx_t* req) {
|
||||
req->response->size = req->response_buf.cur;
|
||||
curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code);
|
||||
|
||||
if (req->response->status_code == 0) {
|
||||
LOG_ERRORF("web.c", "CURL Error: %s", req->curl_err_buffer)
|
||||
}
|
||||
|
||||
curl_multi_cleanup(req->multi);
|
||||
curl_easy_cleanup(req->handle);
|
||||
curl_slist_free_all(req->headers);
|
||||
@@ -71,7 +75,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
|
||||
}
|
||||
}
|
||||
|
||||
subreq_ctx_t *web_post_async(const char *url, char *data) {
|
||||
subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) {
|
||||
subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t));
|
||||
req->response = calloc(1, sizeof(response_t));
|
||||
req->data = data;
|
||||
@@ -84,6 +88,11 @@ subreq_ctx_t *web_post_async(const char *url, char *data) {
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_POST, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
if (insecure) {
|
||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
||||
}
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, req->curl_err_buffer);
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
@@ -100,7 +109,7 @@ subreq_ctx_t *web_post_async(const char *url, char *data) {
|
||||
return req;
|
||||
}
|
||||
|
||||
response_t *web_get(const char *url, int timeout) {
|
||||
response_t *web_get(const char *url, int timeout, int insecure) {
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
CURL *curl;
|
||||
@@ -112,14 +121,24 @@ response_t *web_get(const char *url, int timeout) {
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
|
||||
if (insecure) {
|
||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
||||
}
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
char err_buffer[CURL_ERROR_SIZE + 1] = {};
|
||||
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
|
||||
|
||||
curl_easy_perform(curl);
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||
|
||||
if (resp->status_code == 0) {
|
||||
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
|
||||
}
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
@@ -128,7 +147,7 @@ response_t *web_get(const char *url, int timeout) {
|
||||
return resp;
|
||||
}
|
||||
|
||||
response_t *web_post(const char *url, const char *data) {
|
||||
response_t *web_post(const char *url, const char *data, int insecure) {
|
||||
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
@@ -141,6 +160,12 @@ response_t *web_post(const char *url, const char *data) {
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_POST, 1);
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
if (insecure) {
|
||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
||||
}
|
||||
|
||||
char err_buffer[CURL_ERROR_SIZE + 1] = {};
|
||||
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
@@ -151,17 +176,21 @@ response_t *web_post(const char *url, const char *data) {
|
||||
curl_easy_perform(curl);
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
resp->body = buffer.buf;
|
||||
resp->size = buffer.cur;
|
||||
|
||||
if (resp->status_code == 0) {
|
||||
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
|
||||
}
|
||||
|
||||
curl_easy_cleanup(curl);
|
||||
curl_slist_free_all(headers);
|
||||
|
||||
return resp;
|
||||
}
|
||||
|
||||
|
||||
response_t *web_put(const char *url, const char *data) {
|
||||
response_t *web_put(const char *url, const char *data, int insecure) {
|
||||
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
@@ -175,7 +204,10 @@ response_t *web_put(const char *url, const char *data) {
|
||||
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
|
||||
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
|
||||
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
|
||||
if (insecure) {
|
||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
||||
}
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
@@ -194,7 +226,7 @@ response_t *web_put(const char *url, const char *data) {
|
||||
return resp;
|
||||
}
|
||||
|
||||
response_t *web_delete(const char *url) {
|
||||
response_t *web_delete(const char *url, int insecure) {
|
||||
|
||||
response_t *resp = malloc(sizeof(response_t));
|
||||
|
||||
@@ -207,6 +239,9 @@ response_t *web_delete(const char *url) {
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
|
||||
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE");
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
|
||||
if (insecure) {
|
||||
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
|
||||
}
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
|
||||
struct curl_slist *headers = NULL;
|
||||
|
||||
@@ -25,14 +25,15 @@ typedef struct {
    response_t *response;
    int running_handles;
    int done;
    char curl_err_buffer[CURL_ERROR_SIZE + 1];
} subreq_ctx_t;

response_t *web_get(const char *url, int timeout);
response_t *web_post(const char * url, const char * data);
response_t *web_get(const char *url, int timeout, int insecure);
response_t *web_post(const char * url, const char * data, int insecure);
void web_post_async_poll(subreq_ctx_t* req);
subreq_ctx_t *web_post_async(const char *url, char *data);
response_t *web_put(const char *url, const char *data);
response_t *web_delete(const char *url);
subreq_ctx_t *web_post_async(const char *url, char *data, int insecure);
response_t *web_put(const char *url, const char *data, int insecure);
response_t *web_delete(const char *url, int insecure);

void free_response(response_t *resp);
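A hedged usage sketch of the updated signatures (the URL and timeout are placeholders, not taken from the changeset):

    // Query Elasticsearch, skipping certificate verification when --es-insecure-ssl is set.
    response_t *r = web_get("https://localhost:9200", 30, TRUE /* insecure */);
    if (r->status_code == 200) {
        // r->body holds r->size bytes of response data
    }
    free_response(r);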
@@ -124,15 +124,14 @@ char *build_json_string(document_t *doc) {
|
||||
cJSON_AddStringToObject(json, "path", "");
|
||||
}
|
||||
|
||||
char md5_str[MD5_STR_LENGTH];
|
||||
buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, md5_str);
|
||||
cJSON_AddStringToObject(json, "_id", md5_str);
|
||||
cJSON_AddStringToObject(json, "_id", doc->doc_id);
|
||||
|
||||
// Metadata
|
||||
meta_line_t *meta = doc->meta_head;
|
||||
while (meta != NULL) {
|
||||
|
||||
switch (meta->key) {
|
||||
case MetaThumbnail:
|
||||
case MetaPages:
|
||||
case MetaWidth:
|
||||
case MetaHeight:
|
||||
@@ -163,7 +162,6 @@ char *build_json_string(document_t *doc) {
|
||||
case MetaExifModel:
|
||||
case MetaAuthor:
|
||||
case MetaModifiedBy:
|
||||
case MetaThumbnail:
|
||||
case MetaExifGpsLongitudeDMS:
|
||||
case MetaExifGpsLongitudeDec:
|
||||
case MetaExifGpsLongitudeRef:
|
||||
@@ -398,7 +396,7 @@ void read_index_bin_handle_line(const char *line, const char *index_id, index_fu
|
||||
}
|
||||
}
|
||||
|
||||
void read_index_ndjson(const char *path, const char *index_id, index_func func) {
|
||||
void read_lines(const char *path, const line_processor_t processor) {
|
||||
dyn_buffer_t buf = dyn_buffer_create();
|
||||
|
||||
// Initialize zstd things
|
||||
@@ -427,7 +425,7 @@ void read_index_ndjson(const char *path, const char *index_id, index_func func)
|
||||
|
||||
if (c == '\n') {
|
||||
dyn_buffer_write_char(&buf, '\0');
|
||||
read_index_bin_handle_line(buf.buf, index_id, func);
|
||||
processor.func(buf.buf, processor.data);
|
||||
buf.cur = 0;
|
||||
} else {
|
||||
dyn_buffer_write_char(&buf, c);
|
||||
@@ -454,20 +452,29 @@ void read_index_ndjson(const char *path, const char *index_id, index_func func)
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
|
||||
void read_index_ndjson(const char *line, void *_data) {
|
||||
void **data = _data;
|
||||
const char *index_id = data[0];
|
||||
index_func func = data[1];
|
||||
read_index_bin_handle_line(line, index_id, func);
|
||||
}
|
||||
|
||||
void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func func) {
|
||||
if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
|
||||
read_index_ndjson(path, index_id, func);
|
||||
read_lines(path, (line_processor_t) {
|
||||
.data = (void *[2]) {(void *) index_id, func},
|
||||
.func = read_index_ndjson,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
static __thread GHashTable *IncrementalReadTable = NULL;
|
||||
|
||||
void json_put_incremental(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
|
||||
void json_put_incremental(cJSON *document, UNUSED(const char doc_id[SIST_DOC_ID_LEN])) {
|
||||
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
|
||||
const int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
|
||||
|
||||
incremental_put_str(IncrementalReadTable, path_md5_str, mtime);
|
||||
incremental_put(IncrementalReadTable, path_md5_str, mtime);
|
||||
}
|
||||
|
||||
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc) {
|
||||
@@ -476,16 +483,15 @@ void incremental_read(GHashTable *table, const char *filepath, index_descriptor_
|
||||
}
|
||||
|
||||
static __thread GHashTable *IncrementalCopyTable = NULL;
|
||||
static __thread GHashTable *IncrementalNewTable = NULL;
|
||||
static __thread store_t *IncrementalCopySourceStore = NULL;
|
||||
static __thread store_t *IncrementalCopyDestinationStore = NULL;
|
||||
|
||||
void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
|
||||
void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
|
||||
|
||||
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
|
||||
unsigned char path_md5[MD5_DIGEST_LENGTH];
|
||||
hex2buf(path_md5_str, MD5_STR_LENGTH - 1, path_md5);
|
||||
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
|
||||
|
||||
if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get_str(IncrementalCopyTable, path_md5_str)) {
|
||||
if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get(IncrementalCopyTable, doc_id)) {
|
||||
// Copy index line
|
||||
cJSON_DeleteItemFromObject(document, "index");
|
||||
char *json_str = cJSON_PrintUnformatted(document);
|
||||
@@ -499,9 +505,9 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_S
|
||||
|
||||
// Copy tn store contents
|
||||
size_t buf_len;
|
||||
char *buf = store_read(IncrementalCopySourceStore, (char *) path_md5, sizeof(path_md5), &buf_len);
|
||||
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
|
||||
if (buf_len != 0) {
|
||||
store_write(IncrementalCopyDestinationStore, (char *) path_md5, sizeof(path_md5), buf, buf_len);
|
||||
store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
|
||||
free(buf);
|
||||
}
|
||||
}
|
||||
@@ -524,3 +530,33 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
||||
|
||||
read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
|
||||
}
|
||||
|
||||
void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
|
||||
|
||||
char doc_id_n[SIST_DOC_ID_LEN + 1];
|
||||
doc_id_n[SIST_DOC_ID_LEN] = '\0';
|
||||
doc_id_n[SIST_DOC_ID_LEN - 1] = '\n';
|
||||
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
|
||||
|
||||
// do not delete archive virtual entries
|
||||
if (cJSON_GetObjectItem(document, "parent") == NULL
|
||||
&& !incremental_get(IncrementalCopyTable, doc_id)
|
||||
&& !incremental_get(IncrementalNewTable, doc_id)
|
||||
) {
|
||||
memcpy(doc_id_n, doc_id, SIST_DOC_ID_LEN - 1);
|
||||
zstd_write_string(doc_id, sizeof(doc_id_n));
|
||||
}
|
||||
}
|
||||
|
||||
void incremental_delete(const char *del_filepath, const char *index_filepath,
|
||||
GHashTable *copy_table, GHashTable *new_table) {
|
||||
|
||||
if (WriterCtx.out_file == NULL) {
|
||||
initialize_writer_ctx(del_filepath);
|
||||
}
|
||||
|
||||
IncrementalCopyTable = copy_table;
|
||||
IncrementalNewTable = new_table;
|
||||
|
||||
read_index(index_filepath, "", INDEX_TYPE_NDJSON, incremental_delete_handle_doc);
|
||||
}
|
||||
|
||||
@@ -7,14 +7,24 @@
#include <sys/syscall.h>
#include <glib.h>

typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
typedef struct line_processor {
    void* data;
    void (*func)(const char*, void*);
} line_processor_t;

typedef void(*index_func)(cJSON *, const char[SIST_DOC_ID_LEN]);

void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                      const char *dst_filepath, GHashTable *copy_table);

void incremental_delete(const char *del_filepath, const char* index_filepath,
                        GHashTable *copy_table, GHashTable *new_table);

void write_document(document_t *doc);

void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
void read_lines(const char *path, const line_processor_t processor);

void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func);

void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);

@@ -29,4 +39,18 @@ void write_index_descriptor(char *path, index_descriptor_t *desc);

index_descriptor_t read_index_descriptor(char *path);

#endif
// caller ensures char file_path[PATH_MAX]
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
    snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
    if (access(file_path, R_OK) == 0) { \
        action_ok; \
    } else { \
        action_main_fail; \
    } \
    snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
    if ((cond_original) && access(file_path, R_OK) == 0) { \
        action_ok; \
    } \

#endif
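A minimal sketch of how READ_INDICES is meant to be invoked, mirroring its later uses in main.c and stats.c (the two action arguments here are illustrative placeholders):

    char file_path[PATH_MAX];

    // Expands file_path to <index_path>_index_main.ndjson.zst (mandatory) and, when
    // cond_original is true, <index_path>_index_original.ndjson.zst, running action_ok
    // for every index file that is readable.
    READ_INDICES(file_path, args->index_path,
                 read_index(file_path, desc.id, desc.type, f),                           // action_ok
                 LOG_FATALF("main.c", "Could not open main index: %s", strerror(errno)), // action_main_fail
                 TRUE);                                                                  // cond_original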
@@ -52,13 +52,7 @@ void store_flush(store_t *store) {
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {

    if (LogCtx.very_verbose) {
        if (key_len == MD5_DIGEST_LENGTH) {
            char path_md5_str[MD5_STR_LENGTH];
            buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
            LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
        } else {
            LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
        }
        LOG_DEBUGF("store.c", "Store write %s@{%s} %lu bytes", store->path, key, buf_len)
    }

#if (SIST_FAKE_STORE != 1)
@@ -22,7 +22,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,

    job->vfile.info = *info;

    memset(job->parent, 0, MD5_DIGEST_LENGTH);
    job->parent[0] = '\0';

    job->vfile.filepath = job->filepath;
    job->vfile.read = fs_read;

@@ -48,6 +48,12 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
    size_t maxsize = sizeof(log_str) - log_len;
    log_len += vsnprintf(log_str + log_len, maxsize, format, ap);

    if (log_len >= maxsize) {
        fprintf(stderr, "([%s] FIXME: Log string is too long to display: %dB)\n",
                log_levels[level], log_len);
        return;
    }

    if (is_tty) {
        log_len += sprintf(log_str + log_len, "\033[0m\n");
    } else {

src/magic_generated.c (vendored, new file, 1 line changed)
File diff suppressed because one or more lines are too long

src/main.c (279 lines changed)
@@ -38,8 +38,8 @@ static __sighandler_t sigabrt_handler = NULL;

void sig_handler(int signum) {

    LogCtx.verbose = 1;
    LogCtx.very_verbose = 1;
    LogCtx.verbose = TRUE;
    LogCtx.very_verbose = TRUE;

    LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
    LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));

@@ -103,7 +103,7 @@ void sig_handler(int signum) {
    exit(-1);
}

void init_dir(const char *dirpath) {
void init_dir(const char *dirpath, scan_args_t *args) {
    char path[PATH_MAX];
    snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);

@@ -111,9 +111,18 @@ void init_dir(const char *dirpath) {
    strcpy(ScanCtx.index.desc.version, Version);
    strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);

    unsigned char index_md5[MD5_DIGEST_LENGTH];
    MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
    buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
    if (args->incremental != NULL) {
        // copy old index id
        char descriptor_path[PATH_MAX];
        snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
        index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
        memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
    } else {
        // generate new index id based on timestamp
        unsigned char index_md5[MD5_DIGEST_LENGTH];
        MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
        buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
    }

    write_index_descriptor(path, &ScanCtx.index.desc);
}
@@ -189,37 +198,41 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.comic_ctx.log = _log;
|
||||
ScanCtx.comic_ctx.logf = _logf;
|
||||
ScanCtx.comic_ctx.store = _store;
|
||||
ScanCtx.comic_ctx.tn_size = args->size;
|
||||
ScanCtx.comic_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.comic_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.comic_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.comic_ctx.tn_qscale = args->tn_quality;
|
||||
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
|
||||
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
|
||||
|
||||
// Ebook
|
||||
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
|
||||
ScanCtx.ebook_ctx.content_size = args->content_size;
|
||||
ScanCtx.ebook_ctx.tn_size = args->size;
|
||||
ScanCtx.ebook_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.ebook_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
|
||||
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
|
||||
ScanCtx.ebook_ctx.log = _log;
|
||||
ScanCtx.ebook_ctx.logf = _logf;
|
||||
ScanCtx.ebook_ctx.store = _store;
|
||||
ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
|
||||
ScanCtx.ebook_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.ebook_ctx.tn_qscale = args->tn_quality;
|
||||
|
||||
// Font
|
||||
ScanCtx.font_ctx.enable_tn = args->size > 0;
|
||||
ScanCtx.font_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.font_ctx.log = _log;
|
||||
ScanCtx.font_ctx.logf = _logf;
|
||||
ScanCtx.font_ctx.store = _store;
|
||||
|
||||
// Media
|
||||
ScanCtx.media_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.media_ctx.tn_size = args->size;
|
||||
ScanCtx.media_ctx.tn_qscale = args->tn_quality;
|
||||
ScanCtx.media_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.media_ctx.tn_count = args->tn_count;
|
||||
ScanCtx.media_ctx.log = _log;
|
||||
ScanCtx.media_ctx.logf = _logf;
|
||||
ScanCtx.media_ctx.store = _store;
|
||||
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
|
||||
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024;
|
||||
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
|
||||
ScanCtx.media_ctx.read_subtitles = args->tn_count;
|
||||
|
||||
if (args->ocr_images) {
|
||||
ScanCtx.media_ctx.tesseract_lang = args->tesseract_lang;
|
||||
@@ -228,6 +241,7 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
init_media();
|
||||
|
||||
// OOXML
|
||||
ScanCtx.ooxml_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.ooxml_ctx.content_size = args->content_size;
|
||||
ScanCtx.ooxml_ctx.log = _log;
|
||||
ScanCtx.ooxml_ctx.logf = _logf;
|
||||
@@ -244,7 +258,8 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.text_ctx.logf = _logf;
|
||||
|
||||
// MSDOC
|
||||
ScanCtx.msdoc_ctx.tn_size = args->size;
|
||||
ScanCtx.msdoc_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.msdoc_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.msdoc_ctx.content_size = args->content_size;
|
||||
ScanCtx.msdoc_ctx.log = _log;
|
||||
ScanCtx.msdoc_ctx.logf = _logf;
|
||||
@@ -253,6 +268,7 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
|
||||
ScanCtx.threads = args->threads;
|
||||
ScanCtx.depth = args->depth;
|
||||
ScanCtx.mem_limit = (size_t) args->scan_mem_limit_mib * 1024 * 1024;
|
||||
|
||||
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
|
||||
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
|
||||
@@ -262,8 +278,9 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.fast = args->fast;
|
||||
|
||||
// Raw
|
||||
ScanCtx.raw_ctx.tn_qscale = args->quality;
|
||||
ScanCtx.raw_ctx.tn_size = args->size;
|
||||
ScanCtx.raw_ctx.tn_qscale = args->tn_quality;
|
||||
ScanCtx.raw_ctx.enable_tn = args->tn_count > 0;
|
||||
ScanCtx.raw_ctx.tn_size = args->tn_size;
|
||||
ScanCtx.raw_ctx.log = _log;
|
||||
ScanCtx.raw_ctx.logf = _logf;
|
||||
ScanCtx.raw_ctx.store = _store;
|
||||
@@ -282,37 +299,91 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson");
|
||||
}
|
||||
|
||||
|
||||
/**
 * Loads an existing index as the baseline for incremental scanning.
 *  1. load old index files (original+main) => original_table
 *  2. allocate empty table => copy_table
 *  3. allocate empty table => new_table
 * the original_table/copy_table/new_table will be populated in parsing/parse.c:parse
 * and consumed in main.c:save_incremental_index
 *
 * Note: the existing index may or may not be of incremental index form.
 */
void load_incremental_index(const scan_args_t *args) {
    char file_path[PATH_MAX];

    ScanCtx.original_table = incremental_get_table();
    ScanCtx.copy_table = incremental_get_table();

    DIR *dir = opendir(args->incremental);
    if (dir == NULL) {
        LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
    }
    ScanCtx.new_table = incremental_get_table();

    char descriptor_path[PATH_MAX];
    snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
    snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
    index_descriptor_t original_desc = read_index_descriptor(descriptor_path);

    if (strcmp(original_desc.version, Version) != 0) {
        LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
    }

    struct dirent *de;
    while ((de = readdir(dir)) != NULL) {
        if (strncmp(de->d_name, "_index", sizeof("_index") - 1) == 0) {
            char file_path[PATH_MAX];
            snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
            incremental_read(ScanCtx.original_table, file_path, &original_desc);
        }
    }
    closedir(dir);
    READ_INDICES(
            file_path,
            args->incremental,
            incremental_read(ScanCtx.original_table, file_path, &original_desc),
            LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
            TRUE
    );

    LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
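For orientation, a rough sketch of the files an incremental scan reads and writes (names taken from READ_INDICES and the store paths used in this changeset; the output directory name is the documented default and the exact contents depend on the scan options):

    index.sist2/
    ├── descriptor.json              index id, version, root, type
    ├── _index_main.ndjson.zst       files (re)scanned in this run
    ├── _index_original.ndjson.zst   unchanged files copied forward (incremental scans only)
    ├── _index_delete.list.zst       doc ids deleted since the previous index (incremental scans only)
    ├── thumbs/                      thumbnail store
    ├── tags/                        tag store
    └── meta/                        sidecar metadata store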
/**
 * Saves an incremental index.
 * Before calling this function, the scanner should have finished writing the main index.
 *  1. Build original_table - new_table => delete_table
 *  2. Incrementally copy from old index files [(original+main) /\ copy_table] => index_original.ndjson.zst & store
 */
void save_incremental_index(scan_args_t *args) {
    char dst_path[PATH_MAX];
    char store_path[PATH_MAX];
    char file_path[PATH_MAX];
    char del_path[PATH_MAX];
    snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
    snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path);
    store_t *source = store_create(store_path, STORE_SIZE_TN);

    LOG_INFOF("main.c", "incremental_delete: original size = %u, copy size = %u, new size = %u",
              g_hash_table_size(ScanCtx.original_table),
              g_hash_table_size(ScanCtx.copy_table),
              g_hash_table_size(ScanCtx.new_table));
    snprintf(del_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path);
    READ_INDICES(file_path, args->incremental,
                 incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table),
                 perror("incremental_delete"), 1);
    writer_cleanup();

    READ_INDICES(file_path, args->incremental,
                 incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table),
                 perror("incremental_copy"), 1);
    writer_cleanup();

    store_destroy(source);

    snprintf(store_path, PATH_MAX, "%stags", args->incremental);
    snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
    store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
    store_copy(source_tags, dst_path);
    store_destroy(source_tags);
}

/**
 * An index can be either incremental or non-incremental (initial index).
 * For an initial index, there is only the "main" index.
 * For an incremental index, there are, additionally:
 *  - An "original" index, referencing all files unchanged since the previous index.
 *  - A "delete" index, referencing all files that exist in the previous index, but were deleted since then.
 * Therefore, for an incremental index, "main"+"original" covers all the current files in the live filesystem,
 * and is orthogonal with the "delete" index. When building an incremental index upon an old incremental index,
 * the old "delete" index can be safely ignored.
 */
void sist2_scan(scan_args_t *args) {
|
||||
|
||||
ScanCtx.mime_table = mime_get_mime_table();
|
||||
@@ -320,7 +391,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
|
||||
initialize_scan_context(args);
|
||||
|
||||
init_dir(ScanCtx.index.path);
|
||||
init_dir(ScanCtx.index.path, args);
|
||||
|
||||
char store_path[PATH_MAX];
|
||||
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
|
||||
@@ -335,10 +406,10 @@ void sist2_scan(scan_args_t *args) {
|
||||
load_incremental_index(args);
|
||||
}
|
||||
|
||||
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE, TRUE);
|
||||
ScanCtx.pool = tpool_create(ScanCtx.threads, thread_cleanup, TRUE, TRUE, ScanCtx.mem_limit);
|
||||
tpool_start(ScanCtx.pool);
|
||||
|
||||
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE);
|
||||
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE, 0);
|
||||
tpool_start(ScanCtx.writer_pool);
|
||||
|
||||
if (args->list_path) {
|
||||
@@ -364,35 +435,11 @@ void sist2_scan(scan_args_t *args) {
|
||||
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
|
||||
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
|
||||
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
|
||||
LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size)
|
||||
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size)
|
||||
|
||||
if (args->incremental != NULL) {
|
||||
char dst_path[PATH_MAX];
|
||||
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
|
||||
snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path);
|
||||
store_t *source = store_create(store_path, STORE_SIZE_TN);
|
||||
|
||||
DIR *dir = opendir(args->incremental);
|
||||
if (dir == NULL) {
|
||||
perror("opendir");
|
||||
return;
|
||||
}
|
||||
struct dirent *de;
|
||||
while ((de = readdir(dir)) != NULL) {
|
||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
||||
char file_path[PATH_MAX];
|
||||
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
|
||||
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table);
|
||||
}
|
||||
}
|
||||
closedir(dir);
|
||||
store_destroy(source);
|
||||
writer_cleanup();
|
||||
|
||||
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
|
||||
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
|
||||
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
|
||||
store_copy(source_tags, dst_path);
|
||||
store_destroy(source_tags);
|
||||
save_incremental_index(args);
|
||||
}
|
||||
|
||||
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
|
||||
@@ -402,17 +449,20 @@ void sist2_scan(scan_args_t *args) {
|
||||
}
|
||||
|
||||
void sist2_index(index_args_t *args) {
|
||||
char file_path[PATH_MAX];
|
||||
|
||||
IndexCtx.es_url = args->es_url;
|
||||
IndexCtx.es_index = args->es_index;
|
||||
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
|
||||
IndexCtx.batch_size = args->batch_size;
|
||||
IndexCtx.needs_es_connection = !args->print;
|
||||
|
||||
if (!args->print) {
|
||||
if (IndexCtx.needs_es_connection) {
|
||||
elastic_init(args->force_reset, args->es_mappings, args->es_settings);
|
||||
}
|
||||
|
||||
char descriptor_path[PATH_MAX];
|
||||
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
|
||||
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
|
||||
|
||||
index_descriptor_t desc = read_index_descriptor(descriptor_path);
|
||||
|
||||
@@ -428,11 +478,11 @@ void sist2_index(index_args_t *args) {
|
||||
}
|
||||
|
||||
char path_tmp[PATH_MAX];
|
||||
snprintf(path_tmp, sizeof(path_tmp), "%s/tags", args->index_path);
|
||||
snprintf(path_tmp, sizeof(path_tmp), "%stags", args->index_path);
|
||||
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
|
||||
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
|
||||
|
||||
snprintf(path_tmp, sizeof(path_tmp), "%s/meta", args->index_path);
|
||||
snprintf(path_tmp, sizeof(path_tmp), "%smeta", args->index_path);
|
||||
IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
|
||||
IndexCtx.meta = store_read_all(IndexCtx.meta_store);
|
||||
|
||||
@@ -443,32 +493,33 @@ void sist2_index(index_args_t *args) {
|
||||
f = index_json;
|
||||
}
|
||||
|
||||
void (*cleanup)();
|
||||
if (args->print) {
|
||||
cleanup = NULL;
|
||||
} else {
|
||||
cleanup = elastic_cleanup;
|
||||
}
|
||||
|
||||
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE, args->print == 0);
|
||||
IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0, 0);
|
||||
tpool_start(IndexCtx.pool);
|
||||
|
||||
struct dirent *de;
|
||||
while ((de = readdir(dir)) != NULL) {
|
||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
||||
char file_path[PATH_MAX];
|
||||
snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
|
||||
read_index(file_path, desc.id, desc.type, f);
|
||||
READ_INDICES(file_path, args->index_path, {
|
||||
read_index(file_path, desc.id, desc.type, f);
|
||||
LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type);
|
||||
}, {}, !args->incremental);
|
||||
|
||||
// Only read the _delete index if we're sending data to ES
|
||||
if (!args->print) {
|
||||
snprintf(file_path, PATH_MAX, "%s_index_delete.list.zst", args->index_path);
|
||||
if (0 == access(file_path, R_OK)) {
|
||||
read_lines(file_path, (line_processor_t) {
|
||||
.data = NULL,
|
||||
.func = delete_document
|
||||
});
|
||||
LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type)
|
||||
}
|
||||
}
|
||||
|
||||
closedir(dir);
|
||||
|
||||
tpool_wait(IndexCtx.pool);
|
||||
|
||||
tpool_destroy(IndexCtx.pool);
|
||||
|
||||
if (!args->print) {
|
||||
if (IndexCtx.needs_es_connection) {
|
||||
finish_indexer(args->script, args->async_script, desc.id);
|
||||
}
|
||||
|
||||
@@ -483,11 +534,13 @@ void sist2_exec_script(exec_args_t *args) {
|
||||
LogCtx.verbose = TRUE;
|
||||
|
||||
char descriptor_path[PATH_MAX];
|
||||
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
|
||||
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
|
||||
index_descriptor_t desc = read_index_descriptor(descriptor_path);
|
||||
|
||||
IndexCtx.es_url = args->es_url;
|
||||
IndexCtx.es_index = args->es_index;
|
||||
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
|
||||
IndexCtx.needs_es_connection = TRUE;
|
||||
|
||||
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
|
||||
|
||||
@@ -499,6 +552,7 @@ void sist2_web(web_args_t *args) {
|
||||
|
||||
WebCtx.es_url = args->es_url;
|
||||
WebCtx.es_index = args->es_index;
|
||||
WebCtx.es_insecure_ssl = args->es_insecure_ssl;
|
||||
WebCtx.index_count = args->index_count;
|
||||
WebCtx.auth_user = args->auth_user;
|
||||
WebCtx.auth_pass = args->auth_pass;
|
||||
@@ -526,13 +580,34 @@ void sist2_web(web_args_t *args) {
|
||||
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
|
||||
|
||||
strcpy(WebCtx.indices[i].path, abs_path);
|
||||
printf("Loaded index: %s\n", WebCtx.indices[i].desc.name);
|
||||
LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name)
|
||||
free(abs_path);
|
||||
}
|
||||
|
||||
serve(args->listen_address);
|
||||
}
|
||||
|
||||
/**
 * Callback to handle options such that
 *
 * Unspecified -> 0: Set to default value
 * Specified "0" -> -1: Disable the option (ex. don't generate thumbnails)
 * Negative number -> Raise error
 * Specified a valid number -> Continue as normal
 */
int set_to_negative_if_value_is_zero(struct argparse *self, const struct argparse_option *option) {
    int specified_value = *(int *) option->value;

    if (specified_value == 0) {
        *((int *) option->data) = OPTION_VALUE_DISABLE;
    }

    if (specified_value < 0) {
        fprintf(stderr, "error: option `--%s` Value must be >= 0\n", option->long_name);
        exit(1);
    }
}
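A hedged sketch of how this callback is wired to an option and what each invocation yields (the option registration matches the one added further down; the command lines are illustrative):

    OPT_INTEGER(0, "thumbnail-count", &scan_args->tn_count,
                "Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1",
                set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_count),

    // sist2 scan ~/docs                      -> tn_count stays 0, the default is applied later
    // sist2 scan --thumbnail-count 0 ~/docs  -> callback rewrites it to OPTION_VALUE_DISABLE
    // sist2 scan --thumbnail-count -1 ~/docs -> "error: option `--thumbnail-count` Value must be >= 0"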
int main(int argc, const char *argv[]) {
|
||||
sigsegv_handler = signal(SIGSEGV, sig_handler);
|
||||
@@ -548,6 +623,7 @@ int main(int argc, const char *argv[]) {
|
||||
int arg_version = 0;
|
||||
|
||||
char *common_es_url = NULL;
|
||||
int common_es_insecure_ssl = 0;
|
||||
char *common_es_index = NULL;
|
||||
char *common_script_path = NULL;
|
||||
int common_async_script = 0;
|
||||
@@ -562,12 +638,21 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
OPT_GROUP("Scan options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_FLOAT('q', "quality", &scan_args->quality,
|
||||
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=3"),
|
||||
OPT_INTEGER(0, "size", &scan_args->size,
|
||||
"Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500"),
|
||||
OPT_INTEGER(0, "mem-throttle", &scan_args->scan_mem_limit_mib,
|
||||
"Total memory threshold in MiB for scan throttling. DEFAULT=0",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->scan_mem_limit_mib),
|
||||
OPT_FLOAT('q', "thumbnail-quality", &scan_args->tn_quality,
|
||||
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
|
||||
OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size,
|
||||
"Thumbnail size, in pixels. DEFAULT=500",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size),
|
||||
OPT_INTEGER(0, "thumbnail-count", &scan_args->tn_count,
|
||||
"Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_count),
|
||||
OPT_INTEGER(0, "content-size", &scan_args->content_size,
|
||||
"Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768"),
|
||||
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768",
|
||||
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size),
|
||||
OPT_STRING(0, "incremental", &scan_args->incremental,
|
||||
"Reuse an existing index and only scan modified files."),
|
||||
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
|
||||
@@ -590,8 +675,8 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
|
||||
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
|
||||
"(see USAGE.md). DEFAULT: 0.0005"),
|
||||
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
|
||||
"Maximum memory buffer size per thread in MB for files inside archives "
|
||||
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer_mib,
|
||||
"Maximum memory buffer size per thread in MiB for files inside archives "
|
||||
"(see USAGE.md). DEFAULT: 2000"),
|
||||
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
|
||||
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
|
||||
@@ -604,8 +689,11 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_GROUP("Index options"),
|
||||
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
|
||||
"Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch."),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
|
||||
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
|
||||
@@ -616,6 +704,7 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
OPT_GROUP("Web options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
|
||||
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
|
||||
@@ -626,6 +715,7 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
OPT_GROUP("Exec-script options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
|
||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
|
||||
@@ -655,6 +745,10 @@ int main(int argc, const char *argv[]) {
|
||||
index_args->es_index = common_es_index;
|
||||
exec_args->es_index = common_es_index;
|
||||
|
||||
web_args->es_insecure_ssl = common_es_insecure_ssl;
|
||||
index_args->es_insecure_ssl = common_es_insecure_ssl;
|
||||
exec_args->es_insecure_ssl = common_es_insecure_ssl;
|
||||
|
||||
index_args->script_path = common_script_path;
|
||||
exec_args->script_path = common_script_path;
|
||||
index_args->threads = common_threads;
|
||||
@@ -698,9 +792,8 @@ int main(int argc, const char *argv[]) {
|
||||
sist2_exec_script(exec_args);
|
||||
|
||||
} else {
|
||||
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
|
||||
argparse_usage(&argparse);
|
||||
goto end;
|
||||
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include "mime.h"
|
||||
#include "src/io/serialize.h"
|
||||
#include "src/parsing/sidecar.h"
|
||||
#include "src/magic_generated.c"
|
||||
|
||||
#include <magic.h>
|
||||
|
||||
@@ -69,7 +70,7 @@ void parse(void *arg) {
|
||||
doc->base = (short) job->base;
|
||||
|
||||
char *rel_path = doc->filepath + ScanCtx.index.desc.root_len;
|
||||
MD5((unsigned char *) rel_path, strlen(rel_path), doc->path_md5);
|
||||
generate_doc_id(rel_path, doc->doc_id);
|
||||
|
||||
doc->meta_head = NULL;
|
||||
doc->meta_tail = NULL;
|
||||
@@ -77,25 +78,33 @@ void parse(void *arg) {
|
||||
doc->size = job->vfile.info.st_size;
|
||||
doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;
|
||||
|
||||
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
|
||||
int inc_ts = incremental_get(ScanCtx.original_table, doc->doc_id);
|
||||
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
|
||||
pthread_mutex_lock(&ScanCtx.copy_table_mu);
|
||||
incremental_mark_file_for_copy(ScanCtx.copy_table, doc->path_md5);
|
||||
incremental_mark_file(ScanCtx.copy_table, doc->doc_id);
|
||||
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
|
||||
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_skipped_files_count += 1;
|
||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
free(doc->filepath);
|
||||
free(doc);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (ScanCtx.new_table != NULL) {
|
||||
pthread_mutex_lock(&ScanCtx.copy_table_mu);
|
||||
incremental_mark_file(ScanCtx.new_table, doc->doc_id);
|
||||
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
|
||||
}
|
||||
|
||||
char *buf[MAGIC_BUF_SIZE];
|
||||
|
||||
if (LogCtx.very_verbose) {
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, path_md5_str);
|
||||
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
|
||||
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id)
|
||||
}
|
||||
|
||||
if (job->vfile.info.st_size == 0) {
|
||||
@@ -123,16 +132,27 @@ void parse(void *arg) {
|
||||
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
|
||||
}
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
ScanCtx.dbg_failed_files_count += 1;
|
||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||
|
||||
CLOSE_FILE(job->vfile)
|
||||
free(doc->filepath);
|
||||
free(doc);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
magic_t magic = magic_open(MAGIC_MIME_TYPE);
|
||||
magic_load(magic, NULL);
|
||||
|
||||
const char *magic_buffers[1] = {magic_database_buffer,};
|
||||
size_t sizes[1] = {sizeof(magic_database_buffer),};
|
||||
|
||||
int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1);
|
||||
|
||||
if (load_ret != 0) {
|
||||
LOG_FATALF("parse.c", "Could not load libmagic database: (%d)", load_ret)
|
||||
}
|
||||
|
||||
const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
|
||||
if (magic_mime_str != NULL) {
|
||||
@@ -205,10 +225,10 @@ void parse(void *arg) {
|
||||
abort:
|
||||
|
||||
//Parent meta
|
||||
if (!md5_digest_is_null(job->parent)) {
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
|
||||
if (job->parent[0] != '\0') {
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
|
||||
meta_parent->key = MetaParent;
|
||||
buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
|
||||
strcpy(meta_parent->str_val, job->parent);
|
||||
APPEND_META((doc), meta_parent)
|
||||
|
||||
doc->has_parent = TRUE;
|
||||
|
||||
@@ -23,16 +23,19 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
|
||||
}
|
||||
char *json_str = cJSON_PrintUnformatted(json);
|
||||
|
||||
unsigned char path_md5[MD5_DIGEST_LENGTH];
|
||||
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
|
||||
path_md5);
|
||||
char assoc_doc_id[SIST_DOC_ID_LEN];
|
||||
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
buf2hex(path_md5, MD5_DIGEST_LENGTH, path_md5_str);
|
||||
char rel_path[PATH_MAX];
|
||||
size_t rel_path_len = doc->ext - 1 - ScanCtx.index.desc.root_len;
|
||||
memcpy(rel_path, vfile->filepath + ScanCtx.index.desc.root_len, rel_path_len);
|
||||
*(rel_path + rel_path_len) = '\0';
|
||||
|
||||
store_write(ScanCtx.index.meta_store, path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
|
||||
generate_doc_id(rel_path, assoc_doc_id);
|
||||
|
||||
store_write(ScanCtx.index.meta_store, assoc_doc_id, sizeof(assoc_doc_id), json_str,
|
||||
strlen(json_str) + 1);
|
||||
|
||||
cJSON_Delete(json);
|
||||
free(json_str);
|
||||
free(buf);
|
||||
}
|
||||
}
|
||||
@@ -27,10 +27,6 @@

#define UNUSED(x) __attribute__((__unused__)) x

#define MD5_STR_LENGTH 33
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20

#include "util.h"
#include "log.h"
#include "types.h"

@@ -53,13 +49,15 @@
#include <ctype.h>
#include "git_hash.h"

#define VERSION "2.11.6"
#define VERSION "2.12.1"
static const char *const Version = VERSION;

#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown
#endif

#define EXPECTED_MONGOOSE_VERSION "7.6"

#define Q(x) #x
#define QUOTE(x) Q(x)
src/stats.c (14 lines changed)

@@ -20,7 +20,7 @@ typedef struct {
    long count;
} agg_t;

void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
void fill_tables(cJSON *document, UNUSED(const char index_id[SIST_INDEX_ID_LEN])) {

    if (cJSON_GetObjectItem(document, "parent") != NULL) {
        return;

@@ -96,16 +96,8 @@ void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
}

void read_index_into_tables(index_t *index) {
    DIR *dir = opendir(index->path);
    struct dirent *de;
    while ((de = readdir(dir)) != NULL) {
        if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
            char file_path[PATH_MAX];
            snprintf(file_path, PATH_MAX, "%s%s", index->path, de->d_name);
            read_index(file_path, index->desc.id, index->desc.type, fill_tables);
        }
    }
    closedir(dir);
    char file_path[PATH_MAX];
    READ_INDICES(file_path, index->path, read_index(file_path, index->desc.id, index->desc.type, fill_tables), {}, 1);
}

static size_t rfind(const char *str, int c) {
src/tpool.c (66 lines changed)

@@ -28,6 +28,9 @@ typedef struct tpool {
    int work_cnt;
    int done_cnt;
    int busy_cnt;
    int throttle_stuck_cnt;
    size_t mem_limit;
    size_t page_size;

    int free_arg;
    int stop;

@@ -114,13 +117,44 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
    return 1;
}

/**
 * see: https://github.com/htop-dev/htop/blob/f782f821f7f8081cb43bbad1c37f32830a260a81/linux/LinuxProcessList.c
 */
__always_inline
static size_t _get_total_mem(tpool_t* pool) {
    FILE* statmfile = fopen("/proc/self/statm", "r");
    if (!statmfile)
        return 0;

    long int dummy, dummy2, dummy3, dummy4, dummy5, dummy6;
    long int m_resident;

    int r = fscanf(statmfile, "%ld %ld %ld %ld %ld %ld %ld",
                   &dummy, /* m_virt */
                   &m_resident,
                   &dummy2, /* m_share */
                   &dummy3, /* m_trs */
                   &dummy4, /* unused since Linux 2.6; always 0 */
                   &dummy5, /* m_drs */
                   &dummy6); /* unused since Linux 2.6; always 0 */
    fclose(statmfile);

    if (r == 7) {
        return m_resident * pool->page_size;
    } else {
        return 0;
    }
}
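As a hedged illustration of what the fscanf above parses (the numbers are made up), /proc/self/statm is a single line of space-separated page counts:

    $ cat /proc/self/statm
    12345 2048 1536 215 0 1024 0

Here the second field (m_resident) is 2048 pages; with a 4096-byte page size, _get_total_mem() would report 2048 * 4096 = 8 MiB of resident memory, which the worker threads compare against pool->mem_limit before picking up new work.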
/**
 * Thread worker function
 */
static void *tpool_worker(void *arg) {
    tpool_t *pool = arg;
    int stuck_notified = 0;
    int throttle_ms = 0;

    while (1) {
    while (TRUE) {
        pthread_mutex_lock(&pool->work_mutex);
        if (pool->stop) {
            break;

@@ -138,10 +172,35 @@ static void *tpool_worker(void *arg) {
        pthread_mutex_unlock(&(pool->work_mutex));

        if (work != NULL) {
            stuck_notified = 0;
            throttle_ms = 0;
            while(!pool->stop && pool->mem_limit > 0 && _get_total_mem(pool) >= pool->mem_limit) {
                if (!stuck_notified && throttle_ms >= 90000) {
                    // notify the pool that this thread is stuck.
                    pthread_mutex_lock(&(pool->work_mutex));
                    pool->throttle_stuck_cnt += 1;
                    if (pool->throttle_stuck_cnt == pool->thread_cnt) {
                        LOG_ERROR("tpool.c", "Throttle memory limit too low, cannot proceed!");
                        pool->stop = TRUE;
                    }
                    pthread_mutex_unlock(&(pool->work_mutex));
                    stuck_notified = 1;
                }
                usleep(10000);
                throttle_ms += 10;
            }

            if (pool->stop) {
                break;
            }

            // we are not stuck anymore. cancel our notification.
            if (stuck_notified) {
                pthread_mutex_lock(&(pool->work_mutex));
                pool->throttle_stuck_cnt -= 1;
                pthread_mutex_unlock(&(pool->work_mutex));
            }

            work->func(work->arg);
            if (pool->free_arg) {
                free(work->arg);
@@ -243,18 +302,21 @@ void tpool_destroy(tpool_t *pool) {
 * Create a thread pool
 * @param thread_cnt Worker threads count
 */
tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress) {
tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress, size_t mem_limit) {

    tpool_t *pool = malloc(sizeof(tpool_t));
    pool->thread_cnt = thread_cnt;
    pool->work_cnt = 0;
    pool->done_cnt = 0;
    pool->busy_cnt = 0;
    pool->throttle_stuck_cnt = 0;
    pool->mem_limit = mem_limit;
    pool->stop = FALSE;
    pool->free_arg = free_arg;
    pool->cleanup_func = cleanup_func;
    pool->threads = calloc(sizeof(pthread_t), thread_cnt);
    pool->print_progress = print_progress;
    pool->page_size = getpagesize();

    pthread_mutex_init(&(pool->work_mutex), NULL);
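A quick sketch of how the new mem_limit parameter is wired up by the scanner (both assignments appear verbatim elsewhere in this changeset; --mem-throttle is given in MiB):

    ScanCtx.mem_limit = (size_t) args->scan_mem_limit_mib * 1024 * 1024;
    ScanCtx.pool = tpool_create(ScanCtx.threads, thread_cleanup, TRUE, TRUE, ScanCtx.mem_limit);
    // When --mem-throttle is not set, scan_mem_limit_mib stays 0, so pool->mem_limit is 0
    // and the worker loop never throttles.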
@@ -8,7 +8,7 @@ typedef struct tpool tpool_t;

typedef void (*thread_func_t)(void *arg);

tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress);
tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress, size_t mem_limit);
void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *pool);

@@ -4,7 +4,7 @@
#define INDEX_TYPE_NDJSON "ndjson"

typedef struct index_descriptor {
    char id[MD5_STR_LENGTH];
    char id[SIST_INDEX_ID_LEN];
    char version[64];
    long timestamp;
    char root[PATH_MAX];

src/util.h (47 lines changed)
@@ -10,8 +10,6 @@
#include "third-party/utf8.h/utf8.h"
#include "libscan/scan.h"

#define MD5_STR_LENGTH 33


char *abspath(const char *path);

@@ -94,52 +92,37 @@ static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {


__always_inline
static int md5_digest_is_null(const unsigned char digest[MD5_DIGEST_LENGTH]) {
    return (*(int64_t *) digest) == 0 && (*((int64_t *) digest + 1)) == 0;
static void generate_doc_id(const char *rel_path, char *doc_id) {
    unsigned char md[MD5_DIGEST_LENGTH];

    MD5((unsigned char *) rel_path, strlen(rel_path), md);
    buf2hex(md, sizeof(md), doc_id);
}


__always_inline
static void incremental_put(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH], int mtime) {
    char *ptr = malloc(MD5_STR_LENGTH);
    buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
static void incremental_put(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN], int mtime) {
    char *ptr = malloc(SIST_DOC_ID_LEN);
    strcpy(ptr, doc_id);
    g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
}

__always_inline
static void incremental_put_str(GHashTable *table, const char *path_md5, int mtime) {
    char *ptr = malloc(MD5_STR_LENGTH);
    strcpy(ptr, path_md5);
    g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
}

__always_inline
static int incremental_get(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
static int incremental_get(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
    if (table != NULL) {
        char md5_str[MD5_STR_LENGTH];
        buf2hex(path_md5, MD5_DIGEST_LENGTH, md5_str);
        return GPOINTER_TO_INT(g_hash_table_lookup(table, md5_str));
    } else {
        return 0;
    }
}

__always_inline
static int incremental_get_str(GHashTable *table, const char *path_md5) {
    if (table != NULL) {
        return GPOINTER_TO_INT(g_hash_table_lookup(table, path_md5));
        return GPOINTER_TO_INT(g_hash_table_lookup(table, doc_id));
    } else {
        return 0;
    }
}

/**
 * Not thread safe!
 * Marks a file by adding it to a table.
 * !!Not thread safe.
 */
__always_inline
static int incremental_mark_file_for_copy(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
    char *ptr = malloc(MD5_STR_LENGTH);
    buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
static int incremental_mark_file(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
    char *ptr = malloc(SIST_DOC_ID_LEN);
    strcpy(ptr, doc_id);
    return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
}

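generate_doc_id() above replaces the raw path_md5 digest with a printable document ID: the MD5 of the file's path relative to the index root, hex-encoded into a SIST_DOC_ID_LEN (33-byte) buffer. Deriving the ID from the relative path keeps it stable across scans, which is what the incremental_get()/incremental_put() helpers rely on. A standalone sketch of the same derivation (illustrative only; the example path is hypothetical and the inline hex loop stands in for buf2hex):

    #include <openssl/md5.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
        const char *rel_path = "photos/2021/cat.jpg";   // hypothetical relative path
        unsigned char md[MD5_DIGEST_LENGTH];
        char doc_id[MD5_DIGEST_LENGTH * 2 + 1];         // MD5_STR_LENGTH == 33

        MD5((const unsigned char *) rel_path, strlen(rel_path), md);
        for (int i = 0; i < MD5_DIGEST_LENGTH; i++) {
            sprintf(doc_id + i * 2, "%02x", md[i]);     // hex-encode the digest
        }
        printf("doc_id=%s\n", doc_id);                  // same ID for the same relative path
        return 0;
    }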
214 src/web/serve.c
@@ -8,12 +8,23 @@
|
||||
|
||||
#include <src/ctx.h>
|
||||
|
||||
#define HTTP_SERVER_HEADER "Server: sist2/" VERSION "\r\n"
|
||||
#define HTTP_TEXT_TYPE_HEADER "Content-Type: text/plain;charset=utf-8\r\n"
|
||||
#define HTTP_REPLY_NOT_FOUND mg_http_reply(nc, 404, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Not found");
|
||||
|
||||
static struct mg_http_serve_opts DefaultServeOpts = {
|
||||
.fs = NULL,
|
||||
.ssi_pattern = NULL,
|
||||
.root_dir = NULL,
|
||||
.mime_types = ""
|
||||
};
|
||||
|
||||
|
||||
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
|
||||
mg_printf(
|
||||
nc,
|
||||
"HTTP/1.1 %d %s\r\n"
|
||||
"Server: sist2/" VERSION "\r\n"
|
||||
HTTP_SERVER_HEADER
|
||||
"Content-Length: %d\r\n"
|
||||
"%s\r\n\r\n",
|
||||
status_code, "OK",
|
||||
@@ -25,7 +36,7 @@ static void send_response_line(struct mg_connection *nc, int status_code, size_t
|
||||
|
||||
index_t *get_index_by_id(const char *index_id) {
|
||||
for (int i = WebCtx.index_count; i >= 0; i--) {
|
||||
if (strncmp(index_id, WebCtx.indices[i].desc.id, MD5_STR_LENGTH) == 0) {
|
||||
if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) {
|
||||
return &WebCtx.indices[i];
|
||||
}
|
||||
}
|
||||
@@ -50,7 +61,7 @@ store_t *get_tag_store(const char *index_id) {
|
||||
|
||||
void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
if (WebCtx.dev) {
|
||||
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", "text/html", NULL);
|
||||
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &DefaultServeOpts);
|
||||
} else {
|
||||
send_response_line(nc, 200, sizeof(index_html), "Content-Type: text/html");
|
||||
mg_send(nc, index_html, sizeof(index_html));
|
||||
@@ -59,23 +70,23 @@ void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (hm->uri.len != MD5_STR_LENGTH + 4) {
|
||||
mg_http_reply(nc, 404, "", "");
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_md5[MD5_STR_LENGTH];
|
||||
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
|
||||
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
|
||||
char arg_index_id[SIST_INDEX_ID_LEN];
|
||||
memcpy(arg_index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
|
||||
*(arg_index_id + SIST_INDEX_ID_LEN - 1) = '\0';
|
||||
|
||||
index_t *index = get_index_by_id(arg_md5);
|
||||
index_t *index = get_index_by_id(arg_index_id);
|
||||
if (index == NULL) {
|
||||
mg_http_reply(nc, 404, "", "");
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
const char *file;
|
||||
switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) {
|
||||
switch (atoi(hm->uri.ptr + 3 + SIST_INDEX_ID_LEN)) {
|
||||
case 1:
|
||||
file = "treemap.csv";
|
||||
break;
|
||||
@@ -100,12 +111,13 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
strcpy(full_path, index->path);
|
||||
strcat(full_path, file);
|
||||
|
||||
mg_http_serve_file(nc, hm, full_path, "text/csv", disposition);
|
||||
struct mg_http_serve_opts opts = {};
|
||||
mg_http_serve_file(nc, hm, full_path, &opts);
|
||||
}
|
||||
|
||||
void javascript(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
if (WebCtx.dev) {
|
||||
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/index.js", "application/javascript", NULL);
|
||||
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/index.js", &DefaultServeOpts);
|
||||
} else {
|
||||
send_response_line(nc, 200, sizeof(index_js), "Content-Type: application/javascript");
|
||||
mg_send(nc, index_js, sizeof(index_js));
|
||||
@@ -114,7 +126,7 @@ void javascript(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
void javascript_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
if (WebCtx.dev) {
|
||||
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/chunk-vendors.js", "application/javascript", NULL);
|
||||
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/chunk-vendors.js", &DefaultServeOpts);
|
||||
} else {
|
||||
send_response_line(nc, 200, sizeof(chunk_vendors_js), "Content-Type: application/javascript");
|
||||
mg_send(nc, chunk_vendors_js, sizeof(chunk_vendors_js));
|
||||
@@ -138,32 +150,50 @@ void style_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (hm->uri.len != 68) {
|
||||
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
int has_thumbnail_index = FALSE;
|
||||
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {
|
||||
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 4) {
|
||||
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
has_thumbnail_index = TRUE;
|
||||
}
|
||||
|
||||
char arg_file_md5[MD5_STR_LENGTH];
|
||||
char arg_index[MD5_STR_LENGTH];
|
||||
char arg_doc_id[SIST_DOC_ID_LEN];
|
||||
char arg_index[SIST_INDEX_ID_LEN];
|
||||
|
||||
memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH);
|
||||
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
|
||||
memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
|
||||
*(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
|
||||
|
||||
unsigned char md5_buf[MD5_DIGEST_LENGTH];
|
||||
hex2buf(arg_file_md5, MD5_STR_LENGTH - 1, md5_buf);
|
||||
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
|
||||
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
|
||||
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
|
||||
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
|
||||
|
||||
store_t *store = get_store(arg_index);
|
||||
if (store == NULL) {
|
||||
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char *data;
|
||||
size_t data_len = 0;
|
||||
char *data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
|
||||
|
||||
if (has_thumbnail_index) {
|
||||
const char *tn_index = hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2;
|
||||
|
||||
char tn_key[sizeof(arg_doc_id) + sizeof(char) * 4];
|
||||
|
||||
memcpy(tn_key, arg_doc_id, sizeof(arg_doc_id));
|
||||
memcpy(tn_key + sizeof(arg_doc_id) - 1, tn_index, sizeof(char) * 4);
|
||||
*(tn_key + sizeof(tn_key) - 1) = '\0';
|
||||
|
||||
data = store_read(store, (char *) tn_key, sizeof(tn_key), &data_len);
|
||||
} else {
|
||||
data = store_read(store, (char *) arg_doc_id, sizeof(arg_doc_id), &data_len);
|
||||
}
|
||||
|
||||
if (data_len != 0) {
|
||||
send_response_line(
|
||||
nc, 200, data_len,
|
||||
@@ -173,7 +203,7 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
mg_send(nc, data, data_len);
|
||||
free(data);
|
||||
} else {
|
||||
mg_http_reply(nc, 404, "Content-Type: text/plain;charset=utf-8\r\n", "Not found");
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -182,7 +212,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (hm->body.len == 0) {
|
||||
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
|
||||
mg_http_reply(nc, 500, "", "Invalid request");
|
||||
mg_http_reply(nc, 400, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -193,7 +223,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
char url[4096];
|
||||
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
|
||||
|
||||
nc->fn_data = web_post_async(url, body);
|
||||
nc->fn_data = web_post_async(url, body, WebCtx.es_insecure_ssl);
|
||||
}
|
||||
|
||||
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
|
||||
@@ -226,6 +256,11 @@ void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
|
||||
|
||||
void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (strcmp(MG_VERSION, EXPECTED_MONGOOSE_VERSION) != 0) {
|
||||
LOG_WARNING("serve.c", "sist2 was not linked with latest mongoose version, "
|
||||
"serving file from disk might not work as expected.")
|
||||
}
|
||||
|
||||
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
|
||||
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
|
||||
const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;
|
||||
@@ -246,10 +281,18 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
|
||||
|
||||
char disposition[8192];
|
||||
snprintf(disposition, sizeof(disposition),
|
||||
"Content-Disposition: inline; filename=\"%s%s%s\"\r\nAccept-Ranges: bytes\r\n",
|
||||
HTTP_SERVER_HEADER "Content-Disposition: inline; filename=\"%s%s%s\"\r\n"
|
||||
"Accept-Ranges: bytes\r\nCache-Control: no-store\r\n",
|
||||
name, strlen(ext) == 0 ? "" : ".", ext);
|
||||
|
||||
mg_http_serve_file(nc, hm, full_path, mime, disposition);
|
||||
char mime_mapping[1024];
|
||||
snprintf(mime_mapping, sizeof(mime_mapping), "%s=%s", ext, mime);
|
||||
|
||||
struct mg_http_serve_opts opts = {
|
||||
.extra_headers = disposition,
|
||||
.mime_types = mime_mapping
|
||||
};
|
||||
mg_http_serve_file(nc, hm, full_path, &opts);
|
||||
}
|
||||
|
||||
void cache_es_version() {
|
||||
@@ -259,7 +302,7 @@ void cache_es_version() {
|
||||
return;
|
||||
}
|
||||
|
||||
es_version_t *es_version = elastic_get_version(WebCtx.es_url);
|
||||
es_version_t *es_version = elastic_get_version(WebCtx.es_url, WebCtx.es_insecure_ssl);
|
||||
if (es_version != NULL) {
|
||||
WebCtx.es_version = es_version;
|
||||
is_cached = TRUE;
|
||||
@@ -270,14 +313,20 @@ void index_info(struct mg_connection *nc) {
|
||||
|
||||
cache_es_version();
|
||||
|
||||
const char *es_version = "0.0.0";
|
||||
if (WebCtx.es_version != NULL) {
|
||||
es_version = format_es_version(WebCtx.es_version);
|
||||
}
|
||||
|
||||
cJSON *json = cJSON_CreateObject();
|
||||
cJSON *arr = cJSON_AddArrayToObject(json, "indices");
|
||||
|
||||
cJSON_AddStringToObject(json, "mongooseVersion", MG_VERSION);
|
||||
cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
|
||||
cJSON_AddStringToObject(json, "version", Version);
|
||||
cJSON_AddStringToObject(json, "esVersion", format_es_version(WebCtx.es_version));
|
||||
cJSON_AddStringToObject(json, "esVersion", es_version);
|
||||
cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
|
||||
cJSON_AddBoolToObject(json, "esVersionLegacy", USE_LEGACY_ES_SETTINGS(WebCtx.es_version));
|
||||
cJSON_AddBoolToObject(json, "esVersionLegacy", IS_LEGACY_VERSION(WebCtx.es_version));
|
||||
cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
|
||||
cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
|
||||
cJSON_AddStringToObject(json, "lang", WebCtx.lang);
|
||||
@@ -310,55 +359,19 @@ void index_info(struct mg_connection *nc) {
|
||||
}
|
||||
|
||||
|
||||
void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (hm->uri.len != MD5_STR_LENGTH + 2) {
|
||||
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_md5[MD5_STR_LENGTH];
|
||||
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
|
||||
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
|
||||
|
||||
cJSON *doc = elastic_get_document(arg_md5);
|
||||
cJSON *source = cJSON_GetObjectItem(doc, "_source");
|
||||
|
||||
cJSON *index_id = cJSON_GetObjectItem(source, "index");
|
||||
if (index_id == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
|
||||
index_t *idx = get_index_by_id(index_id->valuestring);
|
||||
if (idx == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
return;
|
||||
}
|
||||
|
||||
char *json_str = cJSON_PrintUnformatted(source);
|
||||
send_response_line(nc, 200, (int) strlen(json_str), "Content-Type: application/json");
|
||||
mg_send(nc, json_str, (int) strlen(json_str));
|
||||
free(json_str);
|
||||
cJSON_Delete(doc);
|
||||
}
|
||||
|
||||
void file(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (hm->uri.len != MD5_STR_LENGTH + 2) {
|
||||
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
|
||||
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_md5[MD5_STR_LENGTH];
|
||||
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
|
||||
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
|
||||
char arg_doc_id[SIST_DOC_ID_LEN];
|
||||
memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
|
||||
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
|
||||
|
||||
const char *next = arg_md5;
|
||||
const char *next = arg_doc_id;
|
||||
cJSON *doc = NULL;
|
||||
cJSON *index_id = NULL;
|
||||
cJSON *source = NULL;
|
||||
@@ -369,7 +382,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
index_id = cJSON_GetObjectItem(source, "index");
|
||||
if (index_id == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
cJSON *parent = cJSON_GetObjectItem(source, "parent");
|
||||
@@ -383,7 +396,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (idx == NULL) {
|
||||
cJSON_Delete(doc);
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -409,7 +422,6 @@ void status(struct mg_connection *nc) {
|
||||
typedef struct {
|
||||
char *name;
|
||||
int delete;
|
||||
char *path_md5_str;
|
||||
char *doc_id;
|
||||
} tag_req_t;
|
||||
|
||||
@@ -429,12 +441,6 @@ tag_req_t *parse_tag_request(cJSON *json) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cJSON *arg_path_md5 = cJSON_GetObjectItem(json, "path_md5");
|
||||
if (arg_path_md5 == NULL || !cJSON_IsString(arg_path_md5) ||
|
||||
strlen(arg_path_md5->valuestring) != MD5_STR_LENGTH - 1) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cJSON *arg_doc_id = cJSON_GetObjectItem(json, "doc_id");
|
||||
if (arg_doc_id == NULL || !cJSON_IsString(arg_doc_id)) {
|
||||
return NULL;
|
||||
@@ -443,33 +449,32 @@ tag_req_t *parse_tag_request(cJSON *json) {
|
||||
tag_req_t *req = malloc(sizeof(tag_req_t));
|
||||
req->delete = arg_delete->valueint;
|
||||
req->name = arg_name->valuestring;
|
||||
req->path_md5_str = arg_path_md5->valuestring;
|
||||
req->doc_id = arg_doc_id->valuestring;
|
||||
|
||||
return req;
|
||||
}
|
||||
|
||||
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
if (hm->uri.len != MD5_STR_LENGTH + 4) {
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
|
||||
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_index[MD5_STR_LENGTH];
|
||||
memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH);
|
||||
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
|
||||
char arg_index[SIST_INDEX_ID_LEN];
|
||||
memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
|
||||
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
|
||||
|
||||
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
|
||||
LOG_DEBUG("serve.c", "Invalid tag request")
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
store_t *store = get_tag_store(arg_index);
|
||||
if (store == NULL) {
|
||||
LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
|
||||
mg_http_reply(nc, 404, "", "Not found");
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -490,7 +495,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
cJSON *arr = NULL;
|
||||
|
||||
size_t data_len = 0;
|
||||
const char *data = store_read(store, arg_req->path_md5_str, MD5_STR_LENGTH, &data_len);
|
||||
const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len);
|
||||
if (data_len == 0) {
|
||||
arr = cJSON_CreateArray();
|
||||
} else {
|
||||
@@ -526,7 +531,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
char url[4096];
|
||||
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
|
||||
nc->fn_data = web_post_async(url, buf);
|
||||
nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
|
||||
|
||||
} else {
|
||||
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
|
||||
@@ -546,11 +551,11 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
char url[4096];
|
||||
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
|
||||
nc->fn_data = web_post_async(url, buf);
|
||||
nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
|
||||
}
|
||||
|
||||
char *json_str = cJSON_PrintUnformatted(arr);
|
||||
store_write(store, arg_req->path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
|
||||
store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1);
|
||||
store_flush(store);
|
||||
|
||||
free(arg_req);
|
||||
@@ -612,10 +617,8 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
|
||||
return;
|
||||
}
|
||||
tag(nc, hm);
|
||||
} else if (mg_http_match_uri(hm, "/d/*")) {
|
||||
document_info(nc, hm);
|
||||
} else {
|
||||
mg_http_reply(nc, 404, "", "Page not found");
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
}
|
||||
|
||||
} else if (ev == MG_EV_POLL) {
|
||||
@@ -645,7 +648,8 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
|
||||
free(tmp);
|
||||
}
|
||||
|
||||
mg_http_reply(nc, 500, "", "");
|
||||
mg_http_reply(nc, 500, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER,
|
||||
"Elasticsearch error, see server logs.");
|
||||
}
|
||||
|
||||
free_response(r);
|
||||
@@ -659,7 +663,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
|
||||
|
||||
void serve(const char *listen_address) {
|
||||
|
||||
printf("Starting web server @ http://%s\n", listen_address);
|
||||
LOG_INFOF("serve.c", "Starting web server @ http://%s", listen_address)
|
||||
|
||||
struct mg_mgr mgr;
|
||||
mg_mgr_init(&mgr);
|
||||
|
||||
6 src/web/static_generated.c (vendored)
File diff suppressed because one or more lines are too long
@@ -35,23 +35,35 @@ def sist2_index(files, *args):
    path = copy_files(files)

    shutil.rmtree("test_i", ignore_errors=True)
    sist2("scan", path, "-o", "test_i", *args)
    sist2("scan", path, "-o", "test_i", "-t12", *args)
    return iter(sist2_index_to_dict("test_i"))


def sist2_incremental_index(files, func=None, *args):
def get_lmdb_contents(path):
    import lmdb

    env = lmdb.open(path)

    txn = env.begin(write=False)

    return dict((k, v) for k, v in txn.cursor())


def sist2_incremental_index(files, func=None, incremental_index=False, *args):
    path = copy_files(files)

    if func:
        func(path)

    shutil.rmtree("test_i_inc", ignore_errors=True)
    sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
    return iter(sist2_index_to_dict("test_i_inc"))
    sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", "-t12", *args)
    return iter(sist2_index_to_dict("test_i_inc", incremental_index))


def sist2_index_to_dict(index):
    res = sist2("index", "--print", index)
def sist2_index_to_dict(index, incremental_index=False):
    args = ["--incremental-index"] if incremental_index else []

    res = sist2("index", "--print", "--very-verbose", *args, index)

    for line in res.splitlines():
        if line:
@@ -74,8 +86,31 @@ class ScanTest(unittest.TestCase):
            pass

        file_count = sum(1 for _ in sist2_index(TEST_FILES))
        self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
        self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
        lmdb_full = get_lmdb_contents("test_i/thumbs")

        # Remove files
        num_files_rm1 = len(list(sist2_incremental_index(TEST_FILES, remove_files)))
        lmdb_rm1 = get_lmdb_contents("test_i_inc/thumbs")
        self.assertEqual(num_files_rm1, file_count - 2)
        self.assertEqual(len(set(lmdb_full.keys() - set(lmdb_rm1.keys()))), 2)

        # add files (incremental_index=True)
        num_files_add_inc = len(list(sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)))
        lmdb_add_inc = get_lmdb_contents("test_i_inc/thumbs")
        self.assertEqual(num_files_add_inc, 3)
        self.assertEqual(set(lmdb_full.keys()), set(lmdb_add_inc.keys()))

        # add files
        num_files_add = len(list(sist2_incremental_index(TEST_FILES, add_files)))
        lmdb_add = get_lmdb_contents("test_i_inc/thumbs")
        self.assertEqual(num_files_add, file_count + 3)
        self.assertEqual(set(lmdb_full.keys()), set(lmdb_add.keys()))

        # (No action)
        sist2_incremental_index(TEST_FILES)
        lmdb_inc = get_lmdb_contents("test_i_inc/thumbs")

        self.assertEqual(set(lmdb_full.keys()), set(lmdb_inc.keys()))


if __name__ == "__main__":
88 third-party/libscan/CMakeLists.txt (vendored)
@@ -6,10 +6,11 @@ set(CMAKE_C_STANDARD 11)
|
||||
option(BUILD_TESTS "Build tests" on)
|
||||
|
||||
add_subdirectory(third-party/antiword)
|
||||
add_compile_definitions(
|
||||
antiword
|
||||
NDEBUG
|
||||
)
|
||||
|
||||
set(USE_LIBXML2 OFF CACHE BOOL "" FORCE)
|
||||
set(USE_XMLWRITER OFF CACHE BOOL "" FORCE)
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
|
||||
add_subdirectory(third-party/libmobi)
|
||||
|
||||
add_library(
|
||||
scan
|
||||
@@ -32,6 +33,54 @@ add_library(
|
||||
libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h libscan/raw/raw.c libscan/raw/raw.h)
|
||||
set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
if (SIST_DEBUG)
|
||||
add_compile_definitions(
|
||||
antiword
|
||||
DEBUG
|
||||
)
|
||||
target_compile_options(
|
||||
antiword
|
||||
PRIVATE
|
||||
-g
|
||||
-fstack-protector
|
||||
-fno-omit-frame-pointer
|
||||
-fsanitize=address
|
||||
-fno-inline
|
||||
)
|
||||
elseif (SIST_FAST)
|
||||
add_compile_definitions(
|
||||
antiword
|
||||
NDEBUG
|
||||
)
|
||||
|
||||
target_compile_options(
|
||||
scan
|
||||
PRIVATE
|
||||
|
||||
-Ofast
|
||||
-march=native
|
||||
-fno-stack-protector
|
||||
-fomit-frame-pointer
|
||||
-freciprocal-math
|
||||
)
|
||||
else()
|
||||
add_compile_definitions(
|
||||
antiword
|
||||
NDEBUG
|
||||
)
|
||||
|
||||
target_compile_options(
|
||||
scan
|
||||
PRIVATE
|
||||
|
||||
-Ofast
|
||||
#-march=native
|
||||
-fno-stack-protector
|
||||
-fomit-frame-pointer
|
||||
#-freciprocal-math
|
||||
)
|
||||
endif()
|
||||
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib .so)
|
||||
|
||||
find_package(cJSON CONFIG REQUIRED)
|
||||
@@ -69,35 +118,15 @@ target_compile_options(
|
||||
-g
|
||||
)
|
||||
|
||||
include(ExternalProject)
|
||||
find_program(MAKE_EXE NAMES gmake nmake make)
|
||||
ExternalProject_Add(
|
||||
libmobi
|
||||
GIT_REPOSITORY https://github.com/simon987/libmobi.git
|
||||
GIT_TAG "public"
|
||||
|
||||
UPDATE_COMMAND ""
|
||||
PATCH_COMMAND ""
|
||||
TEST_COMMAND ""
|
||||
CONFIGURE_COMMAND ./autogen.sh && ./configure
|
||||
INSTALL_COMMAND ""
|
||||
|
||||
PREFIX "third-party/ext_libmobi"
|
||||
SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
|
||||
BINARY_DIR "third-party/ext_libmobi/src/libmobi"
|
||||
|
||||
BUILD_COMMAND ${MAKE_EXE} -j 8 --silent
|
||||
)
|
||||
|
||||
SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
|
||||
SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
|
||||
|
||||
if (SIST_DEBUG)
|
||||
SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
|
||||
else()
|
||||
SET(FFMPEG_DEBUG "")
|
||||
endif()
|
||||
|
||||
include(ExternalProject)
|
||||
find_program(MAKE_EXE NAMES gmake nmake make)
|
||||
|
||||
ExternalProject_Add(
|
||||
ffmpeg
|
||||
GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
|
||||
@@ -143,10 +172,10 @@ SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwp
|
||||
|
||||
add_dependencies(
|
||||
scan
|
||||
libmobi
|
||||
ffmpeg
|
||||
antiword
|
||||
libwpd
|
||||
mobi
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
@@ -164,8 +193,6 @@ target_link_libraries(
|
||||
${MUPDF_LIB}
|
||||
openjp2
|
||||
|
||||
${MOBI_LIB_DIR}/libmobi.a
|
||||
|
||||
${WPD_LIB_DIR}/libwpd-0.9.a
|
||||
${WPD_LIB_DIR}/libwpd-stream-0.9.a
|
||||
|
||||
@@ -202,6 +229,7 @@ target_link_libraries(
|
||||
${GUMBO_LIB}
|
||||
dl
|
||||
antiword
|
||||
mobi
|
||||
unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
|
||||
)
|
||||
|
||||
|
||||
2 third-party/libscan/libscan/arc/arc.c (vendored)
@@ -202,7 +202,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
        sub_job->vfile.logf = ctx->logf;
        sub_job->vfile.has_checksum = FALSE;
        sub_job->vfile.calculate_checksum = f->calculate_checksum;
        memcpy(sub_job->parent, doc->path_md5, MD5_DIGEST_LENGTH);
        strcpy(sub_job->parent, doc->doc_id);

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            sub_job->vfile.info = *archive_entry_stat(entry);

17 third-party/libscan/libscan/comic/comic.c (vendored)
@@ -12,7 +12,7 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
    struct archive_entry *entry = NULL;
    arc_data_t arc_data;

    if (ctx->tn_size <= 0) {
    if (!ctx->enable_tn) {
        return;
    }

@@ -44,7 +44,20 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
                break;
            }

            ret = store_image_thumbnail((scan_media_ctx_t *) ctx, buf, entry_size, doc, file_path);
            scan_media_ctx_t media_ctx = {
                    .tn_count = ctx->enable_tn ? 1 : 0,
                    .tn_size = ctx->tn_size,
                    .tn_qscale = ctx->tn_qscale,
                    .tesseract_lang = NULL,
                    .tesseract_path = NULL,
                    .read_subtitles = FALSE,
                    .max_media_buffer = 0,
                    .log = ctx->log,
                    .logf = ctx->logf,
                    .store = ctx->store,
            };

            ret = store_image_thumbnail(&media_ctx, buf, entry_size, doc, file_path);
            free(buf);

            if (ret == TRUE) {

1 third-party/libscan/libscan/comic/comic.h (vendored)
@@ -9,6 +9,7 @@ typedef struct {
    logf_callback_t logf;
    store_callback_t store;

    int enable_tn;
    int tn_size;
    float tn_qscale;

8 third-party/libscan/libscan/ebook/ebook.c (vendored)
@@ -155,8 +155,8 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
    av_init_packet(&jpeg_packet);
    avcodec_receive_packet(jpeg_encoder, &jpeg_packet);

    APPEND_TN_META(doc, pixmap->w, pixmap->h)
    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
    APPEND_LONG_META(doc, MetaThumbnail, 1)
    ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);

    free(samples);
    av_packet_unref(&jpeg_packet);
@@ -283,7 +283,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi

    APPEND_LONG_META(doc, MetaPages, page_count)

    if (ctx->tn_size > 0) {
    if (ctx->enable_tn) {
        if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) {
            fz_drop_stream(fzctx, stream);
            fz_drop_document(fzctx, fzdoc);
@@ -404,7 +404,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {

    text_buffer_t content_buffer = text_buffer_create(ctx->content_size);

    if (ctx->tn_size <= 0) {
    if (!ctx->enable_tn) {
        return;
    }

1 third-party/libscan/libscan/ebook/ebook.h (vendored)
@@ -6,6 +6,7 @@
typedef struct {
    long content_size;
    int tn_size;
    int enable_tn;
    const char *tesseract_lang;
    const char *tesseract_path;
    pthread_mutex_t mupdf_mutex;

6 third-party/libscan/libscan/font/font.c (vendored)
@@ -176,7 +176,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
    strcpy(meta_name->str_val, font_name);
    APPEND_META(doc, meta_name)

    if (ctx->enable_tn == TRUE) {
    if (!ctx->enable_tn) {
        FT_Done_Face(face);
        free(buf);
        return;
@@ -231,8 +231,8 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
    dyn_buffer_t bmp_data = dyn_buffer_create();
    bmp_format(&bmp_data, dimensions, bitmap);

    APPEND_TN_META(doc, dimensions.width, dimensions.height)
    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);
    APPEND_LONG_META(doc, MetaThumbnail, 1)
    ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) bmp_data.buf, bmp_data.cur);

    dyn_buffer_destroy(&bmp_data);
    free(bitmap);

11 third-party/libscan/libscan/macros.h (vendored)
@@ -20,8 +20,10 @@
#undef ABS
#define ABS(a) (((a) < 0) ? -(a) : (a))

#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
#define SHA1_DIGEST_LENGTH SHA_DIGEST_LENGTH

#define SHA1_STR_LENGTH (SHA1_DIGEST_LENGTH * 2 + 1)
#define MD5_STR_LENGTH (MD5_DIGEST_LENGTH * 2 + 1)

#define APPEND_STR_META(doc, keyname, value) \
    {meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \
@@ -35,11 +37,6 @@
    meta_long->long_val = value; \
    APPEND_META(doc, meta_long)}

#define APPEND_TN_META(doc, width, height) \
    {meta_line_t *meta_str = malloc(sizeof(meta_line_t) + 4 + 1 + 4); \
    meta_str->key = MetaThumbnail; \
    sprintf(meta_str->str_val, "%04d,%04d", width, height); \
    APPEND_META(doc, meta_str)}

#define APPEND_META(doc, meta) \
    meta->next = NULL;\

237 third-party/libscan/libscan/media/media.c (vendored)
@@ -4,9 +4,13 @@
|
||||
|
||||
#define MIN_SIZE 32
|
||||
#define AVIO_BUF_SIZE 8192
|
||||
#define IS_VIDEO(fmt) ((fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0)
|
||||
#define IS_VIDEO(fmt) ( \
|
||||
(fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0 \
|
||||
&& strcmp((fmt)->iformat->name, "jpeg_pipe") != 0 \
|
||||
&& strcmp((fmt)->iformat->name, "webp_pipe") != 0 \
|
||||
&& strcmp((fmt)->iformat->name, "png_pipe") != 0 \
|
||||
)
|
||||
|
||||
#define STREAM_IS_IMAGE (stream->nb_frames <= 1)
|
||||
|
||||
#define STORE_AS_IS ((void*)-1)
|
||||
|
||||
@@ -252,7 +256,7 @@ void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDic
|
||||
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
|
||||
|
||||
__always_inline
|
||||
static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
|
||||
static void append_audio_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) {
|
||||
|
||||
AVDictionaryEntry *tag = NULL;
|
||||
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
|
||||
@@ -270,7 +274,7 @@ static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
|
||||
} else if (strcmp(key, "album") == 0) {
|
||||
APPEND_TAG_META(MetaAlbum)
|
||||
} else if (strcmp(key, "comment") == 0) {
|
||||
APPEND_TAG_META(MetaContent)
|
||||
append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -280,18 +284,22 @@ static void
|
||||
append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int is_video) {
|
||||
|
||||
if (is_video) {
|
||||
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
|
||||
meta_duration->key = MetaMediaDuration;
|
||||
meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
|
||||
if (meta_duration->long_val > INT32_MAX) {
|
||||
meta_duration->long_val = 0;
|
||||
if (pFormatCtx->duration / AV_TIME_BASE != 0) {
|
||||
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
|
||||
meta_duration->key = MetaMediaDuration;
|
||||
meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
|
||||
if (meta_duration->long_val > INT32_MAX) {
|
||||
meta_duration->long_val = 0;
|
||||
}
|
||||
APPEND_META(doc, meta_duration)
|
||||
}
|
||||
APPEND_META(doc, meta_duration)
|
||||
|
||||
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
|
||||
meta_bitrate->key = MetaMediaBitrate;
|
||||
meta_bitrate->long_val = pFormatCtx->bit_rate;
|
||||
APPEND_META(doc, meta_bitrate)
|
||||
if (pFormatCtx->bit_rate != 0) {
|
||||
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
|
||||
meta_bitrate->key = MetaMediaBitrate;
|
||||
meta_bitrate->long_val = pFormatCtx->bit_rate;
|
||||
APPEND_META(doc, meta_bitrate)
|
||||
}
|
||||
}
|
||||
|
||||
AVDictionaryEntry *tag = NULL;
|
||||
@@ -398,6 +406,109 @@ void ocr_image(scan_media_ctx_t *ctx, document_t *doc, const AVCodecContext *dec
|
||||
av_frame_free(&rgb_frame);
|
||||
}
|
||||
|
||||
#define SAVE_THUMBNAIL_OK 0
|
||||
#define SAVE_THUMBNAIL_SKIPPED 1
|
||||
#define SAVE_THUMBNAIL_FAILED 2
|
||||
|
||||
int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *decoder,
|
||||
AVStream *stream, int video_stream, document_t *doc, double seek_ratio,
|
||||
int thumbnail_index) {
|
||||
|
||||
if (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
|
||||
int seek_ok = FALSE;
|
||||
|
||||
double target_timestamp = (double) pFormatCtx->duration * seek_ratio;
|
||||
long ts = (long) target_timestamp;
|
||||
|
||||
int seek_ret = avformat_seek_file(
|
||||
// Allow +- 1s
|
||||
pFormatCtx, -1, ts - AV_TIME_BASE, ts, ts + AV_TIME_BASE,
|
||||
0
|
||||
);
|
||||
|
||||
if (seek_ret == 0) {
|
||||
seek_ok = TRUE;
|
||||
} else {
|
||||
CTX_LOG_DEBUGF(
|
||||
doc->filepath,
|
||||
"(media.c) Could not seek media file: %s", av_err2str(seek_ret)
|
||||
)
|
||||
}
|
||||
|
||||
if (seek_ok == FALSE && thumbnail_index != 0) {
|
||||
CTX_LOG_WARNING(doc->filepath, "(media.c) Could not seek media file. Can't generate additional thumbnails.")
|
||||
return SAVE_THUMBNAIL_FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
frame_and_packet_t *frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
|
||||
if (frame_and_packet == NULL) {
|
||||
return SAVE_THUMBNAIL_FAILED;
|
||||
}
|
||||
|
||||
if (ctx->tesseract_lang != NULL && IS_VIDEO(pFormatCtx) && thumbnail_index == 0) {
|
||||
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
|
||||
}
|
||||
|
||||
// NOTE: OCR'd content takes precedence over exif image description
|
||||
if (thumbnail_index == 0) {
|
||||
append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));
|
||||
}
|
||||
|
||||
// Scale frame
|
||||
AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);
|
||||
|
||||
if (scaled_frame == NULL) {
|
||||
frame_and_packet_free(frame_and_packet);
|
||||
return SAVE_THUMBNAIL_FAILED;
|
||||
}
|
||||
|
||||
int return_value;
|
||||
|
||||
if (scaled_frame == STORE_AS_IS) {
|
||||
return_value = SAVE_THUMBNAIL_OK;
|
||||
|
||||
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
|
||||
frame_and_packet->packet->size);
|
||||
} else {
|
||||
// Encode frame to jpeg
|
||||
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
|
||||
ctx->tn_qscale);
|
||||
avcodec_send_frame(jpeg_encoder, scaled_frame);
|
||||
|
||||
AVPacket jpeg_packet;
|
||||
av_init_packet(&jpeg_packet);
|
||||
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
|
||||
|
||||
// Save thumbnail
|
||||
if (thumbnail_index == 0) {
|
||||
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
return_value = SAVE_THUMBNAIL_OK;
|
||||
|
||||
} else if (thumbnail_index > 1) {
|
||||
return_value = SAVE_THUMBNAIL_OK;
|
||||
// TO FIX: the 2nd rendered frame is always broken, just skip it until
|
||||
// I figure out a better fix.
|
||||
thumbnail_index -= 1;
|
||||
|
||||
char tn_key[sizeof(doc->doc_id) + sizeof(char) * 4];
|
||||
snprintf(tn_key, sizeof(tn_key), "%s%04d", doc->doc_id, thumbnail_index);
|
||||
|
||||
ctx->store((char *) tn_key, sizeof(tn_key), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
} else {
|
||||
return_value = SAVE_THUMBNAIL_SKIPPED;
|
||||
}
|
||||
|
||||
avcodec_free_context(&jpeg_encoder);
|
||||
av_packet_unref(&jpeg_packet);
|
||||
av_free(*scaled_frame->data);
|
||||
av_frame_free(&scaled_frame);
|
||||
}
|
||||
|
||||
frame_and_packet_free(frame_and_packet);
|
||||
return return_value;
|
||||
}
|
||||
|
||||
void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) {
|
||||
|
||||
int video_stream = -1;
|
||||
@@ -455,10 +566,10 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
|
||||
}
|
||||
|
||||
if (audio_stream != -1) {
|
||||
append_audio_meta(pFormatCtx, doc);
|
||||
append_audio_meta(ctx, pFormatCtx, doc);
|
||||
}
|
||||
|
||||
if (video_stream != -1 && ctx->tn_size > 0) {
|
||||
if (video_stream != -1 && ctx->tn_count > 0) {
|
||||
AVStream *stream = pFormatCtx->streams[video_stream];
|
||||
|
||||
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
|
||||
@@ -473,69 +584,39 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
|
||||
avcodec_parameters_to_context(decoder, stream->codecpar);
|
||||
avcodec_open2(decoder, video_codec, NULL);
|
||||
|
||||
//Seek
|
||||
if (!STREAM_IS_IMAGE && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
|
||||
int seek_ret;
|
||||
for (int i = 20; i >= 0; i--) {
|
||||
seek_ret = av_seek_frame(pFormatCtx, video_stream,
|
||||
(long) ((double) stream->duration * 0.10), 0);
|
||||
if (seek_ret == 0) {
|
||||
break;
|
||||
}
|
||||
int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE);
|
||||
|
||||
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF &&
|
||||
video_duration_in_seconds >= 15)
|
||||
// Limit to ~1 thumbnail every 7s
|
||||
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 7 + 1), 1) + 1
|
||||
: 1;
|
||||
|
||||
const double seek_increment = thumbnails_to_generate == 1
|
||||
? 0.10
|
||||
: 1.0 / (thumbnails_to_generate + 1);
|
||||
|
||||
int number_of_thumbnails_generated = 0;
|
||||
int save_thumbnail_ret;
|
||||
|
||||
for (int i = 0; i < thumbnails_to_generate; i++) {
|
||||
double seek_ratio = seek_increment * i + seek_increment * 0.9;
|
||||
|
||||
save_thumbnail_ret = decode_frame_and_save_thumbnail(ctx, pFormatCtx, decoder, stream, video_stream, doc,
|
||||
seek_ratio, i);
|
||||
if (save_thumbnail_ret == SAVE_THUMBNAIL_FAILED) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (save_thumbnail_ret == SAVE_THUMBNAIL_OK) {
|
||||
number_of_thumbnails_generated += 1;
|
||||
}
|
||||
}
|
||||
|
||||
frame_and_packet_t *frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
|
||||
if (frame_and_packet == NULL) {
|
||||
avcodec_free_context(&decoder);
|
||||
avformat_close_input(&pFormatCtx);
|
||||
avformat_free_context(pFormatCtx);
|
||||
return;
|
||||
if (number_of_thumbnails_generated > 0) {
|
||||
APPEND_LONG_META(doc, MetaThumbnail, number_of_thumbnails_generated)
|
||||
}
|
||||
|
||||
if (ctx->tesseract_lang != NULL && STREAM_IS_IMAGE) {
|
||||
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
|
||||
}
|
||||
|
||||
// NOTE: OCR'd content takes precedence over exif image description
|
||||
append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));
|
||||
|
||||
// Scale frame
|
||||
AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);
|
||||
|
||||
if (scaled_frame == NULL) {
|
||||
frame_and_packet_free(frame_and_packet);
|
||||
avcodec_free_context(&decoder);
|
||||
avformat_close_input(&pFormatCtx);
|
||||
avformat_free_context(pFormatCtx);
|
||||
return;
|
||||
}
|
||||
|
||||
if (scaled_frame == STORE_AS_IS) {
|
||||
APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
|
||||
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
|
||||
frame_and_packet->packet->size);
|
||||
} else {
|
||||
// Encode frame to jpeg
|
||||
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
|
||||
ctx->tn_qscale);
|
||||
avcodec_send_frame(jpeg_encoder, scaled_frame);
|
||||
|
||||
AVPacket jpeg_packet;
|
||||
av_init_packet(&jpeg_packet);
|
||||
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
|
||||
|
||||
// Save thumbnail
|
||||
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
|
||||
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
|
||||
avcodec_free_context(&jpeg_encoder);
|
||||
av_packet_unref(&jpeg_packet);
|
||||
av_free(*scaled_frame->data);
|
||||
av_frame_free(&scaled_frame);
|
||||
}
|
||||
|
||||
frame_and_packet_free(frame_and_packet);
|
||||
avcodec_free_context(&decoder);
|
||||
}
|
||||
|
||||
@@ -772,8 +853,8 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
|
||||
}
|
||||
|
||||
if (scaled_frame == STORE_AS_IS) {
|
||||
APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
|
||||
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
|
||||
frame_and_packet->packet->size);
|
||||
} else {
|
||||
// Encode frame to jpeg
|
||||
@@ -786,8 +867,8 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
|
||||
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
|
||||
|
||||
// Save thumbnail
|
||||
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
|
||||
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
|
||||
av_packet_unref(&jpeg_packet);
|
||||
avcodec_free_context(&jpeg_encoder);
|
||||
|
||||
3 third-party/libscan/libscan/media/media.h (vendored)
@@ -17,6 +17,9 @@ typedef struct {

    int tn_size;
    float tn_qscale;
    /** Number of thumbnails to generate for videos */
    int tn_count;

    long max_media_buffer;
    int read_subtitles;

2 third-party/libscan/libscan/mobi/scan_mobi.c (vendored)
@@ -1,6 +1,6 @@
#include "scan_mobi.h"

#include <mobi.h>
#include "../../third-party/libmobi/src/mobi.h"
#include <errno.h>
#include "stdlib.h"

3 third-party/libscan/libscan/msdoc/msdoc.c (vendored)
@@ -76,6 +76,7 @@ void parse_msdoc_pdf(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file, void *b
|
||||
scan_ebook_ctx_t ebook_ctx = {
|
||||
.content_size = ctx->content_size,
|
||||
.tn_size = ctx->tn_size,
|
||||
.enable_tn = TRUE,
|
||||
.log = ctx->log,
|
||||
.logf = ctx->logf,
|
||||
.store = ctx->store,
|
||||
@@ -137,7 +138,7 @@ void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx->tn_size > 0) {
|
||||
if (ctx->enable_tn) {
|
||||
char *buf_pdf = malloc(buf_len);
|
||||
memcpy(buf_pdf, buf, buf_len);
|
||||
parse_msdoc_pdf(ctx, doc, file, buf_pdf, buf_len);
|
||||
|
||||
1 third-party/libscan/libscan/msdoc/msdoc.h (vendored)
@@ -5,6 +5,7 @@

typedef struct {
    long content_size;
    int enable_tn;
    int tn_size;
    log_callback_t log;
    logf_callback_t logf;

6 third-party/libscan/libscan/ooxml/ooxml.c (vendored)
@@ -190,8 +190,8 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
    char *buf = malloc(entry_size);
    archive_read_data(a, buf, entry_size);

    APPEND_TN_META(doc, 1, 1) // Size unknown
    ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, entry_size);
    APPEND_LONG_META(doc, MetaThumbnail, 1)
    ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), buf, entry_size);
    free(buf);
}

@@ -238,7 +238,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
            if (read_doc_props(ctx, a, doc) != 0) {
                break;
            }
        } else if (strcmp(path, "docProps/thumbnail.jpeg") == 0) {
        } else if (ctx->enable_tn && strcmp(path, "docProps/thumbnail.jpeg") == 0) {
            read_thumbnail(ctx, doc, a, entry);
        }
    }

1 third-party/libscan/libscan/ooxml/ooxml.h (vendored)
@@ -5,6 +5,7 @@
#include "../scan.h"

typedef struct {
    int enable_tn;
    long content_size;
    log_callback_t log;
    logf_callback_t logf;

50 third-party/libscan/libscan/raw/raw.c (vendored)
@@ -7,8 +7,22 @@
|
||||
|
||||
#define MIN_SIZE 32
|
||||
|
||||
int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
|
||||
return store_image_thumbnail((scan_media_ctx_t *) ctx, img->data, img->data_size, doc, "x.jpeg");
|
||||
int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_thumbnail_t img, document_t *doc) {
|
||||
|
||||
scan_media_ctx_t media_ctx = {
|
||||
.read_subtitles = FALSE,
|
||||
.tn_count = 1,
|
||||
.max_media_buffer = 0,
|
||||
.store = ctx->store,
|
||||
.log = ctx->log,
|
||||
.logf = ctx->logf,
|
||||
.tn_size = ctx->tn_size,
|
||||
.tn_qscale = ctx->tn_qscale,
|
||||
.tesseract_lang = NULL,
|
||||
.tesseract_path = NULL
|
||||
};
|
||||
|
||||
return store_image_thumbnail(&media_ctx, img.thumb, img.tlength, doc, "x.jpeg");
|
||||
}
|
||||
|
||||
int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
|
||||
@@ -69,8 +83,8 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
|
||||
av_init_packet(&jpeg_packet);
|
||||
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
|
||||
|
||||
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
|
||||
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
|
||||
av_packet_unref(&jpeg_packet);
|
||||
av_free(*scaled_frame->data);
|
||||
@@ -157,7 +171,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
APPEND_STR_META(doc, MetaMediaVideoCodec, "raw")
|
||||
|
||||
if (ctx->tn_size <= 0) {
|
||||
if (!ctx->enable_tn) {
|
||||
free(buf);
|
||||
libraw_close(libraw_lib);
|
||||
return;
|
||||
@@ -171,25 +185,25 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
return;
|
||||
}
|
||||
|
||||
int errc = 0;
|
||||
libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc);
|
||||
if (errc != 0) {
|
||||
free(buf);
|
||||
libraw_dcraw_clear_mem(thumb);
|
||||
libraw_close(libraw_lib);
|
||||
return;
|
||||
}
|
||||
|
||||
int tn_ok = 0;
|
||||
|
||||
if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_JPEG) {
|
||||
tn_ok = store_thumbnail_jpeg(ctx, thumb, doc);
|
||||
tn_ok = store_thumbnail_jpeg(ctx, libraw_lib->thumbnail, doc);
|
||||
} else if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_BITMAP) {
|
||||
// TODO: technically this should work but is currently untested
|
||||
|
||||
int errc = 0;
|
||||
libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc);
|
||||
if (errc != 0) {
|
||||
free(buf);
|
||||
libraw_dcraw_clear_mem(thumb);
|
||||
libraw_close(libraw_lib);
|
||||
return;
|
||||
}
|
||||
|
||||
tn_ok = store_thumbnail_rgb24(ctx, thumb, doc);
|
||||
}
|
||||
|
||||
libraw_dcraw_clear_mem(thumb);
|
||||
|
||||
if (tn_ok == TRUE) {
|
||||
free(buf);
|
||||
libraw_close(libraw_lib);
|
||||
@@ -206,7 +220,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
libraw_dcraw_process(libraw_lib);
|
||||
|
||||
errc = 0;
|
||||
int errc = 0;
|
||||
libraw_processed_image_t *img = libraw_dcraw_make_mem_image(libraw_lib, &errc);
|
||||
if (errc != 0) {
|
||||
free(buf);
|
||||
|
||||
1 third-party/libscan/libscan/raw/raw.h (vendored)
@@ -8,6 +8,7 @@ typedef struct {
    logf_callback_t logf;
    store_callback_t store;

    int enable_tn;
    int tn_size;
    float tn_qscale;
} scan_raw_ctx_t;

7 third-party/libscan/libscan/scan.h (vendored)
@@ -48,6 +48,9 @@ typedef int scan_code_t;
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);

#define SIST_DOC_ID_LEN MD5_STR_LENGTH
#define SIST_INDEX_ID_LEN MD5_STR_LENGTH

enum metakey {
    // String
    MetaContent = 1,
@@ -103,7 +106,7 @@ typedef struct meta_line {


typedef struct document {
    unsigned char path_md5[MD5_DIGEST_LENGTH];
    char doc_id[SIST_DOC_ID_LEN];
    unsigned long size;
    unsigned int mime;
    int mtime;
@@ -159,7 +162,7 @@ typedef struct parse_job_t {
    int base;
    int ext;
    struct vfile vfile;
    unsigned char parent[MD5_DIGEST_LENGTH];
    char parent[SIST_DOC_ID_LEN];
    char filepath[1];
} parse_job_t;

41 third-party/libscan/test/main.cpp (vendored)
@@ -350,9 +350,13 @@ TEST(Comic, ComicIssue160) {
|
||||
load_doc_file("libscan-test-files/test_files/ebook/comic-segfault-issue-160.cbr", &f, &doc);
|
||||
|
||||
int tn_size_saved = comic_ctx.tn_size;
|
||||
comic_ctx.tn_size = 0;
|
||||
size_t size_before = store_size;
|
||||
|
||||
comic_ctx.enable_tn = FALSE;
|
||||
parse_comic(&comic_ctx, &f, &doc);
|
||||
comic_ctx.tn_size = tn_size_saved;
|
||||
comic_ctx.enable_tn = tn_size_saved;
|
||||
|
||||
ASSERT_EQ(store_size, size_before);
|
||||
|
||||
cleanup(&doc, &f);
|
||||
}
|
||||
@@ -669,8 +673,6 @@ TEST(Ooxml, Docx2Archive) {
|
||||
ASSERT_EQ(get_meta(&LastSubDoc, MetaPages)->long_val, 1);
|
||||
ASSERT_EQ(strlen(get_meta(&LastSubDoc, MetaContent)->str_val), 2780);
|
||||
|
||||
fprintf(stderr, "%s\n", get_meta(&LastSubDoc, MetaContent)->str_val);
|
||||
|
||||
ooxml_500_ctx.content_size = 500;
|
||||
|
||||
cleanup(&doc, &f);
|
||||
@@ -921,7 +923,6 @@ TEST(Msdoc, Test1Pdf) {
|
||||
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "October 2000") != nullptr);
|
||||
ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "INTERNATIONAL ORGANIZATION FOR STANDARDIZATION");
|
||||
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "Oliver Morgan");
|
||||
ASSERT_EQ(get_meta(&doc, MetaPages)->long_val, 57);
|
||||
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
|
||||
ASSERT_NE(size_before, store_size);
|
||||
|
||||
@@ -1028,6 +1029,23 @@ TEST(Msdoc, TestUtf8Text) {
|
||||
cleanup(&doc, &f);
|
||||
}
|
||||
|
||||
TEST(Msdoc, Test5Pdf) {
|
||||
vfile_t f;
|
||||
document_t doc;
|
||||
load_doc_file("libscan-test-files/test_files/msdoc/test5.doc", &f, &doc);
|
||||
|
||||
size_t size_before = store_size;
|
||||
|
||||
parse_msdoc(&msdoc_ctx, &f, &doc);
|
||||
|
||||
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "орган Федеральной") != nullptr);
|
||||
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "uswo");
|
||||
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
|
||||
ASSERT_NE(size_before, store_size);
|
||||
|
||||
cleanup(&doc, &f);
|
||||
}
|
||||
|
||||
TEST(Msdoc, TestFuzz1) {
|
||||
vfile_t f;
|
||||
document_t doc;
|
||||
@@ -1111,6 +1129,7 @@ int main(int argc, char **argv) {
|
||||
ebook_ctx.tesseract_lang = "eng";
|
||||
ebook_ctx.tesseract_path = "./tessdata";
|
||||
ebook_ctx.tn_size = 500;
|
||||
ebook_ctx.enable_tn = TRUE;
|
||||
ebook_ctx.log = noop_log;
|
||||
ebook_ctx.logf = noop_logf;
|
||||
ebook_ctx.fast_epub_parse = 0;
|
||||
@@ -1124,12 +1143,14 @@ int main(int argc, char **argv) {
|
||||
|
||||
comic_ctx.tn_qscale = 1.0;
|
||||
comic_ctx.tn_size = 500;
|
||||
comic_ctx.enable_tn = TRUE;
|
||||
comic_ctx.log = noop_log;
|
||||
comic_ctx.logf = noop_logf;
|
||||
comic_ctx.store = counter_store;
|
||||
|
||||
comic_big_ctx.tn_qscale = 1.0;
|
||||
comic_big_ctx.tn_size = 5000;
|
||||
comic_big_ctx.enable_tn = TRUE;
|
||||
comic_big_ctx.log = noop_log;
|
||||
comic_big_ctx.logf = noop_logf;
|
||||
comic_big_ctx.store = counter_store;
|
||||
@@ -1138,10 +1159,12 @@ int main(int argc, char **argv) {
|
||||
media_ctx.logf = noop_logf;
|
||||
media_ctx.store = counter_store;
|
||||
media_ctx.tn_size = 500;
|
||||
media_ctx.tn_count = 1;
|
||||
media_ctx.tn_qscale = 1.0;
|
||||
media_ctx.max_media_buffer = (long) 2000 * (long) 1024 * (long) 1024;
|
||||
|
||||
ooxml_500_ctx.content_size = 500;
|
||||
ooxml_500_ctx.enable_tn = TRUE;
|
||||
ooxml_500_ctx.log = noop_log;
|
||||
ooxml_500_ctx.logf = noop_logf;
|
||||
ooxml_500_ctx.store = counter_store;
|
||||
@@ -1154,6 +1177,7 @@ int main(int argc, char **argv) {
|
||||
raw_ctx.logf = noop_logf;
|
||||
raw_ctx.store = counter_store;
|
||||
raw_ctx.tn_size = 500;
|
||||
raw_ctx.enable_tn = TRUE;
|
||||
raw_ctx.tn_qscale = 5.0;
|
||||
|
||||
msdoc_ctx.log = noop_log;
|
||||
@@ -1161,12 +1185,14 @@ int main(int argc, char **argv) {
|
||||
msdoc_ctx.store = counter_store;
|
||||
msdoc_ctx.content_size = 500;
|
||||
msdoc_ctx.tn_size = 500;
|
||||
msdoc_ctx.enable_tn = TRUE;
|
||||
|
||||
msdoc_text_ctx.log = noop_log;
|
||||
msdoc_text_ctx.logf = noop_logf;
|
||||
msdoc_text_ctx.store = counter_store;
|
||||
msdoc_text_ctx.content_size = 500;
|
||||
msdoc_text_ctx.tn_size = 0;
|
||||
msdoc_text_ctx.enable_tn = FALSE;
|
||||
|
||||
wpd_ctx.log = noop_log;
|
||||
wpd_ctx.logf = noop_logf;
|
||||
@@ -1179,4 +1205,7 @@ int main(int argc, char **argv) {
|
||||
av_log_set_level(AV_LOG_QUIET);
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
}
|
||||
|
||||
// 0x6130000d2580
|
||||
// "/mnt/Hatchery/m ain/downloads/qbittorrent/downloads/Roskomnadzor/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ.zip#/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ/Лопатин Ю.М/Секнин/2015 год/Обучение по ", <incomplete sequence \320>...
|
||||
2 third-party/libscan/third-party/antiword (vendored)
Submodule third-party/libscan/third-party/antiword updated: 62ae66db99...ddb042143e
1 third-party/libscan/third-party/libmobi (vendored)
Submodule third-party/libscan/third-party/libmobi added at 395dbde361