Mirror of https://github.com/simon987/sist2.git (synced 2025-12-13 15:29:04 +00:00)

Comparing commits: 3.0.4 ... process-po (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | 903feb4889 |  |
**.gitattributes** (vendored, new file, +3)

````diff
@@ -0,0 +1,3 @@
+CMakeModules/* linguist-vendored
+**/*_generated.c linguist-vendored
+**/*_generated.h linguist-vendored
````
**CMakeLists.txt**

````diff
@@ -5,7 +5,6 @@ set(CMAKE_C_STANDARD 11)
 
 option(SIST_DEBUG "Build a debug executable" on)
 option(SIST_FAST "Enable more optimisation flags" off)
-option(SIST_DEBUG_INFO "Turn on debug information in web interface" on)
 
 add_compile_definitions(
         "SIST_PLATFORM=${SIST_PLATFORM}"
@@ -15,17 +14,7 @@ if (SIST_DEBUG)
     add_compile_definitions(
             "SIST_DEBUG=${SIST_DEBUG}"
     )
-    set(VCPKG_BUILD_TYPE debug)
-else ()
-    set(VCPKG_BUILD_TYPE release)
-endif ()
-
-if (SIST_DEBUG_INFO)
-    add_compile_definitions(
-            "SIST_DEBUG_INFO=${SIST_DEBUG_INFO}"
-    )
-endif ()
+endif()
 
 add_subdirectory(third-party/libscan)
 set(ARGPARSE_SHARED off)
@@ -58,7 +47,7 @@ add_executable(sist2
 
         src/auth0/auth0_c_api.h src/auth0/auth0_c_api.cpp
 
-        src/database/database_stats.c src/database/database_schema.c)
+        src/database/database_stats.c src/database/database_stats.h src/database/database_schema.c)
 set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
 
 target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
@@ -96,7 +85,7 @@ if (SIST_DEBUG)
             -fno-omit-frame-pointer
             -fsanitize=address
             -fno-inline
             #            -O2
     )
     target_link_options(
             sist2
````
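The net effect of this CMakeLists change is that `SIST_DEBUG_INFO` disappears as a configure-time switch and the vcpkg build-type selection moves out of this file. A minimal before/after configure sketch, using the same flags and toolchain path that appear in the build scripts later in this diff:

```bash
# 3.0.4 side: web-interface debug info is a CMake option
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off \
      -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..

# process-po side: the option no longer exists and is simply omitted
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off \
      -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
```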
**Dockerfile** (10 lines changed)
````diff
@@ -19,12 +19,13 @@ COPY sist2-admin sist2-admin
 RUN cd sist2-vue/ && npm install && npm run build
 RUN cd sist2-admin/frontend/ && npm install && npm run build
 
-RUN mkdir build && cd build && cmake -DSIST_PLATFORM=x64_linux_docker -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
+RUN mkdir build && cd build && cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
 RUN cd build && make -j$(nproc)
 RUN strip build/sist2 || mv build/sist2_debug build/sist2
 
 FROM --platform="linux/amd64" ubuntu@sha256:965fbcae990b0467ed5657caceaec165018ef44a4d2d46c7cdea80a9dff0d1ea
 
+WORKDIR /root
+
 ENV LANG C.UTF-8
 ENV LC_ALL C.UTF-8
@@ -51,7 +52,6 @@ RUN mkdir -p /usr/share/tessdata && \
 COPY --from=build /build/build/sist2 /root/sist2
 
 # sist2-admin
-WORKDIR /root/sist2-admin
-COPY sist2-admin/requirements.txt /root/sist2-admin/
-RUN python3 -m pip install --no-cache -r /root/sist2-admin/requirements.txt
-COPY --from=build /build/sist2-admin/ /root/sist2-admin/
+COPY sist2-admin/requirements.txt sist2-admin/
+RUN python3 -m pip install --no-cache -r sist2-admin/requirements.txt
+COPY --from=build /build/sist2-admin/ sist2-admin/
````
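Note the `WORKDIR /root` added in the runtime stage: the shortened relative paths in the admin `COPY`/`RUN` lines on the `+` side resolve against it. For reference, a `docker run` equivalent of the compose service that the README change further down removes; image tag, ports, volumes and entrypoint are taken from that snippet:

```bash
docker run -d --restart unless-stopped \
    -v "$(pwd)/sist2-admin-data:/sist2-admin" \
    -v /:/host \
    -p 4090:4090 -p 8080:8080 \
    --workdir /root/sist2-admin \
    --entrypoint python3 \
    simon987/sist2:3.0.3 /root/sist2-admin/sist2_admin/app.py
```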
**Dockerfile.arm64**

````diff
@@ -3,7 +3,7 @@ MAINTAINER simon987 <me@simon987.net>
 
 WORKDIR /build/
 ADD . /build/
-RUN mkdir build && cd build && cmake -DSIST_PLATFORM=arm64_linux_docker -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
+RUN mkdir build && cd build && cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
 RUN cd build && make -j$(nproc)
 RUN strip build/sist2 || mv build/sist2_debug build/sist2
 
````
**README.md** (108 lines changed)
````diff
@@ -10,13 +10,13 @@ sist2 (Simple incremental search tool)
 
 *Warning: sist2 is in early development*
 
 ![sist2.png](docs/sist2.webp)
 
 ## Features
 
 * Fast, low memory usage, multi-threaded
-* Manage & schedule scan jobs with simple web interface (Docker only)
 * Mobile-friendly Web interface
+* Portable (all its features are packaged in a single executable)
 * Extracts text and metadata from common file types \*
 * Generates thumbnails \*
 * Incremental scanning
````
````diff
@@ -24,60 +24,47 @@ sist2 (Simple incremental search tool)
 * Recursive scan inside archive files \*\*
 * OCR support with tesseract \*\*\*
 * Stats page & disk utilisation visualization
-* Named-entity recognition (client-side) \*\*\*\*
 
 \* See [format support](#format-support)
 \*\* See [Archive files](#archive-files)
 \*\*\* See [OCR](#ocr)
-\*\*\*\* See [Named-Entity Recognition](#NER)
+
+![sist2.png](docs/sist2.png)
 
 ## Getting Started
 
-### Using Docker Compose *(Windows/Linux/Mac)*
-
-```yaml
-version: "3"
-
-services:
-  elasticsearch:
-    image: elasticsearch:7.17.9
-    restart: unless-stopped
-    environment:
-      - "discovery.type=single-node"
-      - "ES_JAVA_OPTS=-Xms2g -Xmx2g"
-  sist2-admin:
-    image: simon987/sist2:3.0.3
-    restart: unless-stopped
-    volumes:
-      - ./sist2-admin-data/:/sist2-admin/
-      - /:/host
-    ports:
-      - 4090:4090 # sist2
-      - 8080:8080 # sist2-admin
-    working_dir: /root/sist2-admin/
-    entrypoint: python3 /root/sist2-admin/sist2_admin/app.py
-```
-
-Navigate to http://localhost:8080/ to configure sist2-admin.
-
-### Using the executable file *(Linux/WSL only)*
-
 1. Have an Elasticsearch (>= 6.8.X, ideally >=7.14.0) instance running
     1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
-    2. *(or)* Run using docker:
+    1. *(or)* Run using docker:
     ```bash
     docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.17.9
     ```
+    1. *(or)* Run using docker-compose:
+    ```yaml
+    elasticsearch:
+      image: docker.elastic.co/elasticsearch/elasticsearch:7.17.9
+      environment:
+        - discovery.type=single-node
+        - "ES_JAVA_OPTS=-Xms1G -Xmx2G"
+    ```
+1. Download sist2 executable
+    1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
+       Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
+    2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
+       recommended!)*
+    3. *(or)* `docker pull simon987/sist2:2.12.1-x64-linux`
 
-2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
-   Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
-3. See [usage guide](docs/USAGE.md) for command line usage.
+1. See [Usage guide](docs/USAGE.md)
 
-Example usage:
+\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
 
-1. Scan a directory: `sist2 scan ~/Documents --output ./documents.sist2`
-2. Push index to Elasticsearch: `sist2 index ./documents.sist2`
-3. Start web interface: `sist2 web ./documents.sist2`
+## Example usage
+
+See [Usage guide](docs/USAGE.md) for more details
+
+1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
+1. Push index to Elasticsearch: `sist2 index ./docs_idx`
+1. Start web interface: `sist2 web ./docs_idx`
 
 ## Format support
 
````
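Whichever way Elasticsearch is started, it listens on port 9200; a quick reachability check before running sist2 (this is the standard Elasticsearch root endpoint, nothing sist2-specific):

```bash
# block until Elasticsearch answers, then print its cluster info JSON
until curl -s http://localhost:9200 >/dev/null; do sleep 1; done
curl -s http://localhost:9200
```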
````diff
@@ -95,7 +82,7 @@ Example usage:
 | tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
 | docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
 | doc (MS Word 97-2003) | antiword | yes | no | author, title |
-| mobi, azw, azw3 | libmobi | yes | yes | author, title |
+| mobi, azw, azw3 | libmobi | yes | no | author, title |
 | wpd (WordPerfect) | libwpd | yes | no | *planned* |
 | json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
 
````
````diff
@@ -136,44 +123,20 @@ sist2 scan --ocr-images --ocr-lang eng ~/Images/Screenshots/
 sist2 scan --ocr-ebooks --ocr-images --ocr-lang eng+chi_sim ~/Chinese-Bilingual/
 ```
 
-### NER
-
-sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported repository URL to
-**Configuration** > **Machine learning options** > **Model repositories**
-to enable it.
-
-The text processing is done in your browser, no data is sent to any third-party services.
-See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details.
-
-#### List of available repositories:
-
-| URL | Maintainer | Purpose |
-|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
-| [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General |
-
-<details>
-<summary>Screenshot</summary>
-
-![ner](ner.png)
-
-</details>
-
 ## Build from source
 
 You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries
 
-### Using docker
+### With docker (recommended)
 
 ```bash
 git clone --recursive https://github.com/simon987/sist2/
 cd sist2
-docker build . -t my-sist2-image
-# Copy sist2 executable from docker image
+docker build . -f ./Dockerfile -t my-sist2-image
 docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
 ```
 
-### Using a linux computer
+### On a linux computer
 
 1. Install compile-time dependencies
 
@@ -181,14 +144,15 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
    apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
    ```
 
-2. Install vcpkg using my fork: https://github.com/simon987/vcpkg
-3. Install vcpkg dependencies
+1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
+1. Install vcpkg dependencies
 
    ```bash
   vcpkg install curl[core,openssl] sqlite3 cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample]
   ```
 
-4. Build
+1. Build
 ```bash
 git clone --recursive https://github.com/simon987/sist2/
 (cd sist2-vue; npm install; npm run build)
````
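The numbered steps stop at the frontend build; the remaining configure-and-compile step is done the same way as the build scripts further down in this diff:

```bash
mkdir build && cd build
cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off \
      -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
make -j "$(nproc)"
```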
**docs/USAGE.md** (231 lines changed)
````diff
@@ -1,64 +1,78 @@
 # Usage
 
+*More examples (specifically with docker/compose) are in progress*
+
+* [scan](#scan)
+    * [options](#scan-options)
+    * [examples](#scan-examples)
+    * [index format](#index-format)
+* [index](#index)
+    * [options](#index-options)
+    * [examples](#index-examples)
+* [web](#web)
+    * [options](#web-options)
+    * [examples](#web-examples)
+    * [rewrite_url](#rewrite_url)
+* [elasticsearch](#elasticsearch)
+* [exec-script](#exec-script)
+* [tagging](#tagging)
+    * [sidecar files](#sidecar-files)
 
 ```
 Usage: sist2 scan [OPTION]... PATH
    or: sist2 index [OPTION]... INDEX
    or: sist2 web [OPTION]... INDEX...
    or: sist2 exec-script [OPTION]... INDEX
 
 Lightning-fast file system indexer and search tool.
 
   -h, --help                    show this help message and exit
-  -v, --version                 Print version and exit.
-  --verbose                     Turn on logging.
-  --very-verbose                Turn on debug messages.
-  --json-logs                   Output logs in JSON format.
+  -v, --version                 Show version and exit
+  --verbose                     Turn on logging
+  --very-verbose                Turn on debug messages
 
 Scan options
-  -t, --threads=<int>           Number of threads. DEFAULT: 1
-  -q, --thumbnail-quality=<int> Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT: 2
-  --thumbnail-size=<int>        Thumbnail size, in pixels. DEFAULT: 552
-  --thumbnail-count=<int>       Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT: 1
-  --content-size=<int>          Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT: 32768
-  -o, --output=<str>            Output index file path. DEFAULT: index.sist2
-  --incremental                 If the output file path exists, only scan new or modified files.
-  --optimize-index              Defragment index file after scan to reduce its file size.
+  -t, --threads=<int>           Number of threads. DEFAULT=1
+  --mem-throttle=<int>          Total memory threshold in MiB for scan throttling. DEFAULT=0
+  -q, --thumbnail-quality=<int> Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2
+  --thumbnail-size=<int>        Thumbnail size, in pixels. DEFAULT=500
+  --thumbnail-count=<int>       Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1
+  --content-size=<int>          Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768
+  --incremental=<str>           Reuse an existing index and only scan modified files.
+  -o, --output=<str>            Output directory. DEFAULT=index.sist2/
   --rewrite-url=<str>           Serve files from this url instead of from disk.
-  --name=<str>                  Index display name. DEFAULT: index
+  --name=<str>                  Index display name. DEFAULT: (name of the directory)
   --depth=<int>                 Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
-  --archive=<str>               Archive file mode (skip|list|shallow|recurse). skip: don't scan, list: only save file names as text, shallow: don't scan archives inside archives. DEFAULT: recurse
+  --archive=<str>               Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
   --archive-passphrase=<str>    Passphrase for encrypted archive files
   --ocr-lang=<str>              Tesseract language (use 'tesseract --list-langs' to see which are installed on your machine)
   --ocr-images                  Enable OCR'ing of image files.
   --ocr-ebooks                  Enable OCR'ing of ebook files.
-  -e, --exclude=<str>           Files that match this regex will not be scanned.
-  --fast                        Only index file names & mime type.
+  -e, --exclude=<str>           Files that match this regex will not be scanned
+  --fast                        Only index file names & mime type
   --treemap-threshold=<str>     Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
   --mem-buffer=<int>            Maximum memory buffer size per thread in MiB for files inside archives (see USAGE.md). DEFAULT: 2000
   --read-subtitles              Read subtitles from media files.
-  --fast-epub                   Faster but less accurate EPUB parsing (no thumbnails, metadata).
+  --fast-epub                   Faster but less accurate EPUB parsing (no thumbnails, metadata)
   --checksums                   Calculate file checksums when scanning.
   --list-file=<str>             Specify a list of newline-delimited paths to be scanned instead of normal directory traversal. Use '-' to read from stdin.
 
 Index options
-  -t, --threads=<int>           Number of threads. DEFAULT: 1
-  --es-url=<str>                Elasticsearch url with port. DEFAULT: http://localhost:9200
-  --es-insecure-ssl             Do not verify SSL connections to Elasticsearch.
-  --es-index=<str>              Elasticsearch index name. DEFAULT: sist2
-  -p, --print                   Print JSON documents to stdout instead of indexing to elasticsearch.
-  --incremental-index           Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch.
+  -t, --threads=<int>           Number of threads. DEFAULT=1
+  --es-url=<str>                Elasticsearch url with port. DEFAULT=http://localhost:9200
+  --es-index=<str>              Elasticsearch index name. DEFAULT=sist2
+  -p, --print                   Just print JSON documents to stdout.
+  --incremental-index           Conduct incremental indexing, assumes that the old index is already digested by Elasticsearch.
   --script-file=<str>           Path to user script.
   --mappings-file=<str>         Path to Elasticsearch mappings.
   --settings-file=<str>         Path to Elasticsearch settings.
   --async-script                Execute user script asynchronously.
-  --batch-size=<int>            Index batch size. DEFAULT: 70
-  -f, --force-reset             Reset Elasticsearch mappings and settings.
+  --batch-size=<int>            Index batch size. DEFAULT: 100
+  -f, --force-reset             Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
 
 Web options
-  --es-url=<str>                Elasticsearch url. DEFAULT: http://localhost:9200
-  --es-insecure-ssl             Do not verify SSL connections to Elasticsearch.
-  --es-index=<str>              Elasticsearch index name. DEFAULT: sist2
-  --bind=<str>                  Listen for connections on this address. DEFAULT: localhost:4090
+  --es-url=<str>                Elasticsearch url. DEFAULT=http://localhost:9200
+  --es-index=<str>              Elasticsearch index name. DEFAULT=sist2
+  --bind=<str>                  Listen on this address. DEFAULT=localhost:4090
   --auth=<str>                  Basic auth in user:password format
   --auth0-audience=<str>        API audience/identifier
   --auth0-domain=<str>          Application domain
````
````diff
@@ -70,15 +84,77 @@ Web options
   --lang=<str>                  Default UI language. Can be changed by the user
 
 Exec-script options
-  --es-url=<str>                Elasticsearch url. DEFAULT: http://localhost:9200
-  --es-insecure-ssl             Do not verify SSL connections to Elasticsearch.
-  --es-index=<str>              Elasticsearch index name. DEFAULT: sist2
+  --es-url=<str>                Elasticsearch url. DEFAULT=http://localhost:9200
+  --es-index=<str>              Elasticsearch index name. DEFAULT=sist2
   --script-file=<str>           Path to user script.
   --async-script                Execute user script asynchronously.
 
 Made by simon987 <me@simon987.net>. Released under GPL-3.0
 ```
 
+## Scan
+
+### Scan options
+
+* `-t, --threads`
+
+  Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
+
+* `--mem-throttle`
+
+  Total memory threshold in MiB for scan throttling. Worker threads will not start a new parse job
+  until the total memory usage of sist2 is below this threshold. Set to 0 to disable. DEFAULT=0
+
+* `-q, --thumbnail-quality`
+
+  Thumbnail quality, on a scale of 2 to 31, 2 being the best. See section below for a rough estimate of thumbnail database size
+
+* `--thumbnail-size`
+
+  Thumbnail size, in pixels.
+
+* `--thumbnail-count`
+
+  Maximum number of thumbnails to generate. When set to a value >= 2, thumbnails for video previews
+  will be generated. The actual number of thumbnails generated depends on the length of the video (maximum 1 image
+  every ~7s). Set to 0 to completely disable thumbnails.
+
+* `--content-size`
+
+  Number of bytes of text to be extracted from the content of files (plain text, PDFs etc.).
+  Repeated whitespace and special characters do not count toward this limit.
+  Set to 0 to completely disable content parsing.
+
+* `--incremental`
+
+  Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
+  will be copied to the new index and will not be parsed again.
+
+* `-o, --output` Output directory.
+* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url))
+* `--name` Set the `name` option for the web module
+* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth
+* `--archive` Archive file mode.
+    * skip: Don't parse
+    * list: Only get file names as text
+    * shallow: Don't parse archives inside archives.
+    * recurse: Scan archives recursively (default)
+* `--ocr-lang`, `--ocr-ebooks`, `--ocr-images` See [OCR](../README.md#OCR)
+* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any
+  part of the full absolute path.
+
+  Examples:
+    * `-e ".*\.ttf"`: Ignore ttf files
+    * `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
+    * `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
+    * `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directories
+    * `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
+      `/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
+* `--fast` Only index file names and mime type
+* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
+  will not be considered for the disk utilisation visualization; their size will be added to
+  the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards",
+  and so on.
+
+  In effect, smaller `treemap-threshold` values will yield a more detailed
+  (but also a more cluttered and harder to read) visualization.
+
+* `--mem-buffer` Maximum memory buffer size in MiB (per thread) for files inside archives. Media files
+  larger than this number will be read sequentially and no *seek* operations will be supported.
+
+  To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
+* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
+* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
+* `--checksums` Calculate file checksums (SHA1) when scanning files. This option does not cause any additional read
+  operations. Checksums are not calculated for all file types, unless the file is inside an archive. When enabled, duplicate
+  files are hidden in the web UI (this behaviour can be toggled in the Configuration page).
+
````
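The `--exclude` patterns in the hunk above are matched against the full absolute path. A rough way to preview what a pattern would skip before committing to a long scan (an approximation using grep, not sist2's own matcher):

```bash
# files the pattern ".*\.(ttf|rar)" would exclude
find ~/Documents -type f | grep -E '.*\.(ttf|rar)' | head

# files that would remain
find ~/Documents -type f | grep -vE '.*\.(ttf|rar)' | wc -l
```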
````diff
 
 #### Thumbnail database size estimation
 
 See chart below for rough estimate of thumbnail size vs. thumbnail size & quality arguments:
@@ -88,6 +164,8 @@ that is about `8000000 * 36kB = 288GB`.
 
 ![thumbnail_size](thumbnail_size.png)
 
+// TODO: add note about LMDB page size 4096
+
 ### Scan examples
 
 Simple scan
````
````diff
@@ -97,19 +175,82 @@ sist2 scan ~/Documents
 sist2 scan \
     --threads 4 --content-size 16000000 --thumbnail-quality 2 --archive shallow \
     --name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
-    ~/Documents -o ./documents.sist2
+    ~/Documents -o ./documents.idx/
 ```
 
 Incremental scan
 
-If the index file does not exist, `--incremental` has no effect.
-
-```bash
-sist scan ~/Documents -o ./documents.sist2
-sist scan ~/Documents -o ./documents.sist2 --incremental
-# or
-sist scan ~/Documents -o ./documents.sist2 --incremental
-sist scan ~/Documents -o ./documents.sist2 --incremental
 ```
+sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
+```
 
````
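In the new layout an incremental scan reads one index and writes another, rather than updating the output file in place. A sketch of a periodic re-scan under that model (directory names assumed):

```bash
# initial full scan
sist2 scan ~/Documents -o ./docs_v1.idx/

# later: copy unmodified entries from docs_v1, parse only changed files
sist2 scan --incremental ./docs_v1.idx/ -o ./docs_v2.idx/ ~/Documents
```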
````diff
+### Index format
+
+A typical `ndjson` type index structure looks like this:
+```
+documents.idx/
+├── descriptor.json
+├── _index_main.ndjson.zst
+├── treemap.csv
+├── agg_mime.csv
+├── agg_date.csv
+├── agg_size.csv
+├── thumbs/
+|   ├── data.mdb
+|   └── lock.mdb
+├── tags/
+|   ├── data.mdb
+|   └── lock.mdb
+└── meta/
+    ├── data.mdb
+    └── lock.mdb
+```
+
+The `_index_*.ndjson.zst` files contain the document data in JSON format, in a compressed newline-delimited file.
+
+The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
+database containing the thumbnails.
+
+The `descriptor.json` file contains general information about the index. The
+following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
+
+The `.csv` files are pre-computed aggregations necessary for the stats page.
+
+*thumbs/*:
+
+LMDB key-value store. Keys are **binary** 16-byte md5 hashes* (`_id` field)
+and values are raw image bytes.
+
+*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
+
````
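Per the footnote above, a thumbnail key can be derived by hand: the md5 of the file's index-root-relative path, used as 16 raw bytes. A quick sketch (the path shown is made up):

```bash
# hex digest of the LMDB key for one file; the 32 hex chars printed are the
# 16 raw bytes used as the key in thumbs/data.mdb
echo -n "photos/2019/IMG_1234.jpg" | md5sum
```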
````diff
+## Index
+
+### Index options
+
+* `--es-url`
+
+  Elasticsearch url and port. If you are using docker, make sure that both containers are on the
+  same network.
+
+* `--es-index`
+
+  Elasticsearch index name. DEFAULT=sist2
+
+* `-p, --print`
+
+  Print index in JSON format to stdout.
+
+* `--incremental-index`
+
+  Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch.
+  Only the new changes since the last scan will be sent.
+
+* `--script-file`
+
+  Path to user script. See [Scripting](scripting.md).
+
+* `--mappings-file`
+
+  Path to custom Elasticsearch mappings. If none is specified, [the bundled mappings](https://github.com/simon987/sist2/tree/master/schema) will be used.
+
+* `--settings-file`
+
+  Path to custom Elasticsearch settings. *(See above)*
+
+* `--async-script`
+
+  Use `wait_for_completion=false` elasticsearch option while executing user script.
+  (See [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html))
+
+* `--batch-size=<int>`
+
+  Index batch size. Indexing is generally faster with larger batches, but payloads that
+  are too large will fail and additional overhead for retrying with smaller sizes may slow
+  down the process.
+
+* `-f, --force-reset`
+
+  Reset Elasticsearch mappings and settings.
+
+* `-t, --threads` Number of threads to use. Ideally, choose a number equal to the number of logical cores of the machine hosting Elasticsearch.
 
 ### Index examples
 
````
````diff
@@ -239,8 +380,8 @@ The sidecar file must have exactly the same file path and the `.s2meta` suffix.
 ```
 
 ```
-sist2 scan ~/Documents -o ./docs.sist2
-sist2 index ./docs.sist2
+sist2 scan ~/Documents -o ./docs.idx
+sist2 index ./docs.idx
 ```
 
 *NOTE*: It is technically possible to overwrite the `tag` value using sidecar files, however,
````
**docs/ner.png** (binary, deleted; was 448 KiB)

**docs/sist2.gif** (binary, deleted; was 3.7 MiB)

**docs/sist2.png** (binary, new file; 1011 KiB)
**Build and dev scripts**

````diff
@@ -7,7 +7,7 @@ git submodule update --init --recursive
 mkdir build
 (
   cd build
-  cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
+  cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
   make -j $(nproc)
   strip sist2
   ./sist2 -v > VERSION
@@ -17,7 +17,7 @@ mv build/sist2 sist2-x64-linux
 (
   cd build
   rm -rf CMakeFiles CMakeCache.txt
-  cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
+  cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
   make -j $(nproc)
 )
 mv build/sist2_debug sist2-x64-linux-debug
@@ -7,7 +7,7 @@ git submodule update --init --recursive
 mkdir build
 (
   cd build
-  cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
+  cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
   make -j $(nproc)
   strip sist2
 )
@@ -16,7 +16,7 @@ mv build/sist2 sist2-arm64-linux
 rm -rf CMakeFiles CMakeCache.txt
 (
   cd build
-  cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG_INFO=on -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
+  cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
   make -j $(nproc)
 )
 mv build/sist2_debug sist2-arm64-linux-debug
@@ -1,3 +1,3 @@
 docker run --rm -it --name "sist2-dev-es"\
   -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \
-  -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.7.0
+  -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:8.1.2
````
**sist2-admin/frontend/package-lock.json** (generated, 12 lines changed)
````diff
@@ -10491,9 +10491,9 @@
       "integrity": "sha1-JFNCdeKnvGvnvIZhHMFq4KVlSHE="
     },
     "node_modules/webpack": {
-      "version": "5.78.0",
-      "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.78.0.tgz",
-      "integrity": "sha512-gT5DP72KInmE/3azEaQrISjTvLYlSM0j1Ezhht/KLVkrqtv10JoP/RXhwmX/frrutOPuSq3o5Vq0ehR/4Vmd1g==",
+      "version": "5.75.0",
+      "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.75.0.tgz",
+      "integrity": "sha512-piaIaoVJlqMsPtX/+3KTTO6jfvrSYgauFVdt8cr9LTHKmcq/AMd4mhzsiP7ZF/PGRNPGA8336jldh9l2Kt2ogQ==",
       "dev": true,
       "dependencies": {
         "@types/eslint-scope": "^3.7.3",
@@ -18719,9 +18719,9 @@
       "integrity": "sha1-JFNCdeKnvGvnvIZhHMFq4KVlSHE="
     },
     "webpack": {
-      "version": "5.78.0",
-      "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.78.0.tgz",
-      "integrity": "sha512-gT5DP72KInmE/3azEaQrISjTvLYlSM0j1Ezhht/KLVkrqtv10JoP/RXhwmX/frrutOPuSq3o5Vq0ehR/4Vmd1g==",
+      "version": "5.75.0",
+      "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.75.0.tgz",
+      "integrity": "sha512-piaIaoVJlqMsPtX/+3KTTO6jfvrSYgauFVdt8cr9LTHKmcq/AMd4mhzsiP7ZF/PGRNPGA8336jldh9l2Kt2ogQ==",
       "dev": true,
       "requires": {
         "@types/eslint-scope": "^3.7.3",
````
**sist2-admin/frontend/yarn.lock**

````diff
@@ -1390,14 +1390,14 @@
     thread-loader "^3.0.0"
     webpack "^5.54.0"
 
-"@vue/cli-plugin-router@^5.0.8", "@vue/cli-plugin-router@~5.0.8":
+"@vue/cli-plugin-router@~5.0.8":
   version "5.0.8"
   resolved "https://registry.npmjs.org/@vue/cli-plugin-router/-/cli-plugin-router-5.0.8.tgz"
   integrity sha512-Gmv4dsGdAsWPqVijz3Ux2OS2HkMrWi1ENj2cYL75nUeL+Xj5HEstSqdtfZ0b1q9NCce+BFB6QnHfTBXc/fCvMg==
   dependencies:
     "@vue/cli-shared-utils" "^5.0.8"
 
-"@vue/cli-plugin-vuex@^5.0.8", "@vue/cli-plugin-vuex@~5.0.8":
+"@vue/cli-plugin-vuex@~5.0.8":
   version "5.0.8"
   resolved "https://registry.npmjs.org/@vue/cli-plugin-vuex/-/cli-plugin-vuex-5.0.8.tgz"
   integrity sha512-HSYWPqrunRE5ZZs8kVwiY6oWcn95qf/OQabwLfprhdpFWAGtLStShjsGED2aDpSSeGAskQETrtR/5h7VqgIlBA==
@@ -5492,9 +5492,9 @@ webpack-virtual-modules@^0.4.2:
   integrity sha512-5tyDlKLqPfMqjT3Q9TAqf2YqjwmnUleZwzJi1A5qXnlBCdj2AtOJ6wAWdglTIDOPgOiOrXeBeFcsQ8+aGQ6QbA==
 
 webpack@^5.54.0:
-  version "5.78.0"
-  resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.78.0.tgz#836452a12416af2a7beae906b31644cb2562f9e6"
-  integrity sha512-gT5DP72KInmE/3azEaQrISjTvLYlSM0j1Ezhht/KLVkrqtv10JoP/RXhwmX/frrutOPuSq3o5Vq0ehR/4Vmd1g==
+  version "5.75.0"
+  resolved "https://registry.npmjs.org/webpack/-/webpack-5.75.0.tgz"
+  integrity sha512-piaIaoVJlqMsPtX/+3KTTO6jfvrSYgauFVdt8cr9LTHKmcq/AMd4mhzsiP7ZF/PGRNPGA8336jldh9l2Kt2ogQ==
   dependencies:
     "@types/eslint-scope" "^3.7.3"
     "@types/estree" "^0.0.51"
````
**sist2-vue/package-lock.json** (generated, 1613 lines changed): diff suppressed because it is too large.
**sist2-vue/package.json**

````diff
@@ -9,11 +9,10 @@
   "dependencies": {
     "@auth0/auth0-spa-js": "^2.0.2",
     "@egjs/vue-infinitegrid": "3.3.0",
-    "@tensorflow/tfjs": "^4.4.0",
     "axios": "^0.25.0",
     "bootstrap-vue": "^2.21.2",
     "core-js": "^3.6.5",
-    "d3": "^7.8.4",
+    "d3": "^5.6.1",
     "date-fns": "^2.21.3",
     "dom-to-image": "^2.6.0",
     "fslightbox-vue": "fslightbox-vue.tgz",
````
**App.vue**

````diff
@@ -19,7 +19,6 @@
 import NavBar from "@/components/NavBar";
 import {mapActions, mapGetters, mapMutations} from "vuex";
 import Sist2Api from "@/Sist2Api";
-import ModelsRepo from "@/ml/modelsRepo";
 import {setupAuth0} from "@/main";
 
 export default {
@@ -37,17 +36,6 @@ export default {
   mounted() {
     this.$store.dispatch("loadConfiguration").then(() => {
       this.$root.$i18n.locale = this.$store.state.optLang;
-      ModelsRepo.init(this.$store.getters.mlRepositoryList).catch(err => {
-        this.$bvToast.toast(
-          this.$t("ml.repoFetchError"),
-          {
-            title: this.$t("ml.repoFetchErrorTitle"),
-            noAutoHide: true,
-            toaster: "b-toaster-bottom-right",
-            headerClass: "toast-header-warning",
-            bodyClass: "toast-body-warning",
-          });
-      });
     });
 
     this.$store.subscribe((mutation) => {
````
**Sist2Api.ts**

````diff
@@ -361,20 +361,20 @@ class Sist2Api {
         });
     }
 
-    getTreemapStat(indexId: string) {
-        return `${this.baseUrl}s/${indexId}/TMAP`;
+    getTreemapCsvUrl(indexId: string) {
+        return `${this.baseUrl}s/${indexId}/1`;
     }
 
-    getMimeStat(indexId: string) {
-        return `${this.baseUrl}s/${indexId}/MAGG`;
+    getMimeCsvUrl(indexId: string) {
+        return `${this.baseUrl}s/${indexId}/2`;
     }
 
-    getSizeStat(indexId: string) {
-        return `${this.baseUrl}s/${indexId}/SAGG`;
+    getSizeCsv(indexId: string) {
+        return `${this.baseUrl}s/${indexId}/3`;
     }
 
-    getDateStat(indexId: string) {
-        return `${this.baseUrl}s/${indexId}/DAGG`;
+    getDateCsv(indexId: string) {
+        return `${this.baseUrl}s/${indexId}/4`;
     }
 }
 
````
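After the rename, the stats endpoints are numbered rather than mnemonic (1=treemap, 2=mime, 3=size, 4=date) and the components below consume them as CSV instead of JSON. Fetching one by hand, assuming a local `sist2 web` on the default bind address and a hypothetical index id:

```bash
# treemap CSV for index <id>; 2, 3 and 4 select the other aggregations
curl -s "http://localhost:4090/s/<id>/1" | head
```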
**AnalyzedContentSpan.vue** (deleted)

````diff
@@ -1,21 +0,0 @@
-<template>
-    <span :style="getStyle()">{{span.text}}</span>
-</template>
-
-<script>
-
-import ModelsRepo from "@/ml/modelsRepo";
-
-export default {
-    name: "AnalyzedContentSpan",
-    props: ["span", "text"],
-    methods: {
-        getStyle() {
-            return ModelsRepo.data[this.$store.getters.mlModel.name].labelStyles[this.span.label];
-        }
-    }
-}
-</script>
-
-<style scoped></style>
````
**AnalyzedContentSpanContainer.vue** (deleted)

````diff
@@ -1,75 +0,0 @@
-<template>
-    <div>
-        <b-card class="mb-2">
-            <AnalyzedContentSpan v-for="span of legend" :key="span.id" :span="span"
-                                 class="mr-2"></AnalyzedContentSpan>
-        </b-card>
-        <div class="content-div">
-            <AnalyzedContentSpan v-for="span of mergedSpans" :key="span.id" :span="span"></AnalyzedContentSpan>
-        </div>
-    </div>
-</template>
-
-<script>
-
-import AnalyzedContentSpan from "@/components/AnalyzedContentSpan.vue";
-import ModelsRepo from "@/ml/modelsRepo";
-
-export default {
-    name: "AnalyzedContentSpanContainer",
-    components: {AnalyzedContentSpan},
-    props: ["spans", "text"],
-    computed: {
-        legend() {
-            return Object.entries(ModelsRepo.data[this.$store.state.mlModel.name].legend)
-                .map(([label, name]) => ({
-                    text: name,
-                    id: label,
-                    label: label
-                }));
-        },
-        mergedSpans() {
-            const spans = this.spans;
-
-            const merged = [];
-
-            let lastLabel = null;
-            let fixSpace = false;
-            for (let i = 0; i < spans.length; i++) {
-
-                if (spans[i].label !== lastLabel) {
-                    let start = spans[i].wordIndex;
-                    const nextSpan = spans.slice(i + 1).find(s => s.label !== spans[i].label)
-                    let end = nextSpan ? nextSpan.wordIndex : undefined;
-
-                    if (end !== undefined && this.text[end - 1] === " ") {
-                        end -= 1;
-                        fixSpace = true;
-                    }
-
-                    merged.push({
-                        text: this.text.slice(start, end),
-                        label: spans[i].label,
-                        id: spans[i].wordIndex
-                    });
-
-                    if (fixSpace) {
-                        merged.push({
-                            text: " ",
-                            label: "O",
-                            id: end
-                        });
-                        fixSpace = false;
-                    }
-                    lastLabel = spans[i].label;
-                }
-            }
-
-            return merged;
-        },
-    },
-}
-</script>
-
-<style scoped></style>
````
**Stats chart components (d3)**

````diff
@@ -120,7 +120,7 @@ export default {
     update(indexId) {
         const svg = d3.select("#date-histogram");
 
-        d3.json(Sist2Api.getDateStat(indexId)).then(tabularData => {
+        d3.csv(Sist2Api.getDateCsv(indexId)).then(tabularData => {
             dateHistogram(tabularData.slice(), svg, this.$t("d3.dateHistogram"));
         });
     }
@@ -91,7 +91,7 @@ export default {
         const mimeSvgCount = d3.select("#agg-mime-count");
         const fillOpacity = this.$store.state.optTheme === "black" ? 0.9 : 0.6;
 
-        d3.json(Sist2Api.getMimeStat(indexId)).then(tabularData => {
+        d3.csv(Sist2Api.getMimeCsvUrl(indexId)).then(tabularData => {
             mimeBarCount(tabularData.slice(), mimeSvgCount, fillOpacity, this.$t("d3.mimeCount"));
         });
     }
@@ -90,7 +90,7 @@ export default {
         const mimeSvgSize = d3.select("#agg-mime-size");
         const fillOpacity = this.$store.state.optTheme === "black" ? 0.9 : 0.6;
 
-        d3.json(Sist2Api.getMimeStat(indexId)).then(tabularData => {
+        d3.csv(Sist2Api.getMimeCsvUrl(indexId)).then(tabularData => {
             mimeBarSize(tabularData.slice(), mimeSvgSize, fillOpacity, this.$t("d3.mimeSize"));
         });
     }
@@ -117,7 +117,7 @@ export default {
     update(indexId) {
         const svg = d3.select("#size-histogram");
 
-        d3.json(Sist2Api.getSizeStat(indexId)).then(tabularData => {
+        d3.csv(Sist2Api.getSizeCsv(indexId)).then(tabularData => {
            sizeHistogram(tabularData.slice(), svg, this.$t("d3.sizeHistogram"));
         });
     }
@@ -240,7 +240,7 @@ export default {
         .style("overflow", "visible")
         .style("font", "10px sans-serif");
 
-    d3.json(Sist2Api.getTreemapStat(indexId)).then(tabularData => {
+    d3.csv(Sist2Api.getTreemapCsvUrl(indexId)).then(tabularData => {
         tabularData.forEach(row => {
             row.taxonomy = row.path.split("/");
             row.size = Number(row.size);
````
**Debug info card component**

````diff
@@ -1,5 +1,5 @@
 <template>
-    <b-card v-if="$store.state.sist2Info.showDebugInfo" class="mb-4 mt-4">
+    <b-card class="mb-4 mt-4">
         <b-card-title><DebugIcon class="mr-1"></DebugIcon>{{ $t("debug") }}</b-card-title>
         <p v-html="$t('debugDescription')"></p>
 
````
**Document card component**

````diff
@@ -16,10 +16,6 @@ export default {
     props: ["doc"],
     computed: {
         featuredLineHtml() {
-            if (this.$store.getters.optFeaturedFields === undefined) {
-                return "";
-            }
-
             const scope = {doc: this.doc._source, humanDate: humanDate, humanFileSize: humanFileSize};
 
             return this.$store.getters.optFeaturedFields
````
@@ -1,36 +1,6 @@
|
|||||||
<template>
|
<template>
|
||||||
<Preloader v-if="loading"></Preloader>
|
<Preloader v-if="loading"></Preloader>
|
||||||
<div v-else-if="content">
|
<div v-else-if="content" class="content-div" v-html="content"></div>
|
||||||
<b-form inline class="my-2" v-if="ModelsRepo.getOptions().length > 0">
|
|
||||||
<b-checkbox class="ml-auto mr-2" :checked="optAutoAnalyze"
|
|
||||||
@input="setOptAutoAnalyze($event); $store.dispatch('updateConfiguration')">
|
|
||||||
{{ $t("ml.auto") }}
|
|
||||||
</b-checkbox>
|
|
||||||
<b-button :disabled="mlPredictionsLoading || mlLoading" @click="mlAnalyze" variant="primary"
|
|
||||||
>{{ $t("ml.analyzeText") }}
|
|
||||||
</b-button>
|
|
||||||
<b-select :disabled="mlPredictionsLoading || mlLoading" class="ml-2" v-model="mlModel">
|
|
||||||
<b-select-option :value="opt.value" v-for="opt of ModelsRepo.getOptions()">{{ opt.text }}
|
|
||||||
</b-select-option>
|
|
||||||
</b-select>
|
|
||||||
</b-form>
|
|
||||||
|
|
||||||
<b-progress v-if="mlLoading" variant="warning" show-progress :max="1" class="mb-3"
|
|
||||||
>
|
|
||||||
<b-progress-bar :value="modelLoadingProgress">
|
|
||||||
<strong>{{ ((modelLoadingProgress * modelSize) / (1024*1024)).toFixed(1) }}MB / {{
|
|
||||||
(modelSize / (1024 * 1024)).toFixed(1)
|
|
||||||
}}MB</strong>
|
|
||||||
</b-progress-bar>
|
|
||||||
</b-progress>
|
|
||||||
|
|
||||||
<b-progress v-if="mlPredictionsLoading" variant="primary" :value="modelPredictionProgress"
|
|
||||||
:max="content.length" class="mb-3"></b-progress>
|
|
||||||
|
|
||||||
<AnalyzedContentSpansContainer v-if="analyzedContentSpans.length > 0"
|
|
||||||
:spans="analyzedContentSpans" :text="rawContent"></AnalyzedContentSpansContainer>
|
|
||||||
<div v-else class="content-div" v-html="content"></div>
|
|
||||||
</div>
|
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
@@ -38,40 +8,22 @@ import Sist2Api from "@/Sist2Api";
|
|||||||
import Preloader from "@/components/Preloader";
|
import Preloader from "@/components/Preloader";
|
||||||
import Sist2Query from "@/Sist2Query";
|
import Sist2Query from "@/Sist2Query";
|
||||||
import store from "@/store";
|
import store from "@/store";
|
||||||
import BertNerModel from "@/ml/BertNerModel";
|
|
||||||
import AnalyzedContentSpansContainer from "@/components/AnalyzedContentSpanContainer.vue";
|
|
||||||
import ModelsRepo from "@/ml/modelsRepo";
|
|
||||||
import {mapGetters, mapMutations} from "vuex";
|
|
||||||
|
|
||||||
export default {
|
export default {
|
||||||
name: "LazyContentDiv",
|
name: "LazyContentDiv",
|
||||||
components: {AnalyzedContentSpansContainer, Preloader},
|
components: {Preloader},
|
||||||
props: ["docId"],
|
props: ["docId"],
|
||||||
data() {
|
data() {
|
||||||
return {
|
return {
|
||||||
ModelsRepo,
|
|
||||||
content: "",
|
content: "",
|
||||||
rawContent: "",
|
loading: true
|
||||||
loading: true,
|
|
||||||
modelLoadingProgress: 0,
|
|
||||||
modelPredictionProgress: 0,
|
|
||||||
mlPredictionsLoading: false,
|
|
||||||
mlLoading: false,
|
|
||||||
mlModel: null,
|
|
||||||
analyzedContentSpans: []
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
mounted() {
|
mounted() {
|
||||||
|
|
||||||
if (this.$store.getters.optMlDefaultModel) {
|
|
||||||
this.mlModel = this.$store.getters.optMlDefaultModel
|
|
||||||
} else {
|
|
||||||
this.mlModel = ModelsRepo.getDefaultModel();
|
|
||||||
}
|
|
||||||
|
|
||||||
const query = Sist2Query.searchQuery();
|
const query = Sist2Query.searchQuery();
|
||||||
|
|
||||||
if (this.$store.state.optHighlight) {
|
if (this.$store.state.optHighlight) {
|
||||||
|
|
||||||
const fields = this.$store.state.fuzzy
|
const fields = this.$store.state.fuzzy
|
||||||
? {"content.nGram": {}}
|
? {"content.nGram": {}}
|
||||||
: {content: {}};
|
: {content: {}};
|
||||||
@@ -115,28 +67,14 @@ export default {
             this.loading = false;
             if (resp.hits.hits.length === 1) {
                 this.content = this.getContent(resp.hits.hits[0]);
-            }
-
-            if (this.optAutoAnalyze) {
-                this.mlAnalyze();
+            } else {
+                console.log("FIXME: could not get content")
+                console.log(resp)
             }
         });
     },
-    computed: {
-        ...mapGetters(["optAutoAnalyze"]),
-        modelSize() {
-            const modelData = ModelsRepo.data[this.mlModel];
-            if (!modelData) {
-                return 0;
-            }
-            return modelData.size;
-        }
-    },
     methods: {
-        ...mapMutations(["setOptAutoAnalyze"]),
         getContent(doc) {
-            this.rawContent = doc._source.content;
-
             if (!doc.highlight) {
                 return doc._source.content;
             }
@@ -147,60 +85,10 @@ export default {
             if (doc.highlight.content) {
                 return doc.highlight.content[0];
             }
-        },
-        async getMlModel() {
-            if (this.$store.getters.mlModel.name !== this.mlModel) {
-                this.mlLoading = true;
-                this.modelLoadingProgress = 0;
-                const modelInfo = ModelsRepo.data[this.mlModel];
-
-                const model = new BertNerModel(
-                    modelInfo.vocabUrl,
-                    modelInfo.modelUrl,
-                    modelInfo.id2label,
-                )
-
-                await model.init(progress => this.modelLoadingProgress = progress);
-                this.$store.commit("setMlModel", {model, name: this.mlModel});
-
-                this.mlLoading = false;
-                return model
-            }
-
-            return this.$store.getters.mlModel.model;
-        },
-        async mlAnalyze() {
-            if (!this.content) {
-                return;
-            }
-
-            const modelInfo = ModelsRepo.data[this.mlModel];
-            if (modelInfo === undefined) {
-                return;
-            }
-
-            this.$store.commit("setOptMlDefaultModel", this.mlModel);
-            await this.$store.dispatch("updateConfiguration");
-
-            const model = await this.getMlModel();
-
-            this.analyzedContentSpans = [];
-
-            this.mlPredictionsLoading = true;
-
-            await model.predict(this.rawContent, results => {
-                results.forEach(result => result.label = modelInfo.humanLabels[result.label]);
-                this.analyzedContentSpans.push(...results);
-                this.modelPredictionProgress = results[results.length - 1].wordIndex;
-            });
-            this.mlPredictionsLoading = false;
         }
     }
 }
 </script>

-<style>
-.progress-bar {
-    transition: none;
-}
+<style scoped>
 </style>
@@ -49,7 +49,6 @@ export default {
         configReset: "Reset configuration",
         searchOptions: "Search options",
         treemapOptions: "Treemap options",
-        mlOptions: "Machine learning options",
         displayOptions: "Display options",
         opt: {
             lang: "Language",
@@ -79,10 +78,7 @@ export default {
             simpleLightbox: "Disable animations in image viewer",
             showTagPickerFilter: "Display the tag filter bar",
             featuredFields: "Featured fields Javascript template string. Will appear in the search results.",
-            featuredFieldsList: "Available variables",
-            autoAnalyze: "Automatically analyze text",
-            defaultModel: "Default model",
-            mlRepositories: "Model repositories (one per line)"
+            featuredFieldsList: "Available variables"
         },
         queryMode: {
             simple: "Simple",
@@ -175,12 +171,6 @@ export default {
             selectedIndex: "selected index",
             selectedIndices: "selected indices",
         },
-        ml: {
-            analyzeText: "Analyze",
-            auto: "Auto",
-            repoFetchError: "Failed to get list of models. Check browser console for more details.",
-            repoFetchErrorTitle: "Could not fetch model repositories",
-        }
     },
     de: {
         filePage: {
@@ -260,8 +250,8 @@ export default {
             vidPreviewInterval: "Videovorschau Framedauer in ms",
             simpleLightbox: "Schalte Animationen im Image-Viewer ab",
             showTagPickerFilter: "Zeige die Tag-Filter-Leiste",
-            featuredFields: "Variablen, welche zusätzlich in den Suchergebnissen angezeigt werden können.",
-            featuredFieldsList: "verfügbare Variablen"
+            featuredFields: "Ausgewählte Felder Javascript Vorlage String. Wird in den Suchergebnissen angezeigt.",
+            featuredFieldsList: "Verfügbare Variablen"
         },
         queryMode: {
             simple: "Einfach",
@@ -343,10 +333,10 @@ export default {
             random: "zufällig",
         },
         d3: {
-            mimeCount: "Anzahl nach Medientyp",
-            mimeSize: "Größen nach Medientyp",
-            dateHistogram: "Änderungszeiten",
-            sizeHistogram: "Dateigrößen",
+            mimeCount: "Anzahlverteilung nach Medientyp",
+            mimeSize: "Größenverteilung nach Medientyp",
+            dateHistogram: "Verteilung der Änderungszeiten",
+            sizeHistogram: "Verteilung der Dateigrößen",
         },
         indexPicker: {
             selectNone: "keinen auswählen",
@@ -1,77 +0,0 @@
-import BertTokenizer from "@/ml/BertTokenizer";
-import * as tf from "@tensorflow/tfjs";
-import axios from "axios";
-
-export default class BertNerModel {
-    vocabUrl;
-    modelUrl;
-
-    id2label;
-    _tokenizer;
-    _model;
-    inputSize = 128;
-
-    _previousWordId = null;
-
-    constructor(vocabUrl, modelUrl, id2label) {
-        this.vocabUrl = vocabUrl;
-        this.modelUrl = modelUrl;
-        this.id2label = id2label;
-    }
-
-    async init(onProgress) {
-        await Promise.all([this.loadTokenizer(), this.loadModel(onProgress)]);
-    }
-
-    async loadTokenizer() {
-        const vocab = (await axios.get(this.vocabUrl)).data;
-        this._tokenizer = new BertTokenizer(vocab);
-    }
-
-    async loadModel(onProgress) {
-        this._model = await tf.loadGraphModel(this.modelUrl, {onProgress});
-    }
-
-    alignLabels(labels, wordIds, words) {
-        const result = [];
-
-        for (let i = 0; i < this.inputSize; i++) {
-            const label = labels[i];
-            const wordId = wordIds[i];
-
-            if (wordId === -1) {
-                continue;
-            }
-            if (wordId === this._previousWordId) {
-                continue;
-            }
-
-            result.push({
-                word: words[wordId].text, wordIndex: words[wordId].index, label: label
-            });
-            this._previousWordId = wordId;
-        }
-
-        return result;
-    }
-
-    async predict(text, callback) {
-        this._previousWordId = null;
-        const encoded = this._tokenizer.encodeText(text, this.inputSize)
-
-        for (let chunk of encoded.inputChunks) {
-            const rawResult = tf.tidy(() => this._model.execute({
-                input_ids: tf.tensor2d(chunk.inputIds, [1, this.inputSize], "int32"),
-                token_type_ids: tf.tensor2d(chunk.segmentIds, [1, this.inputSize], "int32"),
-                attention_mask: tf.tensor2d(chunk.inputMask, [1, this.inputSize], "int32"),
-            }));
-
-            const labelIds = await tf.argMax(rawResult, -1);
-            const labelIdsArray = await labelIds.array();
-            const labels = labelIdsArray[0].map(id => this.id2label[id]);
-            rawResult.dispose()
-
-            callback(this.alignLabels(labels, chunk.wordIds, encoded.words))
-        }
-    }
-}
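The deleted class above was the entry point for in-browser NER. A minimal usage sketch of its public API; the URLs and label maps below are illustrative placeholders, not values from this repository:

    // Hypothetical model/vocab locations; in sist2 these came from a
    // ModelsRepo entry, not hard-coded URLs.
    import BertNerModel from "@/ml/BertNerModel";

    const model = new BertNerModel(
        "https://example.com/ner/vocab.json",   // assumed vocab location
        "https://example.com/ner/model.json",   // assumed TF.js graph model
        {0: "O", 1: "B-PER", 2: "I-PER"},       // example id2label map
    );

    // init() fetches the vocab and the TF.js graph model in parallel;
    // onProgress receives a 0..1 fraction (used to drive the progress bar).
    await model.init(progress => console.log(`loading: ${Math.round(progress * 100)}%`));

    // predict() encodes the text into 128-token chunks and invokes the
    // callback once per chunk with {word, wordIndex, label} spans.
    await model.predict("John Smith lives in Montreal.", spans => {
        spans.forEach(span => console.log(span.word, span.label));
    });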
@@ -1,184 +0,0 @@
-import {zip, chunk} from "underscore";
-
-const UNK_INDEX = 100;
-const CLS_INDEX = 101;
-const SEP_INDEX = 102;
-const CONTINUING_SUBWORD_PREFIX = "##";
-
-function isWhitespace(ch) {
-    return /\s/.test(ch);
-}
-
-function isInvalid(ch) {
-    return (ch.charCodeAt(0) === 0 || ch.charCodeAt(0) === 0xfffd);
-}
-
-const punctuations = '[~`!@#$%^&*(){}[];:"\'<,.>?/\\|-_+=';
-
-/** To judge whether it's a punctuation. */
-function isPunctuation(ch) {
-    return punctuations.indexOf(ch) !== -1;
-}
-
-export default class BertTokenizer {
-    vocab;
-
-    constructor(vocab) {
-        this.vocab = vocab;
-    }
-
-    tokenize(text) {
-        const charOriginalIndex = [];
-        const cleanedText = this.cleanText(text, charOriginalIndex);
-        const origTokens = cleanedText.split(' ');
-
-        let charCount = 0;
-        const tokens = origTokens.map((token) => {
-            token = token.toLowerCase();
-            const tokens = this.runSplitOnPunctuation(token, charCount, charOriginalIndex);
-            charCount += token.length + 1;
-            return tokens;
-        });
-
-        let flattenTokens = [];
-        for (let index = 0; index < tokens.length; index++) {
-            flattenTokens = flattenTokens.concat(tokens[index]);
-        }
-        return flattenTokens;
-    }
-
-    /* Performs invalid character removal and whitespace cleanup on text. */
-    cleanText(text, charOriginalIndex) {
-        text = text.replace(/\?/g, "").trim();
-
-        const stringBuilder = [];
-        let originalCharIndex = 0;
-        let newCharIndex = 0;
-
-        for (const ch of text) {
-            // Skip the characters that cannot be used.
-            if (isInvalid(ch)) {
-                originalCharIndex += ch.length;
-                continue;
-            }
-            if (isWhitespace(ch)) {
-                if (stringBuilder.length > 0 && stringBuilder[stringBuilder.length - 1] !== ' ') {
-                    stringBuilder.push(' ');
-                    charOriginalIndex[newCharIndex] = originalCharIndex;
-                    originalCharIndex += ch.length;
-                } else {
-                    originalCharIndex += ch.length;
-                    continue;
-                }
-            } else {
-                stringBuilder.push(ch);
-                charOriginalIndex[newCharIndex] = originalCharIndex;
-                originalCharIndex += ch.length;
-            }
-            newCharIndex++;
-        }
-        return stringBuilder.join('');
-    }
-
-    /* Splits punctuation on a piece of text. */
-    runSplitOnPunctuation(text, count, charOriginalIndex) {
-        const tokens = [];
-        let startNewWord = true;
-        for (const ch of text) {
-            if (isPunctuation(ch)) {
-                tokens.push({text: ch, index: charOriginalIndex[count]});
-                count += ch.length;
-                startNewWord = true;
-            } else {
-                if (startNewWord) {
-                    tokens.push({text: '', index: charOriginalIndex[count]});
-                    startNewWord = false;
-                }
-                tokens[tokens.length - 1].text += ch;
-                count += ch.length;
-            }
-        }
-        return tokens;
-    }
-
-    encode(words) {
-        let outputTokens = [];
-        const wordIds = [];
-
-        for (let i = 0; i < words.length; i++) {
-            let chars = [...words[i].text];
-
-            let isUnknown = false;
-            let start = 0;
-            let subTokens = [];
-
-            while (start < chars.length) {
-                let end = chars.length;
-                let currentSubstring = null;
-                while (start < end) {
-                    let substr = chars.slice(start, end).join('');
-
-                    if (start > 0) {
-                        substr = CONTINUING_SUBWORD_PREFIX + substr;
-                    }
-                    if (this.vocab.includes(substr)) {
-                        currentSubstring = this.vocab.indexOf(substr);
-                        break;
-                    }
-
-                    --end;
-                }
-                if (currentSubstring == null) {
-                    isUnknown = true;
-                    break;
-                }
-                subTokens.push(currentSubstring);
-                start = end;
-            }
-
-            if (isUnknown) {
-                outputTokens.push(UNK_INDEX);
-                wordIds.push(i);
-            } else {
-                subTokens.forEach(tok => {
-                    outputTokens.push(tok);
-                    wordIds.push(i)
-                });
-            }
-        }
-
-        return {tokens: outputTokens, wordIds};
-    }
-
-    encodeText(inputText, inputSize) {
-
-        const tokenized = this.tokenize(inputText);
-        const encoded = this.encode(tokenized);
-
-        const encodedTokenChunks = chunk(encoded.tokens, inputSize - 2);
-        const encodedWordIdChunks = chunk(encoded.wordIds, inputSize - 2);
-
-        const chunks = [];
-
-        zip(encodedTokenChunks, encodedWordIdChunks).forEach(([tokens, wordIds]) => {
-            const inputIds = [CLS_INDEX, ...tokens, SEP_INDEX];
-            const segmentIds = Array(inputIds.length).fill(0);
-            const inputMask = Array(inputIds.length).fill(1);
-            wordIds = [-1, ...wordIds, -1];
-
-            while (inputIds.length < inputSize) {
-                inputIds.push(0);
-                inputMask.push(0);
-                segmentIds.push(0);
-                wordIds.push(-1);
-            }
-
-            chunks.push({inputIds, inputMask, segmentIds, wordIds})
-        });
-
-        return {
-            inputChunks: chunks,
-            words: tokenized
-        };
-    }
-}
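The interesting part of the deleted tokenizer is the WordPiece loop in encode(). A worked example under a made-up two-entry vocabulary (real BERT vocabs have ~30k entries; ids 100/101/102 for [UNK]/[CLS]/[SEP] are the ones hard-coded above):

    import BertTokenizer from "@/ml/BertTokenizer";

    // Sparse array standing in for a real vocab file; the indices are the
    // token ids the model would receive.
    const vocab = [];
    vocab[1500] = "index";   // whole-word piece
    vocab[1501] = "##ing";   // continuation piece, prefixed with "##"

    const tokenizer = new BertTokenizer(vocab);

    // "indexing" is not in the vocab, so the longest-prefix loop emits
    // "index" + "##ing"; both sub-tokens map back to word id 0 so that
    // alignLabels() can later collapse them into one labelled word.
    const {tokens, wordIds} = tokenizer.encode([{text: "indexing", index: 0}]);
    console.log(tokens);   // [1500, 1501]
    console.log(wordIds);  // [0, 0]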
@@ -1,43 +0,0 @@
-import axios from "axios";
-
-class ModelsRepo {
-    _repositories;
-    data = {};
-
-    async init(repositories) {
-        this._repositories = repositories;
-
-        const data = await Promise.all(this._repositories.map(this._loadRepository));
-
-        data.forEach(models => {
-            models.forEach(model => {
-                this.data[model.name] = model;
-            })
-        });
-    }
-
-    async _loadRepository(repository) {
-        const data = (await axios.get(repository)).data;
-        data.forEach(model => {
-            model["modelUrl"] = new URL(model["modelPath"], repository).href;
-            model["vocabUrl"] = new URL(model["vocabPath"], repository).href;
-        });
-        return data;
-    }
-
-    getOptions() {
-        return Object.values(this.data).map(model => ({
-            text: `${model.name} (${Math.round(model.size / (1024*1024))}MB)`,
-            value: model.name
-        }));
-    }
-
-    getDefaultModel() {
-        if (Object.values(this.data).length === 0) {
-            return null;
-        }
-        return Object.values(this.data).find(model => model.default).name;
-    }
-}
-
-export default new ModelsRepo();
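For reference, _loadRepository() above expected each repository URL (e.g. the default sist2-ner-models repo.json) to return a JSON array of model descriptors. The field names below are the ones the deleted code actually reads; the concrete values are illustrative guesses, not the real repository contents:

    // Hypothetical repo.json payload consumed by ModelsRepo.init().
    const exampleRepoJson = [
        {
            name: "bert-ner-en",                  // key in ModelsRepo.data
            size: 110 * 1024 * 1024,              // bytes, shown by getOptions()
            default: true,                        // picked by getDefaultModel()
            modelPath: "bert-ner-en/model.json",  // resolved against the repo URL
            vocabPath: "bert-ner-en/vocab.json",  // resolved against the repo URL
            id2label: {0: "O", 1: "B-PER"},       // passed to BertNerModel
            humanLabels: {"B-PER": "Person"},     // used to prettify span labels
        },
    ];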
@@ -57,9 +57,6 @@ export default new Vuex.Store({
         optVidPreviewInterval: 700,
         optSimpleLightbox: true,
         optShowTagPickerFilter: true,
-        optMlRepositories: "https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json",
-        optAutoAnalyze: false,
-        optMlDefaultModel: null,

         _onLoadSelectedIndices: [] as string[],
         _onLoadSelectedMimeTypes: [] as string[],
@@ -89,11 +86,7 @@ export default new Vuex.Store({

         uiMimeMap: [] as any[],

-        auth0Token: null,
-        mlModel: {
-            model: null,
-            name: null
-        },
+        auth0Token: null
    },
    mutations: {
        setUiShowDetails: (state, val) => state.uiShowDetails = val,
@@ -179,9 +172,6 @@ export default new Vuex.Store({
        setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
        setOptSimpleLightbox: (state, val) => state.optSimpleLightbox = val,
        setOptShowTagPickerFilter: (state, val) => state.optShowTagPickerFilter = val,
-        setOptAutoAnalyze: (state, val) => {state.optAutoAnalyze = val},
-        setOptMlRepositories: (state, val) => {state.optMlRepositories = val},
-        setOptMlDefaultModel: (state, val) => {state.optMlDefaultModel = val},

        setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
        setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
@@ -204,7 +194,6 @@ export default new Vuex.Store({
            // noop
        },
        setAuth0Token: (state, val) => state.auth0Token = val,
-        setMlModel: (state, val) => state.mlModel = val,
    },
    actions: {
        setSist2Info: (store, val) => {
@@ -361,7 +350,6 @@ export default new Vuex.Store({
    },
    modules: {},
    getters: {
-        mlModel: (state) => state.mlModel,
        seed: (state) => state.seed,
        getPathText: (state) => state.pathText,
        indices: state => state.indices,
@@ -428,12 +416,5 @@ export default new Vuex.Store({
        optSimpleLightbox: state => state.optSimpleLightbox,
        optShowTagPickerFilter: state => state.optShowTagPickerFilter,
        optFeaturedFields: state => state.optFeaturedFields,
-        optMlRepositories: state => state.optMlRepositories,
-        mlRepositoryList: state => {
-            const repos = state.optMlRepositories.split("\n")
-            return repos[0] == "" ? [] : repos;
-        },
-        optMlDefaultModel: state => state.optMlDefaultModel,
-        optAutoAnalyze: state => state.optAutoAnalyze,
    }
 })
@@ -25,8 +25,7 @@
         <b-form-select :options="themeOptions" :value="optTheme" @input="setOptTheme"></b-form-select>

         <label>{{ $t("opt.displayMode") }}</label>
-        <b-form-select :options="displayModeOptions" :value="optDisplay"
-                       @input="setOptDisplay"></b-form-select>
+        <b-form-select :options="displayModeOptions" :value="optDisplay" @input="setOptDisplay"></b-form-select>

         <label>{{ $t("opt.columns") }}</label>
         <b-form-select :options="columnsOptions" :value="optColumns" @input="setOptColumns"></b-form-select>
@@ -124,10 +123,7 @@
             }}
         </b-form-checkbox>

-        <b-form-checkbox :checked="optHighlight" @input="setOptHighlight">{{
-            $t("opt.highlight")
-        }}
-        </b-form-checkbox>
+        <b-form-checkbox :checked="optHighlight" @input="setOptHighlight">{{ $t("opt.highlight") }}</b-form-checkbox>
         <b-form-checkbox :checked="optTagOrOperator" @input="setOptTagOrOperator">{{
             $t("opt.tagOrOperator")
         }}
@@ -152,8 +148,7 @@
                      @input="setOptResultSize"></b-form-input>

        <label>{{ $t("opt.queryMode") }}</label>
-        <b-form-select :options="queryModeOptions" :value="optQueryMode"
-                       @input="setOptQueryMode"></b-form-select>
+        <b-form-select :options="queryModeOptions" :value="optQueryMode" @input="setOptQueryMode"></b-form-select>

        <label>{{ $t("opt.slideDuration") }}</label>
        <b-form-input :value="optLightboxSlideDuration" type="number" min="1"
@@ -164,17 +159,6 @@
                      @input="setOptVidPreviewInterval"></b-form-input>
        </b-card>

-        <h4 class="mt-3">{{ $t("mlOptions") }}</h4>
-        <b-card>
-            <label>{{ $t("opt.mlRepositories") }}</label>
-            <b-textarea rows="3" :value="optMlRepositories" @input="setOptMlRepositories"></b-textarea>
-            <br>
-            <b-form-checkbox :checked="optAutoAnalyze" @input="setOptAutoAnalyze">{{
-                $t("opt.autoAnalyze")
-            }}
-            </b-form-checkbox>
-        </b-card>
-
        <h4 class="mt-3">{{ $t("treemapOptions") }}</h4>
        <b-card>
            <label>{{ $t("opt.treemapType") }}</label>
@@ -327,8 +311,6 @@ export default {
            "optSimpleLightbox",
            "optShowTagPickerFilter",
            "optFeaturedFields",
-            "optMlRepositories",
-            "optAutoAnalyze",
        ]),
        clientWidth() {
            return window.innerWidth;
@@ -373,8 +355,6 @@ export default {
            "setOptSimpleLightbox",
            "setOptShowTagPickerFilter",
            "setOptFeaturedFields",
-            "setOptMlRepositories",
-            "setOptAutoAnalyze",
        ]),
        onResetClick() {
            localStorage.removeItem("sist2_configuration");
@@ -7,11 +7,7 @@
             <Preloader></Preloader>
         </b-card>

-        <b-alert v-show="!uiLoading && showEsConnectionError" show variant="danger" class="mt-2">
-            {{ $t("toast.esConnErr") }}
-        </b-alert>
-
-        <b-card v-show="!uiLoading && !showEsConnectionError" id="search-panel">
+        <b-card v-show="!uiLoading" id="search-panel">
             <SearchBar @show-help="showHelp=true"></SearchBar>
             <b-row>
                 <b-col style="height: 70px;" sm="6">
@@ -98,8 +94,7 @@ export default Vue.extend({
         docChecksums: new Set(),
         searchBusy: false,
         Sist2Query: Sist2Query,
-        showHelp: false,
-        showEsConnectionError: false
+        showHelp: false
     }),
     computed: {
         ...mapGetters(["indices", "optDisplay"]),
@@ -148,15 +143,6 @@ export default Vue.extend({
                 this.uiLoading = false;
                 this.search(true);
             });
-        }).catch(error => {
-            console.log(error);
-
-            if (error.response.status == 503 || error.response.status == 500) {
-                this.showEsConnectionError = true;
-                this.uiLoading = false;
-            } else {
-                this.showErrorToast();
-            }
         });
     },
     methods: {
@@ -267,20 +253,11 @@ export default Vue.extend({
                 },
                 size: 0
             }).then(res => {
-                const range = {
+                return {
                     min: res.aggregations.dateMin.value,
                     max: res.aggregations.dateMax.value,
                 }
-
-                if (range.min == null) {
-                    range.min = 0;
-                    range.max = 1;
-                } else if (range.min == range.max) {
-                    range.max += 1;
-                }
-
-                return range;
-            });
+            })
         },
         appendFunc() {
             if (!this.$store.state.uiReachedScrollEnd && this.search && !this.searchBusy) {
@@ -83,7 +83,6 @@ void database_open(database_t *db) {
     LOG_DEBUGF("database.c", "Opening database %s (%d)", db->filename, db->type);

     CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db));
-    sqlite3_busy_timeout(db->db, 1000);

     CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA cache_size = -200000;", NULL, NULL, NULL));
     CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA synchronous = OFF;", NULL, NULL, NULL));
@@ -329,18 +328,18 @@ database_iterator_t *database_create_document_iterator(database_t *db) {
            " WHEN sc.json_data IS NULL THEN"
            "  CASE"
            "   WHEN t.tag IS NULL THEN"
-           "    json_set(document.json_data, '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime)"
+           "    document.json_data"
            "   ELSE"
-           "    json_set(document.json_data, '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime, '$.tag', json_group_array(t.tag))"
+           "    json_set(document.json_data, '$.tag', json_group_array(t.tag))"
            "  END"
            " ELSE"
            "  CASE"
            "   WHEN t.tag IS NULL THEN"
-           "    json_patch(json_set(document.json_data, '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime), sc.json_data)"
+           "    json_patch(document.json_data, sc.json_data)"
            "   ELSE"
            // This will overwrite any tags specified in the sidecar file!
            // TODO: concatenate the two arrays?
-           "    json_set(json_patch(document.json_data, sc.json_data), '$._id', document.id, '$.size', document.size, '$.mtime', document.mtime, '$.tag', json_group_array(t.tag))"
+           "    json_set(json_patch(document.json_data, sc.json_data), '$.tag', json_group_array(t.tag))"
            "  END"
            " END"
            " FROM document"
@@ -582,33 +581,18 @@ void database_add_work(database_t *db, job_t *job) {
            ret = sqlite3_step(db->insert_parse_job_stmt);

            if (ret == SQLITE_FULL) {
-                sqlite3_reset(db->insert_parse_job_stmt);
-                pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
                usleep(1000000);
-                pthread_mutex_lock(&db->ipc_ctx->db_mutex);
-                continue;
            } else {
                CRASH_IF_STMT_FAIL(ret);
            }

-            ret = sqlite3_reset(db->insert_parse_job_stmt);
-            if (ret == SQLITE_FULL) {
-                pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
-                usleep(100000);
-                pthread_mutex_lock(&db->ipc_ctx->db_mutex);
-            } else if (ret != SQLITE_OK) {
-                LOG_FATALF("database.c", "sqlite3_reset returned error %d", ret);
-            }
-        } while (ret != SQLITE_DONE && ret != SQLITE_OK);
+            CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->insert_parse_job_stmt));
+        } while (ret != SQLITE_DONE);
    } else if (job->type == JOB_BULK_LINE) {
        do {
            sqlite3_bind_text(db->insert_index_job_stmt, 1, job->bulk_line->doc_id, -1, SQLITE_STATIC);
            sqlite3_bind_int(db->insert_index_job_stmt, 2, job->bulk_line->type);
-            if (job->bulk_line->type != ES_BULK_LINE_DELETE) {
                sqlite3_bind_text(db->insert_index_job_stmt, 3, job->bulk_line->line, -1, SQLITE_STATIC);
-            } else {
-                sqlite3_bind_null(db->insert_index_job_stmt, 3);
-            }

            ret = sqlite3_step(db->insert_index_job_stmt);

@@ -627,8 +611,6 @@ void database_add_work(database_t *db, job_t *job) {
                pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
                usleep(100000);
                pthread_mutex_lock(&db->ipc_ctx->db_mutex);
-            } else if (ret != SQLITE_OK) {
-                LOG_FATALF("database.c", "sqlite3_reset returned error %d", ret);
            }

        } while (ret != SQLITE_DONE && ret != SQLITE_OK);
@@ -18,14 +18,6 @@ typedef enum {
     FTS_DATABASE
 } database_type_t;

-typedef enum {
-    DATABASE_STAT_INVALID,
-    DATABASE_STAT_TREEMAP,
-    DATABASE_STAT_MIME_AGG,
-    DATABASE_STAT_SIZE_AGG,
-    DATABASE_STAT_DATE_AGG,
-} database_stat_type_d;
-
 typedef enum {
     JOB_UNDEFINED,
     JOB_BULK_LINE,
@@ -112,14 +104,14 @@ database_iterator_t *database_create_document_iterator(database_t *db);
 cJSON *database_document_iter(database_iterator_t *);

 #define database_document_iter_foreach(element, iter) \
-    for (cJSON *(element) = database_document_iter(iter); (element) != NULL; (element) = database_document_iter(iter))
+    for (cJSON *element = database_document_iter(iter); element != NULL; element = database_document_iter(iter))

 database_iterator_t *database_create_delete_list_iterator(database_t *db);

 char * database_delete_list_iter(database_iterator_t *iter);

 #define database_delete_list_iter_foreach(element, iter) \
-    for (char *(element) = database_delete_list_iter(iter); (element) != NULL; (element) = database_delete_list_iter(iter))
+    for (char *element = database_delete_list_iter(iter); element != NULL; element = database_delete_list_iter(iter))


 cJSON *database_incremental_scan_begin(database_t *db);
@@ -140,16 +132,12 @@ treemap_row_t database_treemap_iter(database_iterator_t *iter);

 void database_generate_stats(database_t *db, double treemap_threshold);

-database_stat_type_d database_get_stat_type_by_mnemonic(const char *name);
-
 job_t *database_get_work(database_t *db, job_type_t job_type);

 void database_add_work(database_t *db, job_t *job);

 //void database_index(database_t *db);

-cJSON *database_get_stats(database_t *db, database_stat_type_d type);
-
 #define CRASH_IF_STMT_FAIL(x) do { \
     int return_value = x; \
     if (return_value != SQLITE_DONE && return_value != SQLITE_ROW) { \

@@ -6,7 +6,6 @@
 #define SIZE_BUCKET (long)(5 * 1000 * 1000)
 #define DATE_BUCKET (long)(2629800) // ~30 days

-
 database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold) {

     sqlite3_stmt *stmt;
@@ -158,85 +157,3 @@ void database_generate_stats(database_t *db, double treemap_threshold) {
     LOG_INFO("database.c", "Done!");
 }
-
-database_stat_type_d database_get_stat_type_by_mnemonic(const char *name) {
-    if (strcmp(name, "TMAP") == 0) {
-        return DATABASE_STAT_TREEMAP;
-    }
-    if (strcmp(name, "MAGG") == 0) {
-        return DATABASE_STAT_MIME_AGG;
-    }
-    if (strcmp(name, "SAGG") == 0) {
-        return DATABASE_STAT_SIZE_AGG;
-    }
-    if (strcmp(name, "DAGG") == 0) {
-        return DATABASE_STAT_DATE_AGG;
-    }
-
-    return DATABASE_STAT_INVALID;
-}
-
-cJSON *database_get_stats(database_t *db, database_stat_type_d type) {
-
-    sqlite3_stmt *stmt;
-
-    switch (type) {
-        case DATABASE_STAT_TREEMAP:
-            CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
-                    db->db, "SELECT path,size FROM stats_treemap", -1, &stmt, NULL
-            ));
-            break;
-        case DATABASE_STAT_DATE_AGG:
-            CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
-                    db->db, "SELECT bucket,count FROM stats_date_agg", -1, &stmt, NULL
-            ));
-            break;
-        case DATABASE_STAT_SIZE_AGG:
-            CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
-                    db->db, "SELECT bucket,count FROM stats_size_agg", -1, &stmt, NULL
-            ));
-            break;
-        case DATABASE_STAT_MIME_AGG:
-            CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
-                    db->db, "SELECT mime,size,count FROM stats_mime_agg", -1, &stmt, NULL
-            ));
-            break;
-        case DATABASE_STAT_INVALID:
-        default:
-            LOG_FATALF("database_stats.c", "Invalid stat type: %d", type);
-    }
-
-    cJSON *json = cJSON_CreateArray();
-
-    int ret;
-    do {
-        ret = sqlite3_step(stmt);
-        CRASH_IF_STMT_FAIL(ret);
-
-        if (ret == SQLITE_DONE) {
-            break;
-        }
-
-        cJSON *row = cJSON_CreateObject();
-
-        switch (type) {
-            case DATABASE_STAT_TREEMAP:
-                cJSON_AddStringToObject(row, "path", (const char *) sqlite3_column_text(stmt, 0));
-                cJSON_AddNumberToObject(row, "size", (double) sqlite3_column_int64(stmt, 1));
-                break;
-            case DATABASE_STAT_DATE_AGG:
-            case DATABASE_STAT_SIZE_AGG:
-                cJSON_AddNumberToObject(row, "bucket", (double) sqlite3_column_int64(stmt, 0));
-                cJSON_AddNumberToObject(row, "count", (double) sqlite3_column_int64(stmt, 1));
-                break;
-            case DATABASE_STAT_MIME_AGG:
-                cJSON_AddStringToObject(row, "mime", (const char *) sqlite3_column_text(stmt, 0));
-                cJSON_AddNumberToObject(row, "size", (double) sqlite3_column_int64(stmt, 1));
-                cJSON_AddNumberToObject(row, "count", (double) sqlite3_column_int64(stmt, 2));
-                break;
-        }
-
-        cJSON_AddItemToArray(json, row);
-    } while (TRUE);
-
-    return json;
-}
5  src/database/database_stats.h  Normal file
@@ -0,0 +1,5 @@
+#ifndef SIST2_DATABASE_STATS_H
+#define SIST2_DATABASE_STATS_H
+
+
+#endif //SIST2_DATABASE_STATS_H
@@ -64,16 +64,20 @@ void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
     cJSON_Delete(line);
 }

-void delete_document(const char *document_id) {
-    es_bulk_line_t bulk_line;
-
-    bulk_line.type = ES_BULK_LINE_DELETE;
-    bulk_line.next = NULL;
-    strcpy(bulk_line.doc_id, document_id);
+void index_json_func(job_t *job) {
+    elastic_index_line(job->bulk_line);
+}
+
+void delete_document(const char *document_id) {
+    es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t));
+
+    bulk_line->type = ES_BULK_LINE_DELETE;
+    bulk_line->next = NULL;
+    strcpy(bulk_line->doc_id, document_id);

     tpool_add_work(IndexCtx.pool, &(job_t) {
             .type = JOB_BULK_LINE,
-            .bulk_line = &bulk_line,
+            .bulk_line = bulk_line,
     });
 }

@@ -95,7 +99,6 @@ void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
             .type = JOB_BULK_LINE,
             .bulk_line = bulk_line,
     });
-    free(bulk_line);
 }

 void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) {
@@ -91,6 +91,8 @@ char *build_json_string(document_t *doc) {
     } else {
         cJSON_AddStringToObject(json, "mime", mime_text);
     }
+    cJSON_AddNumberToObject(json, "size", (double) doc->size);
+    cJSON_AddNumberToObject(json, "mtime", doc->mtime);

     // Ignore root directory in the file path
     doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
@@ -120,6 +122,8 @@ char *build_json_string(document_t *doc) {
         cJSON_AddStringToObject(json, "path", "");
     }

+    cJSON_AddStringToObject(json, "_id", doc->doc_id);
+
     // Metadata
     meta_line_t *meta = doc->meta_head;
     while (meta != NULL) {
14  src/main.c
@@ -195,10 +195,6 @@ void initialize_scan_context(scan_args_t *args) {
     ScanCtx.mobi_ctx.content_size = args->content_size;
     ScanCtx.mobi_ctx.log = log_callback;
     ScanCtx.mobi_ctx.logf = logf_callback;
-    ScanCtx.mobi_ctx.store = write_thumbnail_callback;
-    ScanCtx.mobi_ctx.enable_tn = args->tn_count > 0;
-    ScanCtx.mobi_ctx.tn_size = args->tn_size;
-    ScanCtx.mobi_ctx.tn_qscale = args->tn_quality;

     // TEXT
     ScanCtx.text_ctx.content_size = args->content_size;
@@ -316,20 +312,17 @@ void sist2_index(index_args_t *args) {
     database_open(db);
     database_iterator_t *iterator = database_create_document_iterator(db);
     database_document_iter_foreach(json, iterator) {
-        char doc_id[SIST_DOC_ID_LEN];
-        strcpy(doc_id, cJSON_GetObjectItem(json, "_id")->valuestring);
-        cJSON_DeleteItemFromObject(json, "_id");
-
+        const char *doc_id = cJSON_GetObjectItem(json, "_id")->valuestring;
         if (args->print) {
             print_json(json, doc_id);
         } else {
             index_json(json, doc_id);
             cnt += 1;
         }
-        cJSON_Delete(json);
     }

     free(iterator);
+    database_close(db, FALSE);

     if (!args->print) {
         database_iterator_t *del_iter = database_create_delete_list_iterator(db);
@@ -337,11 +330,8 @@ void sist2_index(index_args_t *args) {
             delete_document(id);
             free(id);
         }
-        free(del_iter);
     }

-    database_close(db, FALSE);
-
     tpool_wait(IndexCtx.pool);
     tpool_destroy(IndexCtx.pool);
@@ -51,11 +51,11 @@
 #include <ctype.h>
 #include "git_hash.h"

-#define VERSION "3.0.4"
+#define VERSION "3.0.0"
 static const char *const Version = VERSION;
 static const int VersionMajor = 3;
 static const int VersionMinor = 0;
-static const int VersionPatch = 4;
+static const int VersionPatch = 0;

 #ifndef SIST_PLATFORM
 #define SIST_PLATFORM unknown
@@ -149,11 +149,6 @@ void worker_proc_cleanup(tpool_t *pool) {
     if (ProcData.index_db != NULL) {
         database_close(ProcData.index_db, FALSE);
     }

-    if (IndexCtx.needs_es_connection) {
-        elastic_cleanup();
-    }
-
     database_close(ProcData.ipc_db, FALSE);
 }

@@ -247,7 +242,6 @@ static void *tpool_worker(void *arg) {
     pthread_mutex_lock(&pool->shm->mutex);
     pthread_cond_signal(&pool->shm->done_working_cond);
     pthread_mutex_unlock(&pool->shm->mutex);
-    worker_proc_cleanup(pool);
 #endif

     return NULL;
@@ -20,40 +20,49 @@ static struct mg_http_serve_opts DefaultServeOpts = {

 void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {

-    if (hm->uri.len != SIST_INDEX_ID_LEN + 7) {
+    if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
         HTTP_REPLY_NOT_FOUND
         return;
     }

     char arg_index_id[SIST_INDEX_ID_LEN];
-    char arg_stat_type[5];

     memcpy(arg_index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
     *(arg_index_id + SIST_INDEX_ID_LEN - 1) = '\0';
-    memcpy(arg_stat_type, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, 4);
-    *(arg_stat_type + sizeof(arg_stat_type) - 1) = '\0';

-    database_stat_type_d stat_type = database_get_stat_type_by_mnemonic(arg_stat_type);
-    if (stat_type == DATABASE_STAT_INVALID) {
+    index_t *index = web_get_index_by_id(arg_index_id);
+    if (index == NULL) {
         HTTP_REPLY_NOT_FOUND
         return;
     }

-    database_t *db = web_get_database(arg_index_id);
-    if (db == NULL) {
-        LOG_DEBUGF("serve.c", "Could not get database for index: %s", arg_index_id);
-        HTTP_REPLY_NOT_FOUND
+    const char *file;
+    switch (atoi(hm->uri.ptr + 3 + SIST_INDEX_ID_LEN)) {
+        case 1:
+            file = "treemap.csv";
+            break;
+        case 2:
+            file = "mime_agg.csv";
+            break;
+        case 3:
+            file = "size_agg.csv";
+            break;
+        case 4:
+            file = "date_agg.csv";
+            break;
+        default:
             return;
     }

-    cJSON *json = database_get_stats(db, stat_type);
-    char *json_str = cJSON_PrintUnformatted(json);
-
-    web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json");
-    mg_send(nc, json_str, strlen(json_str));
-
-    free(json_str);
-    cJSON_Delete(json);
+    char disposition[8192];
+    snprintf(disposition, sizeof(disposition),
+             "Content-Disposition: inline; filename=\"%s\"\r\nCache-Control: max-age=31536000\r\n", file);
+
+    char full_path[PATH_MAX];
+    strcpy(full_path, index->path);
+    strcat(full_path, file);
+
+    struct mg_http_serve_opts opts = {};
+    mg_http_serve_file(nc, hm, full_path, &opts);
 }

 void serve_index_html(struct mg_connection *nc, struct mg_http_message *hm) {
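A side note on the route change above, for orientation (a sketch; the "/s/" prefix and the id layout are inferred from the pointer offsets in stats_files(), and the index id below is a made-up placeholder):

    // 3.0.4 side: four-letter mnemonic (TMAP/MAGG/SAGG/DAGG), answered
    // with JSON generated on demand from the index database.
    const indexId = "f0ca51cd";  // hypothetical, length = SIST_INDEX_ID_LEN - 1
    const treemap = await (await fetch(`/s/${indexId}/TMAP`)).json();

    // process-po side: numeric selector 1..4, answered by serving a
    // pre-generated CSV file (treemap.csv, mime_agg.csv, ...) from disk.
    const treemapCsv = await (await fetch(`/s/${indexId}/1`)).text();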
@@ -277,23 +286,16 @@ void index_info(struct mg_connection *nc) {
     cJSON *json = cJSON_CreateObject();
     cJSON *arr = cJSON_AddArrayToObject(json, "indices");

+    cJSON_AddStringToObject(json, "mongooseVersion", MG_VERSION);
     cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
     cJSON_AddStringToObject(json, "version", Version);

-#ifdef SIST_DEBUG_INFO
-    cJSON_AddStringToObject(json, "mongooseVersion", MG_VERSION);
     cJSON_AddStringToObject(json, "esVersion", es_version);
-    cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
-    cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
-    cJSON_AddBoolToObject(json, "dev", WebCtx.dev);
-    cJSON_AddBoolToObject(json, "showDebugInfo", TRUE);
-#else
-    cJSON_AddBoolToObject(json, "showDebugInfo", FALSE);
-#endif

     cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
     cJSON_AddBoolToObject(json, "esVersionLegacy", IS_LEGACY_VERSION(WebCtx.es_version));
+    cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
+    cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
     cJSON_AddStringToObject(json, "lang", WebCtx.lang);
+    cJSON_AddBoolToObject(json, "dev", WebCtx.dev);

     cJSON_AddBoolToObject(json, "auth0Enabled", WebCtx.auth0_enabled);
     if (WebCtx.auth0_enabled) {
@@ -666,9 +668,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
         mg_send(nc, r->body, r->size);
     } else if (r->status_code == 0) {
         sist_log("serve.c", LOG_SIST_ERROR, "Could not connect to elasticsearch!");
-
-        mg_http_reply(nc, 503, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER,
-                      "Elasticsearch connection error, see server logs.");
     } else {
         sist_logf("serve.c", LOG_SIST_WARNING, "ElasticSearch error during query (%d)", r->status_code);
         if (r->size != 0) {
2  third-party/libscan/CMakeLists.txt  vendored
@@ -106,7 +106,7 @@ find_library(MUPDF_LIB NAMES liblibmupdf.a)
 find_library(CMS_LIB NAMES lcms2)
 find_library(JAS_LIB NAMES jasper)
 find_library(GUMBO_LIB NAMES gumbo)
-find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/8/ /usr/lib/gcc/aarch64-linux-gnu/8/)
+find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/11/)
 find_package(Leptonica CONFIG REQUIRED)
 find_package(FFMPEG REQUIRED)
 find_package(libraw CONFIG REQUIRED)
39  third-party/libscan/libscan/mobi/scan_mobi.c  vendored
@@ -1,44 +1,9 @@
 #include "scan_mobi.h"

 #include "../../third-party/libmobi/src/mobi.h"
-#include "../media/media.h"
 #include <errno.h>
 #include "stdlib.h"

-int store_cover(scan_mobi_ctx_t *ctx, document_t *doc, MOBIData *m) {
-    MOBIExthHeader *exth = mobi_get_exthrecord_by_tag(m, EXTH_COVEROFFSET);
-
-    if (exth == NULL) {
-        return FALSE;
-    }
-
-    uint32_t offset = mobi_decode_exthvalue(exth->data, exth->size);
-    size_t first_resource = mobi_get_first_resource_record(m);
-    size_t uid = first_resource + offset;
-    MOBIPdbRecord *record = mobi_get_record_by_seqnumber(m, uid);
-
-    if (record == NULL || record->size < 4) {
-        return FALSE;
-    }
-
-    scan_media_ctx_t media_ctx = {
-            .tn_count = TRUE,
-            .tn_size = ctx->tn_size,
-            .tn_qscale = ctx->tn_qscale,
-            .tesseract_lang = NULL,
-            .tesseract_path = NULL,
-            .read_subtitles = FALSE,
-            .max_media_buffer = 0,
-            .log = ctx->log,
-            .logf = ctx->logf,
-            .store = ctx->store,
-    };
-
-    store_image_thumbnail(&media_ctx, record->data, record->size, doc, "img.jpg");
-
-    return TRUE;
-}
-
 void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {

     MOBIData *m = mobi_init();
@@ -107,10 +72,6 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {

     APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

-    if (ctx->enable_tn) {
-        store_cover(ctx, doc, m);
-    }
-
     free(content_str);
     free(buf);
     text_buffer_destroy(&tex);
5  third-party/libscan/libscan/mobi/scan_mobi.h  vendored
@@ -7,11 +7,6 @@ typedef struct {
     long content_size;
     log_callback_t log;
     logf_callback_t logf;
-    store_callback_t store;
-
-    int tn_qscale;
-    int tn_size;
-    int enable_tn;
 } scan_mobi_ctx_t;

 void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc);
2  third-party/libscan/third-party/antiword  vendored
Submodule third-party/libscan/third-party/antiword updated: ddb042143e...badfdac845
2  third-party/libscan/third-party/libmobi  vendored
Submodule third-party/libscan/third-party/libmobi updated: 864e3a86f2...395dbde361