Compare commits


36 Commits

Author SHA1 Message Date
903feb4889 Update CI build script 2023-04-10 20:01:49 -04:00
01490d1cbf Update sist2-admin for 3.x.x, more fixes 2023-04-10 19:45:08 -04:00
6182338f29 Update dependencies, fix some build issues 2023-04-10 15:10:56 -04:00
300c70883d Fixes and cleanup 2023-04-10 11:04:16 -04:00
fc36f33d52 use sqlite to save index, major thread pool refactor 2023-04-03 21:39:50 -04:00
ca973d63a4 Still WIP.. 2023-03-12 11:38:31 -04:00
f8abffba81 process pool mostly works, still WIP 2023-03-09 22:11:21 -05:00
8c662bb8f8 Adjust some structs 2023-02-27 20:44:25 -05:00
9c40dddd41 remove deprecated note 2023-02-26 11:03:29 -05:00
d259b95017 Update sist2-admin database schema, fix thumbnail-size 2023-02-26 10:42:20 -05:00
707bac86b3 Fix #329, version bump 2023-02-23 21:21:54 -05:00
8b9b067c06 Fix #332 2023-02-23 19:53:05 -05:00
b17f3ff924 Merge pull request #338 from simon987/dependabot/npm_and_yarn/sist2-admin/frontend/sideway/formula-3.0.1
Bump @sideway/formula from 3.0.0 to 3.0.1 in /sist2-admin/frontend
2023-02-23 19:32:09 -05:00
e44fbf741c update libscan-test-files 2023-02-23 18:13:27 -05:00
fa14efbeb6 Handle zipbomb files 2023-02-22 22:25:21 -05:00
c510162dd9 Fix duration formatting in sist2-admin 2023-02-16 21:07:30 -05:00
f5c664507f use index name in sist2-admin auto-named dir 2023-02-16 09:03:06 -05:00
2805fd509f Fix tag-auth param in sist2-admin #337 2023-02-13 20:19:24 -05:00
20adcce4a9 Remove default tags, add configurable featured line 2023-02-13 20:14:11 -05:00
1e6e24111b Add german in loading page 2023-02-13 20:13:07 -05:00
dependabot[bot] 5a76b855c9 Bump @sideway/formula from 3.0.0 to 3.0.1 in /sist2-admin/frontend
Bumps [@sideway/formula](https://github.com/sideway/formula) from 3.0.0 to 3.0.1.
- [Release notes](https://github.com/sideway/formula/releases)
- [Commits](https://github.com/sideway/formula/compare/v3.0.0...v3.0.1)

---
updated-dependencies:
- dependency-name: "@sideway/formula"
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-02-09 08:44:17 +00:00
6f759642fc Rework duration/resolution badge style 2023-02-07 20:39:12 -05:00
587c9a2c90 Add de lang option in config page 2023-02-03 09:27:30 -05:00
821a571ecf Merge pull request #335 from einfachTobi/UI-localization-german
UI localization german + equations in tesseract
2023-02-03 09:19:33 -05:00
einfachTobi 9020246a01 Merge branch 'simon987:master' into UI-localization-german 2023-02-03 10:18:54 +01:00
einfachTobi 200c000c5a Update Dockerfile 2023-02-03 10:18:43 +01:00
einfachTobi a43f930d00 Update messages.ts 2023-02-03 10:12:24 +01:00
abe120197a Remove generated files from repo, build vue frontends in Dockerfile 2023-02-02 20:31:16 -05:00
9e0d7bf992 Add test files as submodule, remove support for msword thumbnails 2023-02-02 19:52:37 -05:00
einfachTobi 959d4b4386 Update messages.ts 2023-02-01 14:55:37 +01:00
einfachTobi 742a50be03 Update messages.ts 2023-02-01 12:54:06 +01:00
87ecc5ef6d Update USAGE.md 2023-01-29 12:47:17 -05:00
2e3d648796 Update --thumbnail-quality argument, add documentation 2023-01-29 11:24:34 -05:00
9972e21fcc Fix lightbox 2023-01-26 20:20:58 -05:00
c625c03552 Fix #328 2023-01-25 21:30:18 -05:00
5863b9cd6e Merge pull request #327 from simon987/auth0
Add support for auth0
2023-01-24 19:56:05 -05:00
128 changed files with 6181 additions and 6738 deletions

@@ -15,7 +15,6 @@ Makefile
 **/*.cbp
 VERSION
 **/node_modules/
-.git/
 sist2-*-linux-debug
 sist2-*-linux
 sist2_debug
@@ -34,3 +33,8 @@ Dockerfile
 Dockerfile.arm64
 docker-compose.yml
 state.db
+*-journal
+build/
+__pycache__/
+sist2-vue/dist
+sist2-admin/frontend/dist

.gitignore (11 changes)

@@ -33,3 +33,14 @@ state.db
 *.pyc
 !sist2-admin/frontend/dist
 *.js.map
+sist2-vue/dist
+sist2-admin/frontend/dist
+.ninja_deps
+.ninja_log
+build.ninja
+src/web/static_generated.c
+src/magic_generated.c
+src/index/static_generated.c
+*.sist2
+*-shm
+*-journal

.gitmodules (3 changes)

@@ -10,3 +10,6 @@
 [submodule "third-party/libscan/third-party/libmobi"]
 	path = third-party/libscan/third-party/libmobi
 	url = https://github.com/bfabiszewski/libmobi
+[submodule "third-party/libscan/libscan-test-files"]
+	path = third-party/libscan/libscan-test-files
+	url = https://github.com/simon987/libscan-test-files

@@ -5,7 +5,6 @@ set(CMAKE_C_STANDARD 11)
 option(SIST_DEBUG "Build a debug executable" on)
 option(SIST_FAST "Enable more optimisation flags" off)
-option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)

 add_compile_definitions(
         "SIST_PLATFORM=${SIST_PLATFORM}"
@@ -22,29 +21,33 @@ set(ARGPARSE_SHARED off)
 add_subdirectory(third-party/argparse)

 add_executable(sist2
+        # argparse
+        third-party/argparse/argparse.h third-party/argparse/argparse.c
         src/main.c
         src/sist.h
         src/io/walk.h src/io/walk.c
-        src/io/store.h src/io/store.c
         src/tpool.h src/tpool.c
         src/parsing/parse.h src/parsing/parse.c
+        src/parsing/magic_util.c src/parsing/magic_util.h
         src/io/serialize.h src/io/serialize.c
         src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
         src/index/web.c src/index/web.h
         src/web/serve.c src/web/serve.h
+        src/web/web_util.c src/web/web_util.h
         src/index/elastic.c src/index/elastic.h
         src/util.c src/util.h
-        src/ctx.h src/types.h src/ctx.c
+        src/ctx.h
+        src/types.h
         src/log.c src/log.h
         src/cli.c src/cli.h
-        src/stats.c src/stats.h
+        src/ctx.c
         src/parsing/sidecar.c src/parsing/sidecar.h
+        src/database/database.c src/database/database.h
+        src/parsing/fs_util.h
         src/auth0/auth0_c_api.h src/auth0/auth0_c_api.cpp
-        # argparse
-        third-party/argparse/argparse.h third-party/argparse/argparse.c
-)
+        src/database/database_stats.c src/database/database_stats.h src/database/database_schema.c)

 set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
 target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
@@ -52,16 +55,11 @@ set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
 find_package(PkgConfig REQUIRED)
-pkg_search_module(GLIB REQUIRED glib-2.0)
-find_package(lmdb CONFIG REQUIRED)
 find_package(cJSON CONFIG REQUIRED)
 find_package(unofficial-mongoose CONFIG REQUIRED)
 find_package(CURL CONFIG REQUIRED)
-find_library(MAGIC_LIB
-        NAMES libmagic.so.1 magic
-        PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
-)
+find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
+find_package(unofficial-sqlite3 CONFIG REQUIRED)

 target_include_directories(
@@ -70,7 +68,6 @@ target_include_directories(
         ${CMAKE_SOURCE_DIR}/third-party/utf8.h/
         ${CMAKE_SOURCE_DIR}/third-party/libscan/
         ${CMAKE_SOURCE_DIR}/
-        ${GLIB_INCLUDE_DIRS}
 )

 target_compile_options(
@@ -120,6 +117,7 @@ else ()
         -Ofast
         -fno-stack-protector
         -fomit-frame-pointer
+        -w
 )
 endif ()
@@ -133,20 +131,16 @@
         sist2

         z
-        lmdb
         cjson
         argparse

-        ${GLIB_LDFLAGS}
         unofficial::mongoose::mongoose
         CURL::libcurl

         pthread
-        c

         scan

         ${MAGIC_LIB}
+
+        unofficial::sqlite3::sqlite3
 )

 add_custom_target(

@@ -1,6 +1,11 @@
 FROM simon987/sist2-build as build
 MAINTAINER simon987 <me@simon987.net>
+ENV DEBIAN_FRONTEND=noninteractive
+RUN curl -fsSL https://deb.nodesource.com/setup_16.x | bash
+RUN apt update -y; apt install -y nodejs && rm -rf /var/lib/apt/lists/*

 WORKDIR /build/
 COPY scripts scripts
@@ -9,10 +14,14 @@ COPY CMakeLists.txt .
 COPY third-party third-party
 COPY src src
 COPY sist2-vue sist2-vue
+COPY sist2-admin sist2-admin

-RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
-RUN make -j$(nproc)
-RUN strip sist2 || mv sist2_debug sist2
+RUN cd sist2-vue/ && npm install && npm run build
+RUN cd sist2-admin/frontend/ && npm install && npm run build
+
+RUN mkdir build && cd build && cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
+RUN cd build && make -j$(nproc)
+RUN strip build/sist2 || mv build/sist2_debug build/sist2

 FROM --platform="linux/amd64" ubuntu@sha256:965fbcae990b0467ed5657caceaec165018ef44a4d2d46c7cdea80a9dff0d1ea
@@ -24,7 +33,7 @@ ENV LC_ALL C.UTF-8
 ENTRYPOINT ["/root/sist2"]

 RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libasan5 libmagic1 python3 \
-    python3-pip git tesseract-ocr libpq-dev && rm -rf /var/lib/apt/lists/*
+    python3-pip git tesseract-ocr && rm -rf /var/lib/apt/lists/*

 RUN mkdir -p /usr/share/tessdata && \
     cd /usr/share/tessdata/ && \
@@ -35,12 +44,14 @@ RUN mkdir -p /usr/share/tessdata && \
     curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
     curl -o /usr/share/tessdata/osd.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/osd.traineddata &&\
     curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata &&\
+    curl -o /usr/share/tessdata/deu.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/deu.traineddata &&\
+    curl -o /usr/share/tessdata/equ.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/equ.traineddata &&\
     curl -o /usr/share/tessdata/chi_sim.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/chi_sim.traineddata

 # sist2
-COPY --from=build /build/sist2 /root/sist2
+COPY --from=build /build/build/sist2 /root/sist2
 # sist2-admin
 COPY sist2-admin/requirements.txt sist2-admin/
 RUN python3 -m pip install --no-cache -r sist2-admin/requirements.txt
-COPY sist2-admin/ sist2-admin/
+COPY --from=build /build/sist2-admin/ sist2-admin/

@@ -3,13 +3,20 @@ MAINTAINER simon987 <me@simon987.net>
 WORKDIR /build/
 ADD . /build/

-RUN cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
-RUN make -j$(nproc)
-RUN strip sist2
+RUN mkdir build && cd build && cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
+RUN cd build && make -j$(nproc)
+RUN strip build/sist2 || mv build/sist2_debug build/sist2

-FROM --platform="linux/arm64/v8" ubuntu:20.04
-RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
+FROM --platform=linux/arm64/v8 ubuntu@sha256:537da24818633b45fcb65e5285a68c3ec1f3db25f5ae5476a7757bc8dfae92a3
+
+WORKDIR /root
+ENV LANG C.UTF-8
+ENV LC_ALL C.UTF-8
+ENTRYPOINT ["/root/sist2"]
+
+RUN apt update && apt install -y curl libasan5 libmagic1 tesseract-ocr python3-pip python3 git && rm -rf /var/lib/apt/lists/*

 RUN mkdir -p /usr/share/tessdata && \
     cd /usr/share/tessdata/ && \
@@ -18,11 +25,16 @@ RUN mkdir -p /usr/share/tessdata && \
     curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
     curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
     curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
-    curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
-
-ENV LANG C.UTF-8
-ENV LC_ALL C.UTF-8
-ENTRYPOINT ["/root/sist2"]
-
-COPY --from=build /build/sist2 /root/sist2
+    curl -o /usr/share/tessdata/osd.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/osd.traineddata &&\
+    curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata &&\
+    curl -o /usr/share/tessdata/deu.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/deu.traineddata &&\
+    curl -o /usr/share/tessdata/equ.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/equ.traineddata &&\
+    curl -o /usr/share/tessdata/chi_sim.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/chi_sim.traineddata
+
+# sist2
+COPY --from=build /build/build/sist2 /root/sist2
+
+# sist2-admin
+COPY sist2-admin/requirements.txt sist2-admin/
+RUN python3 -m pip install --no-cache -r sist2-admin/requirements.txt
+COPY --from=build /build/sist2-admin/ sist2-admin/

@@ -37,12 +37,12 @@ sist2 (Simple incremental search tool)
 1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
 1. *(or)* Run using docker:
     ```bash
-    docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.14.0
+    docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.17.9
     ```
 1. *(or)* Run using docker-compose:
     ```yaml
     elasticsearch:
-      image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
+      image: docker.elastic.co/elasticsearch/elasticsearch:7.17.9
       environment:
         - discovery.type=single-node
         - "ES_JAVA_OPTS=-Xms1G -Xmx2G"
@@ -81,7 +81,7 @@ See [Usage guide](docs/USAGE.md) for more details
 | html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
 | tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
 | docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
-| doc (MS Word 97-2003) | antiword | yes | yes | author, title |
+| doc (MS Word 97-2003) | antiword | yes | no | author, title |
 | mobi, azw, azw3 | libmobi | yes | no | author, title |
 | wpd (WordPerfect) | libwpd | yes | no | *planned* |
 | json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
@@ -109,7 +109,7 @@ Download the language data files with your package manager (`apt install tessera
 directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
 The `simon987/sist2` image comes with common languages
-(hin, jpn, eng, fra, rus, spa) pre-installed.
+(hin, jpn, eng, fra, rus, spa, chi_sim, deu) pre-installed.

 You can use the `+` separator to specify multiple languages. The language
 name must be identical to the `*.traineddata` file installed on your system
@@ -141,7 +141,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
 1. Install compile-time dependencies
    ```bash
-   apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
+   apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
    ```
 1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
@@ -149,13 +149,14 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
 1. Install vcpkg dependencies
    ```bash
-   vcpkg install curl[core,openssl]
-   vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw jasper lcms gumbo
+   vcpkg install curl[core,openssl] sqlite3 cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample]
    ```
 1. Build
    ```bash
    git clone --recursive https://github.com/simon987/sist2/
+   (cd sist2-vue; npm install; npm run build)
+   (cd sist2-admin/frontend; npm install; npm run build)
    cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
    make
    ```

@@ -12,7 +12,7 @@ REWRITE_URL=""
 sist2 scan \
     --threads 14 \
     --mem-throttle 32768 \
-    --quality 1.0 \
+    --thumbnail-quality 2 \
     --name $NAME \
     --ocr-lang=eng+chi_sim \
     --ocr-ebooks \

@@ -12,7 +12,7 @@ REWRITE_URL=""
 sist2 scan \
     --threads 14 \
     --mem-throttle 32768 \
-    --quality 1.0 \
+    --thumbnail-quality 2 \
     --name $NAME \
     --ocr-lang=eng+chi_sim \
     --ocr-ebooks \

@@ -2,7 +2,7 @@ version: "3"
 services:
   elasticsearch:
-    image: elasticsearch:7.14.0
+    image: elasticsearch:7.17.9
     container_name: sist2-es
     environment:
       - "discovery.type=single-node"
@@ -15,9 +15,9 @@ services:
       - /mnt/array/sist2-admin-data/:/sist2-admin/
       - /:/host
     ports:
-      - 4090:4090
       # NOTE: Don't export this port publicly!
       - 8080:8080
+      - 4090:4090
     working_dir: /root/sist2-admin/
     entrypoint: python3
     command:

@@ -33,7 +33,7 @@ Lightning-fast file system indexer and search tool.
 Scan options
   -t, --threads=<int>           Number of threads. DEFAULT=1
       --mem-throttle=<int>      Total memory threshold in MiB for scan throttling. DEFAULT=0
-  -q, --thumbnail-quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1
+  -q, --thumbnail-quality=<int> Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2
       --thumbnail-size=<int>    Thumbnail size, in pixels. DEFAULT=500
       --thumbnail-count=<int>   Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1
       --content-size=<int>      Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768
@@ -101,7 +101,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
   Total memory threshold in MiB for scan throttling. Worker threads will not start a new parse job
   until the total memory usage of sist2 is below this threshold. Set to 0 to disable. DEFAULT=0
 * `-q, --thumbnail-quality`
-  Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best.
+  Thumbnail quality, on a scale of 2 to 32, 2 being the best. See section below for a rough estimate of thumbnail database size
 * `--thumbnail-size`
   Thumbnail size in pixels.
 * `--thumbnail-count`
@@ -154,6 +154,18 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
 operations. Checksums are not calculated for all file types, unless the file is inside an archive. When enabled, duplicate
 files are hidden in the web UI (this behaviour can be toggled in the Configuration page).

+#### Thumbnail database size estimation
+
+See chart below for rough estimate of thumbnail size vs. thumbnail size & quality arguments:
+For example, `--thumbnail-size=500`, `--thumbnail-quality=2` for a directory with 8 million images will create a thumbnail database
+that is about `8000000 * 36kB = 288GB`.
+
+![thumbnail_size](thumbnail_size.png)
+
+// TODO: add note about LMDB page size 4096
+
 ### Scan examples

 Simple scan
@@ -161,7 +173,7 @@ Simple scan
 sist2 scan ~/Documents

 sist2 scan \
-    --threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
+    --threads 4 --content-size 16000000 --thumbnail-quality 2 --archive shallow \
     --name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
     ~/Documents -o ./documents.idx/

BIN docs/thumbnail_size.png (new binary file, 180 KiB)
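The size estimate quoted in the new USAGE.md section is a straight multiplication of image count by the average per-thumbnail size read off that chart. A minimal sketch of the arithmetic, assuming the ~36 kB average the example uses for `--thumbnail-size=500 --thumbnail-quality=2`:

```python
# Rough thumbnail-database size estimate, mirroring the USAGE.md example.
# The 36 kB average is read off the chart for size=500, quality=2;
# other size/quality combinations would use a different average.
AVG_THUMBNAIL_KB = 36
image_count = 8_000_000

estimated_gb = image_count * AVG_THUMBNAIL_KB / 1_000_000
print(f"~{estimated_gb:.0f} GB")  # -> ~288 GB
```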

@@ -1,10 +1,13 @@
 #!/usr/bin/env bash
-rm -rf index.sist2/
+(
+cd ..
+rm -rf index.sist2
 python3 scripts/mime.py > src/parsing/mime_generated.c
 python3 scripts/serve_static.py > src/web/static_generated.c
 python3 scripts/index_static.py > src/index/static_generated.c
 python3 scripts/magic_static.py > src/magic_generated.c
 printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h
+)

@@ -4,14 +4,20 @@ VCPKG_ROOT="/vcpkg"
 git submodule update --init --recursive

-rm -rf CMakeFiles CMakeCache.txt
-cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
-make -j $(nproc)
-strip sist2
-./sist2 -v > VERSION
-mv sist2 sist2-x64-linux
+mkdir build
+(
+cd build
+cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
+make -j $(nproc)
+strip sist2
+./sist2 -v > VERSION
+)
+mv build/sist2 sist2-x64-linux

-rm -rf CMakeFiles CMakeCache.txt
-cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
-make -j $(nproc)
-mv sist2_debug sist2-x64-linux-debug
+(
+cd build
+rm -rf CMakeFiles CMakeCache.txt
+cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ..
+make -j $(nproc)
+)
+mv build/sist2_debug sist2-x64-linux-debug

@@ -4,14 +4,19 @@ VCPKG_ROOT="/vcpkg"
 git submodule update --init --recursive

-rm -rf CMakeFiles CMakeCache.txt
-cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
-make -j $(nproc)
-strip sist2
-mv sist2 sist2-arm64-linux
+mkdir build
+(
+cd build
+cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
+make -j $(nproc)
+strip sist2
+)
+mv build/sist2 sist2-arm64-linux

 rm -rf CMakeFiles CMakeCache.txt
-cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
-make -j $(nproc)
-strip sist2
-mv sist2_debug sist2-arm64-linux-debug
+(
+cd build
+cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
+make -j $(nproc)
+)
+mv build/sist2_debug sist2-arm64-linux-debug

@@ -1,3 +1,4 @@
+application/x-matlab-data,mat
 application/arj, arj
 application/base64, mme
 application/binhex, hqx
@@ -29,7 +30,7 @@ application/mime, aps
 application/mspowerpoint, ppz
 application/msword, doc|dot|w6w|wiz|word
 application/netmc, mcp
-application/octet-stream, bin|dump|gpg
+application/octet-stream, bin|dump|gpg|pack|idx
 application/oda, oda
 application/ogg, ogv
 application/pdf, pdf
@@ -243,7 +244,7 @@ audio/make, funk|my|pfunk
 audio/midi, kar
 audio/mid, rmi
 audio/mp4, m4b
-audio/mpeg, m2a|mpa
+audio/mpeg, m2a|mpa|mpga
 audio/ogg, ogg
 audio/s3m, s3m
 audio/tsp-audio, tsi
@@ -346,6 +347,8 @@ text/mcf, mcf
 text/pascal, pas
 text/PGP,
 text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
+text/x-script.python, pyx
+text/csv,
 application/vnd.coffeescript, coffee
 text/richtext, rt|rtf|rtx
 text/rtf,
@@ -382,7 +385,7 @@ text/x-pascal, p
 text/x-perl, pl
 text/x-php, php
 text/x-po, po
-text/x-python, py
+text/x-python, py|pyi
 text/x-ruby, rb
 text/x-sass, sass
 text/x-scss, scss

@@ -1,3 +1,5 @@
+import zlib
+
 mimes = {}
 noparse = set()
 ext_in_hash = set()
@@ -135,24 +137,40 @@ def clean(t):
     return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")

+def crc(s):
+    return zlib.crc32(s.encode()) & 0xffffffff
+
 with open("scripts/mime.csv") as f:
     for l in f:
         mime, ext_list = l.split(",")
         if l.startswith("!"):
             mime = mime[1:]
             noparse.add(mime)
-        ext = [x.strip() for x in ext_list.split("|")]
+        ext = [x.strip() for x in ext_list.split("|") if x.strip() != ""]
         mimes[mime] = ext

+    seen_crc = set()
+    for ext in mimes.values():
+        for e in ext:
+            if crc(e) in seen_crc:
+                raise Exception("CRC32 collision")
+            seen_crc.add(crc(e))
+
+    seen_crc = set()
+    for mime in mimes.keys():
+        if crc(mime) in seen_crc:
+            raise Exception("CRC32 collision")
+        seen_crc.add(crc(mime))
+
     print("// **Generated by mime.py**")
     print("#ifndef MIME_GENERATED_C")
     print("#define MIME_GENERATED_C")
-    print("#include <glib.h>\n")
     print("#include <stdlib.h>\n")

     # Enum
     print("enum mime {")
     for mime, ext in sorted(mimes.items()):
-        print(" " + clean(mime) + "=" + mime_id(mime) + ",")
+        print(f"{clean(mime)}={mime_id(mime)},")
     print("};")

     # Enum -> string
@@ -163,20 +181,20 @@ with open("scripts/mime.csv") as f:
     print("default: return NULL;}}")

     # Ext -> Enum
-    print("GHashTable *mime_get_ext_table() {"
-          "GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);")
+    print("unsigned int mime_extension_lookup(unsigned long extension_crc32) {"
+          "switch (extension_crc32) {")
     for mime, ext in mimes.items():
-        for e in [e for e in ext if e]:
-            print("g_hash_table_insert(ext_table, \"" + e + "\", (gpointer)" + clean(mime) + ");")
-            if e in ext_in_hash:
-                raise Exception("extension already in hash: " + e)
-            ext_in_hash.add(e)
-    print("return ext_table;}")
+        if len(ext) > 0:
+            for e in ext:
+                print(f"case {crc(e)}:", end="")
+                print(f"return {clean(mime)};")
+    print("default: return 0;}}")

     # string -> Enum
-    print("GHashTable *mime_get_mime_table() {"
-          "GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);")
-    for mime, ext in mimes.items():
-        print("g_hash_table_insert(mime_table, \"" + mime + "\", (gpointer)" + clean(mime) + ");")
-    print("return mime_table;}")
+    print("unsigned int mime_name_lookup(unsigned long mime_crc32) {"
+          "switch (mime_crc32) {")
+    for mime in mimes.keys():
+        print(f"case {crc(mime)}: return {clean(mime)};")
+    print("default: return 0;}}")

     print("#endif")

@@ -1,3 +1,3 @@
 docker run --rm -it --name "sist2-dev-es"\
     -p 9200:9200 -e "discovery.type=single-node" \
-    -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.14.0
+    -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.17.9


@@ -1 +0,0 @@
.navbar[data-v-27bc1d68]{box-shadow:0 .125rem .25rem rgba(0,0,0,.08)!important;border-radius:0}.theme-black .navbar[data-v-27bc1d68]{background:rgba(84,107,122,.18823529411764706);border-bottom:none}.navbar-brand[data-v-27bc1d68]{color:#222!important;font-size:1.75rem;padding:0}.navbar-brand[data-v-27bc1d68]:hover{color:#000!important}.version[data-v-27bc1d68]{color:#222!important;margin-left:-18px;margin-top:-14px;font-size:11px;font-family:monospace}.btn-link[data-v-27bc1d68]{color:#222}body,html{height:100%}#app{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;color:#2c3e50;padding-bottom:1em;min-height:100%}.info-icon{width:1rem;margin-right:.2rem;cursor:pointer;line-height:1rem;height:1rem;background-image:url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA0MjYuNjY3IDQyNi42NjciIGZpbGw9IiNmZmYiPjxwYXRoIGQ9Ik0xOTIgMTkyaDQyLjY2N3YxMjhIMTkyeiIvPjxwYXRoIGQ9Ik0yMTMuMzMzIDBDOTUuNDY3IDAgMCA5NS40NjcgMCAyMTMuMzMzczk1LjQ2NyAyMTMuMzMzIDIxMy4zMzMgMjEzLjMzM1M0MjYuNjY3IDMzMS4yIDQyNi42NjcgMjEzLjMzMyAzMzEuMiAwIDIxMy4zMzMgMHptMCAzODRjLTk0LjA4IDAtMTcwLjY2Ny03Ni41ODctMTcwLjY2Ny0xNzAuNjY3UzExOS4yNTMgNDIuNjY3IDIxMy4zMzMgNDIuNjY3IDM4NCAxMTkuMjUzIDM4NCAyMTMuMzMzIDMwNy40MTMgMzg0IDIxMy4zMzMgMzg0eiIvPjxwYXRoIGQ9Ik0xOTIgMTA2LjY2N2g0Mi42Njd2NDIuNjY3SDE5MnoiLz48L3N2Zz4=);filter:brightness(45%);display:block}.tabs{margin-top:10px}.modal-title{text-overflow:ellipsis;overflow:hidden;white-space:nowrap}@media screen and (min-width:1500px){.container{max-width:1440px}}label{margin-top:.5rem;margin-bottom:0}.shrink[data-v-9b017c42]{flex-grow:inherit}#task-history[data-v-46960281]{font-family:monospace;font-size:12px}#log-tail-output span{display:block}span.DEBUG{color:#9e9e9e}span.WARNING{color:#ffb300}span.INFO{color:#039be5}span.ERROR,span.FATAL{color:#f4511e}span.ADMIN{color:#ee05ff}#log-tail-output{font-size:13px;font-family:monospace;padding:6px;background-color:#f5f5f5;border:1px solid #ccc;border-radius:4px;margin:3px;white-space:pre;color:#000;overflow:hidden}

File diff suppressed because one or more lines are too long

BIN (deleted binary file, was 15 KiB)


@@ -1 +0,0 @@
<!DOCTYPE html><html lang=""><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1"><link rel="icon" href="favicon.ico"><title>sist2-admin</title><link href="css/app.css" rel="preload" as="style"><link href="css/chunk-vendors.css" rel="preload" as="style"><link href="js/app.js" rel="preload" as="script"><link href="js/chunk-vendors.js" rel="preload" as="script"><link href="css/chunk-vendors.css" rel="stylesheet"><link href="css/app.css" rel="stylesheet"></head><body><noscript><strong>We're sorry but sist2-admin-vue doesn't work properly without JavaScript enabled. Please enable it to continue.</strong></noscript><div id="app"></div><script src="js/chunk-vendors.js"></script><script src="js/app.js"></script></body></html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

@@ -20,14 +20,11 @@
   },
   "devDependencies": {
     "@vue/cli-plugin-babel": "~5.0.8",
-    "@vue/cli-plugin-eslint": "~5.0.8",
     "@vue/cli-plugin-router": "~5.0.8",
     "@vue/cli-plugin-vuex": "~5.0.8",
     "@vue/cli-service": "~5.0.8",
     "babel-eslint": "^10.1.0",
     "bootstrap": "^4.5.2",
-    "eslint": "^6.7.2",
-    "eslint-plugin-vue": "^6.2.2",
     "vue-template-compiler": "^2.6.11"
   },
   "eslintConfig": {

@@ -4,7 +4,7 @@
     <meta charset="utf-8">
     <meta http-equiv="X-UA-Compatible" content="IE=edge">
     <meta name="viewport" content="width=device-width,initial-scale=1.0">
-    <link rel="icon" href="<%= BASE_URL %>favicon.ico">
+    <link rel="icon" href="<%= BASE_URL %>serve_favicon_ico.ico">
     <title>sist2-admin</title>
   </head>
   <body>

@@ -28,16 +28,22 @@ export default {
       return this.$store.state.jobDesktopNotificationMap[this.job.name];
     }
   },
-  methods: {
+  mounted() {
+    this.cronValid = this.checkCron(this.job.cron_expression)
+  },
+  methods: {
+    checkCron(expression) {
+      return /((((\d+,)+\d+|(\d+([/-])\d+)|\d+|\*) ?){5,7})/.test(expression);
+    },
     updateNotifications(value) {
       this.$store.dispatch("setJobDesktopNotification", {
         job: this.job.name,
         enabled: value
-      })
+      });
     },
     update() {
       if (this.job.schedule_enabled) {
-        this.cronValid = /((((\d+,)+\d+|(\d+([/-])\d+)|\d+|\*) ?){5,7})/.test(this.job.cron_expression);
+        this.cronValid = this.checkCron(this.job.cron_expression);
      } else {
         this.cronValid = undefined;
       }

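Note that `checkCron()` above is a loose syntactic filter, not a full cron parser. A quick sketch of the same check in Python (the sample expressions are hypothetical; `0 0 * * *` is the default `cron_expression` used elsewhere in sist2-admin):

```python
import re

# Same pattern as checkCron(): 5 to 7 whitespace-separated fields made of
# numbers, ranges, steps, comma lists, or "*". Like JS .test(), re.search()
# accepts a match anywhere in the string.
CRON_RE = re.compile(r"((((\d+,)+\d+|(\d+([/-])\d+)|\d+|\*) ?){5,7})")

for expr in ["0 0 * * *", "*/15 * * * *", "every day at noon"]:
    print(f"{expr!r} -> {bool(CRON_RE.search(expr))}")
```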
@@ -6,9 +6,6 @@
 <label>{{ $t("scanOptions.threads") }}</label>
 <b-form-input type="number" min="1" v-model="options.threads" @change="update()"></b-form-input>

-<label>{{ $t("scanOptions.memThrottle") }}</label>
-<b-form-input type="number" min="0" v-model="options.mem_throttle" @change="update()"></b-form-input>
-
 <label>{{ $t("scanOptions.thumbnailQuality") }}</label>
 <b-form-input type="number" min="1" max="31" v-model="options.thumbnail_quality" @change="update()"></b-form-input>
@@ -70,8 +67,9 @@
 {{ $t("scanOptions.readSubtitles") }}
 </b-form-checkbox>

-<label>{{ $t("scanOptions.memBuffer") }}</label>
-<b-form-input type="number" min="0" v-model="options.mem_buffer" @change="update()"></b-form-input>
+<b-form-checkbox v-model="options.optimize_index" @change="update()">
+{{ $t("scanOptions.optimizeIndex") }}
+</b-form-checkbox>

 <label>{{ $t("scanOptions.treemapThreshold") }}</label>
 <b-form-input type="number" min="0" v-model="options.treemap_threshold" @change="update()"></b-form-input>

@@ -56,13 +56,17 @@ export default {
         tagline: "Tagline in navbar",
         auth: "Basic auth in user:password format",
         tagAuth: "Basic auth in user:password format for tagging",
+        auth0Audience: "Auth0 audience",
+        auth0Domain: "Auth0 domain",
+        auth0ClientId: "Auth0 client ID",
+        auth0PublicKey: "Auth0 public key",
     },
     scanOptions: {
         title: "Scanning options",
         path: "Path",
         threads: "Number of threads",
         memThrottle: "Total memory threshold in MiB for scan throttling",
-        thumbnailQuality: "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best",
+        thumbnailQuality: "Thumbnail quality, on a scale of 2 to 32, 2 being the best",
         thumbnailCount: "Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails.",
         thumbnailSize: "Thumbnail size, in pixels",
         contentSize: "Number of bytes to be extracted from text documents. Set to 0 to disable",
@@ -80,7 +84,8 @@
         checksums: "Calculate file checksums when scanning",
         readSubtitles: "Read subtitles from media files",
         memBuffer: "Maximum memory buffer size per thread in MiB for files inside archives",
-        treemapThreshold: "Relative size threshold for treemap"
+        treemapThreshold: "Relative size threshold for treemap",
+        optimizeIndex: "Defragment index file after scan to reduce its file size."
     },
     indexOptions: {
         title: "Indexing options",

@@ -40,6 +40,39 @@
 import TaskListItem from "@/components/TaskListItem";
 import Sist2AdminApi from "@/Sist2AdminApi";
 import moment from "moment";

+const DAY = 3600 * 24;
+const HOUR = 3600;
+const MINUTE = 60;
+
+function humanDuration(sec_num) {
+    sec_num = sec_num / 1000;
+
+    const days = Math.floor(sec_num / DAY);
+    sec_num -= days * DAY;
+    const hours = Math.floor(sec_num / HOUR);
+    sec_num -= hours * HOUR;
+    const minutes = Math.floor(sec_num / MINUTE);
+    sec_num -= minutes * MINUTE;
+    const seconds = Math.floor(sec_num);
+
+    if (days > 0) {
+        return `${days} days ${hours}h ${minutes}m ${seconds}s`;
+    }
+    if (hours > 0) {
+        return `${hours}h ${minutes}m ${seconds}s`;
+    }
+    if (minutes > 0) {
+        return `${minutes}m ${seconds}s`;
+    }
+    if (seconds > 0) {
+        return `${seconds}s`;
+    }
+    return "<0s";
+}
+
 export default {
     name: 'Tasks',
     components: {TaskListItem},
@@ -100,17 +133,10 @@ export default {
             })
         },
         taskDuration(task) {
-            const start = moment(task.started);
-            const end = moment(task.ended);
-
-            let duration = moment.utc(end.diff(start)).format("HH[h] mm[m] ss[s]");
-            duration = duration.replace("00h ", "");
-            duration = duration.replace(/^00m /, "");
-            duration = duration.replace(/00s/, "<1s");
-            duration = duration.replace(/^0/, "");
-
-            return duration;
+            const start = moment.utc(task.started);
+            const end = moment.utc(task.ended);
+
+            return humanDuration(end.diff(start))
         }
     }
 }

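`humanDuration()` receives the raw millisecond difference produced by moment's `diff()`. For illustration only, a Python sketch of the same splitting logic (not part of the change itself):

```python
def human_duration(ms: int) -> str:
    # Split a millisecond delta into days/hours/minutes/seconds and
    # drop the leading units that are zero, as humanDuration() does.
    seconds = ms // 1000
    days, seconds = divmod(seconds, 3600 * 24)
    hours, seconds = divmod(seconds, 3600)
    minutes, seconds = divmod(seconds, 60)
    if days:
        return f"{days} days {hours}h {minutes}m {seconds}s"
    if hours:
        return f"{hours}h {minutes}m {seconds}s"
    if minutes:
        return f"{minutes}m {seconds}s"
    return f"{seconds}s" if seconds else "<0s"

print(human_duration(93_784_000))  # -> 1 days 2h 3m 4s
```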
File diff suppressed because it is too large

@@ -21,13 +21,11 @@ from config import LOG_FOLDER, logger, WEBSERVER_PORT, DATA_FOLDER, SIST2_BINARY
 from jobs import Sist2Job, Sist2ScanTask, TaskQueue, Sist2IndexTask, JobStatus
 from notifications import Subscribe, Notifications
 from sist2 import Sist2
-from state import PickleTable, RUNNING_FRONTENDS, TESSERACT_LANGS, DB_SCHEMA_VERSION
+from state import migrate_v1_to_v2, RUNNING_FRONTENDS, TESSERACT_LANGS, DB_SCHEMA_VERSION
 from web import Sist2Frontend

-VERSION = "1.0"
-
 sist2 = Sist2(SIST2_BINARY, DATA_FOLDER)
-db = PersistentState(table_factory=PickleTable, dbfile=os.path.join(DATA_FOLDER, "state.db"))
+db = PersistentState(dbfile=os.path.join(DATA_FOLDER, "state.db"))
 notifications = Notifications()
 task_queue = TaskQueue(sist2, db, notifications)
@@ -52,7 +50,6 @@ async def home():
 @app.get("/api")
 async def api():
     return {
-        "version": VERSION,
         "tesseract_langs": TESSERACT_LANGS,
         "logs_folder": LOG_FOLDER
     }
@@ -60,18 +57,17 @@ async def api():
 @app.get("/api/job/{name:str}")
 async def get_job(name: str):
-    row = db["jobs"][name]
-    if row:
-        return row["job"]
-    raise HTTPException(status_code=404)
+    job = db["jobs"][name]
+    if not job:
+        raise HTTPException(status_code=404)
+    return job

 @app.get("/api/frontend/{name:str}")
 async def get_frontend(name: str):
-    row = db["frontends"][name]
-    if row:
-        frontend = row["frontend"]
-        frontend: Sist2Frontend
+    frontend = db["frontends"][name]
+    frontend: Sist2Frontend
+    if frontend:
         frontend.running = frontend.name in RUNNING_FRONTENDS
         return frontend
     raise HTTPException(status_code=404)
@@ -79,16 +75,16 @@ async def get_frontend(name: str):
 @app.get("/api/job/")
 async def get_jobs():
-    return [row["job"] for row in db["jobs"]]
+    return list(db["jobs"])

 @app.put("/api/job/{name:str}")
-async def update_job(name: str, job: Sist2Job):
+async def update_job(name: str, new_job: Sist2Job):
     # TODO: Check etag
-    job.last_modified = datetime.now()
+    new_job.last_modified = datetime.now()

-    row = db["jobs"][name]
-    if not row:
+    job = db["jobs"][name]
+    if not job:
         raise HTTPException(status_code=404)

     args_that_trigger_full_scan = [
@@ -108,15 +104,15 @@ async def update_job(name: str, job: Sist2Job):
         "read_subtitles",
     ]
     for arg in args_that_trigger_full_scan:
-        if getattr(row["job"].scan_options, arg) != getattr(job.scan_options, arg):
-            job.do_full_scan = True
+        if getattr(new_job.scan_options, arg) != getattr(job.scan_options, arg):
+            new_job.do_full_scan = True

-    db["jobs"][name] = {"job": job}
+    db["jobs"][name] = new_job

 @app.put("/api/frontend/{name:str}")
 async def update_frontend(name: str, frontend: Sist2Frontend):
-    db["frontends"][name] = {"frontend": frontend}
+    db["frontends"][name] = frontend
     # TODO: Check etag
@@ -142,7 +138,7 @@ def _run_job(job: Sist2Job):
     job.last_modified = datetime.now()
     if job.status == JobStatus("created"):
         job.status = JobStatus("started")
-    db["jobs"][job.name] = {"job": job}
+    db["jobs"][job.name] = job

     scan_task = Sist2ScanTask(job, f"Scan [{job.name}]")
     index_task = Sist2IndexTask(job, f"Index [{job.name}]", depends_on=scan_task)
@@ -153,19 +149,19 @@ def _run_job(job: Sist2Job):
 @app.get("/api/job/{name:str}/run")
 async def run_job(name: str):
-    row = db["jobs"][name]
-    if not row:
+    job = db["jobs"][name]
+    if not job:
         raise HTTPException(status_code=404)

-    _run_job(row["job"])
+    _run_job(job)
     return "ok"

 @app.delete("/api/job/{name:str}")
 async def delete_job(name: str):
-    row = db["jobs"][name]
-    if row:
+    job = db["jobs"][name]
+    if job:
         del db["jobs"][name]
     else:
         raise HTTPException(status_code=404)
@@ -177,8 +173,8 @@ async def delete_frontend(name: str):
         os.kill(RUNNING_FRONTENDS[name], signal.SIGTERM)
         del RUNNING_FRONTENDS[name]

-    row = db["frontends"][name]
-    if row:
+    frontend = db["frontends"][name]
+    if frontend:
         del db["frontends"][name]
     else:
         raise HTTPException(status_code=404)
@@ -190,18 +186,18 @@ async def create_job(name: str):
         raise ValueError("Job with the same name already exists")

     job = Sist2Job.create_default(name)
-    db["jobs"][name] = {"job": job}
+    db["jobs"][name] = job

     return job

 @app.post("/api/frontend/{name:str}")
 async def create_frontend(name: str):
-    if db["frontend"][name]:
+    if db["frontends"][name]:
         raise ValueError("Frontend with the same name already exists")

     frontend = Sist2Frontend.create_default(name)
-    db["frontends"][name] = {"frontend": frontend}
+    db["frontends"][name] = frontend

     return frontend
@@ -255,7 +251,7 @@ def check_es_version(es_url: str, insecure: bool):
 def start_frontend_(frontend: Sist2Frontend):
-    frontend.web_options.indices = list(map(lambda j: db["jobs"][j]["job"].last_index, frontend.jobs))
+    frontend.web_options.indices = list(map(lambda j: db["jobs"][j].index_path, frontend.jobs))

     pid = sist2.web(frontend.web_options, frontend.name)
     RUNNING_FRONTENDS[frontend.name] = pid
@@ -263,11 +259,11 @@ def start_frontend_(frontend: Sist2Frontend):
 @app.post("/api/frontend/{name:str}/start")
 async def start_frontend(name: str):
-    row = db["frontends"][name]
-    if not row:
+    frontend = db["frontends"][name]
+    if not frontend:
         raise HTTPException(status_code=404)

-    start_frontend_(row["frontend"])
+    start_frontend_(frontend)

 @app.post("/api/frontend/{name:str}/stop")
@@ -280,8 +276,7 @@ async def stop_frontend(name: str):
 @app.get("/api/frontend/")
 async def get_frontends():
     res = []
-    for row in db["frontends"]:
-        frontend = row["frontend"]
+    for frontend in db["frontends"]:
         frontend: Sist2Frontend
         frontend.running = frontend.name in RUNNING_FRONTENDS
         res.append(frontend)
@@ -364,14 +359,14 @@ def initialize_db():
     db["sist2_admin"]["info"] = {"version": DB_SCHEMA_VERSION}

     frontend = Sist2Frontend.create_default("default")
-    db["frontends"]["default"] = {"frontend": frontend}
+    db["frontends"]["default"] = frontend

     logger.info("Initialized database.")

 def start_frontends():
-    for row in db["frontends"]:
-        frontend: Sist2Frontend = row["frontend"]
+    for frontend in db["frontends"]:
+        frontend: Sist2Frontend
         if frontend.auto_start and len(frontend.jobs) > 0:
             start_frontend_(frontend)
@@ -380,8 +375,11 @@ if __name__ == '__main__':
     if not db["sist2_admin"]["info"]:
         initialize_db()

-    elif db["sist2_admin"]["info"]["version"] != DB_SCHEMA_VERSION:
-        print("Database has incompatible schema version! Delete state.db to continue.")
+    if db["sist2_admin"]["info"]["version"] == "1":
+        logger.info("Migrating to v2 database schema")
+        migrate_v1_to_v2(db)
+
+    if db["sist2_admin"]["info"]["version"] == "2":
+        logger.error("Cannot migrate database from v2 to v3. Delete state.db to proceed.")
         exit(-1)

     start_frontends()


@@ -10,7 +10,7 @@ from jobs import Sist2Job

 def _check_schedule(db: PersistentState, run_job):
-    for job in (row["job"] for row in db["jobs"]):
+    for job in db["jobs"]:
         job: Sist2Job
         if job.schedule_enabled:

View File

@@ -1,23 +1,21 @@
 import json
 import logging
 import os.path
-import shutil
 import signal
 import uuid
 from datetime import datetime
 from enum import Enum
-from hashlib import md5
 from logging import FileHandler
 from threading import Lock, Thread
 from time import sleep
 from uuid import uuid4, UUID
 from hexlib.db import PersistentState
-from pydantic import BaseModel, validator
+from pydantic import BaseModel
 from config import logger, LOG_FOLDER
 from notifications import Notifications
-from sist2 import ScanOptions, IndexOptions, Sist2, Sist2Index
+from sist2 import ScanOptions, IndexOptions, Sist2
 from state import RUNNING_FRONTENDS
 from web import Sist2Frontend
@@ -38,7 +36,8 @@ class Sist2Job(BaseModel):
     schedule_enabled: bool = False
     previous_index: str = None
-    last_index: str = None
+    index_path: str = None
+    previous_index_path: str = None
     last_index_date: datetime = None
     status: JobStatus = JobStatus("created")
     last_modified: datetime
@@ -58,10 +57,10 @@ class Sist2Job(BaseModel):
         cron_expression="0 0 * * *"
     )
-    @validator("etag", always=True)
-    def validate_etag(cls, value, values):
-        s = values["name"] + values["scan_options"].json() + values["index_options"].json() + values["cron_expression"]
-        return md5(s.encode()).hexdigest()
+    # @validator("etag", always=True)
+    # def validate_etag(cls, value, values):
+    #     s = values["name"] + values["scan_options"].json() + values["index_options"].json() + values["cron_expression"]
+    #     return md5(s.encode()).hexdigest()
 class Sist2TaskProgress:
@@ -124,10 +123,10 @@ class Sist2ScanTask(Sist2Task):
         self.job.scan_options.name = self.job.name
-        if self.job.last_index and os.path.exists(self.job.last_index) and not self.job.do_full_scan:
-            self.job.scan_options.incremental = self.job.last_index
+        if self.job.index_path is not None and not self.job.do_full_scan:
+            self.job.scan_options.output = self.job.index_path
         else:
-            self.job.scan_options.incremental = None
+            self.job.scan_options.output = None
         def set_pid(pid):
             self.pid = pid
@@ -139,19 +138,26 @@ class Sist2ScanTask(Sist2Task):
             self._logger.error(json.dumps({"sist2-admin": f"Process returned non-zero exit code ({return_code})"}))
             logger.info(f"Task {self.display_name} failed ({return_code})")
         else:
-            index = Sist2Index(self.job.scan_options.output)
-            # Save latest index
-            self.job.previous_index = self.job.last_index
-            self.job.last_index = index.path
+            self.job.index_path = self.job.scan_options.output
             self.job.last_index_date = datetime.now()
             self.job.do_full_scan = False
-            db["jobs"][self.job.name] = {"job": self.job}
-            self._logger.info(json.dumps({"sist2-admin": f"Save last_index={self.job.last_index}"}))
+            db["jobs"][self.job.name] = self.job
+            self._logger.info(json.dumps({"sist2-admin": f"Save last_index_date={self.job.last_index_date}"}))
         logger.info(f"Completed {self.display_name} ({return_code=})")
+        # Remove old index
+        if return_code == 0:
+            if self.job.previous_index_path is not None and self.job.previous_index_path != self.job.index_path:
+                self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index_path=}"}))
+                try:
+                    os.remove(self.job.previous_index_path)
+                except FileNotFoundError:
+                    pass
+            self.job.previous_index_path = self.job.index_path
+            db["jobs"][self.job.name] = self.job
         return return_code
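Note: an index now appears to be a single file rather than a directory (see the cleanup moving from shutil.rmtree() to os.remove() here, and DEFAULT_OUTPUT losing its trailing slash in src/cli.c below), so the admin only keeps previous_index_path around long enough to delete the superseded file after a successful scan. A condensed sketch of that rotation (rotate_index is an illustrative name):

import os

def rotate_index(job, return_code: int):
    if return_code != 0:
        return  # a failed scan keeps the previous index file around
    stale = job.previous_index_path
    if stale is not None and stale != job.index_path:
        try:
            os.remove(stale)  # one index file per job, not a directory tree
        except FileNotFoundError:
            pass
    job.previous_index_path = job.index_path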
@@ -173,19 +179,12 @@ class Sist2IndexTask(Sist2Task):
         ok = return_code == 0
         if ok:
-            # Remove old index
-            if self.job.previous_index is not None:
-                self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index=}"}))
-                try:
-                    shutil.rmtree(self.job.previous_index)
-                except FileNotFoundError:
-                    pass
             self.restart_running_frontends(db, sist2)
         # Update status
         self.job.status = JobStatus("indexed") if ok else JobStatus("failed")
-        db["jobs"][self.job.name] = {"job": self.job}
+        self.job.previous_index_path = self.job.index_path
+        db["jobs"][self.job.name] = self.job
         self._logger.info(json.dumps({"sist2-admin": f"Sist2Scan task finished {return_code=}, {duration=}"}))
@@ -195,16 +194,19 @@
     def restart_running_frontends(self, db: PersistentState, sist2: Sist2):
         for frontend_name, pid in RUNNING_FRONTENDS.items():
-            frontend = db["frontends"][frontend_name]["frontend"]
+            frontend = db["frontends"][frontend_name]
             frontend: Sist2Frontend
-            os.kill(pid, signal.SIGTERM)
+            try:
+                os.kill(pid, signal.SIGTERM)
+            except ProcessLookupError:
+                pass
             try:
                 os.wait()
             except ChildProcessError:
                 pass
-            frontend.web_options.indices = map(lambda j: db["jobs"][j]["job"].last_index, frontend.jobs)
+            frontend.web_options.indices = map(lambda j: db["jobs"][j].index_path, frontend.jobs)
             pid = sist2.web(frontend.web_options, frontend.name)
             RUNNING_FRONTENDS[frontend_name] = pid


@@ -2,7 +2,6 @@ import datetime
 import json
 import logging
 import os.path
-import traceback
 from datetime import datetime
 from io import TextIOWrapper
 from logging import FileHandler
@@ -63,7 +62,7 @@ class WebOptions(BaseModel):
         if self.auth:
             args.append(f"--auth={self.auth}")
         if self.tag_auth:
-            args.append(f"--tag_auth={self.tag_auth}")
+            args.append(f"--tag-auth={self.tag_auth}")
         if self.dev:
             args.append(f"--dev")
@@ -78,10 +77,10 @@ class IndexOptions(BaseModel):
     es_url: str = "http://elasticsearch:9200"
     es_insecure_ssl: bool = False
     es_index: str = "sist2"
-    incremental_index: bool = False
+    incremental_index: bool = True
     script: str = ""
     script_file: str = None
-    batch_size: int = 100
+    batch_size: int = 70
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -110,15 +109,14 @@ ARCHIVE_RECURSE = "recurse"
 class ScanOptions(BaseModel):
     path: str
     threads: int = 1
-    mem_throttle: int = 0
-    thumbnail_quality: float = 1.0
-    thumbnail_size: int = 500
+    thumbnail_quality: int = 2
+    thumbnail_size: int = 552
     thumbnail_count: int = 1
     content_size: int = 32768
     depth: int = -1
     archive: str = ARCHIVE_RECURSE
     archive_passphrase: str = None
-    ocr_lang: bool = None
+    ocr_lang: str = None
     ocr_images: bool = False
     ocr_ebooks: bool = False
     exclude: str = None
@@ -128,7 +126,8 @@ class ScanOptions(BaseModel):
     read_subtitles: bool = False
     fast_epub: bool = False
     checksums: bool = False
-    incremental: str = None
+    incremental: bool = True
+    optimize_index: bool = False
     output: str = None
     name: str = None
     rewrite_url: str = None
@@ -138,13 +137,15 @@ class ScanOptions(BaseModel):
         super().__init__(**kwargs)
     def args(self):
-        args = ["scan", self.path, f"--threads={self.threads}", f"--mem-throttle={self.mem_throttle}",
-                f"--thumbnail-quality={self.thumbnail_quality}", f"--thumbnail-count={self.thumbnail_count}",
+        args = ["scan", self.path, f"--threads={self.threads}", f"--thumbnail-quality={self.thumbnail_quality}",
+                f"--thumbnail-count={self.thumbnail_count}", f"--thumbnail-size={self.thumbnail_size}",
                 f"--content-size={self.content_size}", f"--output={self.output}", f"--depth={self.depth}",
                 f"--archive={self.archive}", f"--mem-buffer={self.mem_buffer}"]
         if self.incremental:
-            args.append(f"--incremental={self.incremental}")
+            args.append(f"--incremental")
+        if self.optimize_index:
+            args.append(f"--optimize-index")
         if self.rewrite_url:
             args.append(f"--rewrite-url={self.rewrite_url}")
         if self.name:
@@ -234,11 +235,11 @@ class Sist2:
     def scan(self, options: ScanOptions, logs_cb, set_pid_cb):
-        output_dir = os.path.join(
-            self._data_dir,
-            f"scan-{datetime.now()}.sist2"
-        )
-        options.output = output_dir
+        if options.output is None:
+            options.output = os.path.join(
+                self._data_dir,
+                f"scan-{options.name.replace('/', '_')}-{datetime.now()}.sist2"
+            )
         args = [
             self._bin_path,
@@ -277,23 +278,17 @@ class Sist2:
     @staticmethod
     def _consume_logs_stdout(logs_cb, proc):
         pipe_wrapper = TextIOWrapper(proc.stdout, encoding="utf8", errors="ignore")
-        try:
-            for line in pipe_wrapper:
+        for line in pipe_wrapper:
+            try:
                 if line.strip() == "":
                     continue
                 log_object = json.loads(line)
                 logs_cb(log_object)
-        except Exception as e:
-            proc.kill()
-            try:
-                print(line)
-            except NameError:
-                pass
-            print(traceback.format_exc())
-        finally:
-            pass
-            # proc.wait()
-            # pipe_wrapper.close()
+            except Exception as e:
+                try:
+                    logs_cb({"sist2-admin": f"Could not decode log line: {line}; {e}"})
+                except NameError:
+                    pass
     def web(self, options: WebOptions, name: str):


@@ -1,6 +1,7 @@
 from typing import Dict
+import shutil
-from hexlib.db import Table
+from hexlib.db import Table, PersistentState
 import pickle
 from tesseract import get_tesseract_langs
@@ -9,7 +10,7 @@ RUNNING_FRONTENDS: Dict[str, int] = {}
 TESSERACT_LANGS = get_tesseract_langs()
-DB_SCHEMA_VERSION = "1"
+DB_SCHEMA_VERSION = "3"
 from pydantic import BaseModel
@@ -48,3 +49,31 @@ class PickleTable(Table):
         for row in super().sql(where_clause, *params):
             yield dict((k, _deserialize(v)) for k, v in row.items())
+def migrate_v1_to_v2(db: PersistentState):
+    shutil.copy(db.dbfile, db.dbfile + "-before-migrate-v2.bak")
+    # Frontends
+    db._table_factory = PickleTable
+    frontends = [row["frontend"] for row in db["frontends"]]
+    del db["frontends"]
+    db._table_factory = Table
+    for frontend in frontends:
+        db["frontends"][frontend.name] = frontend
+    list(db["frontends"])
+    # Jobs
+    db._table_factory = PickleTable
+    jobs = [row["job"] for row in db["jobs"]]
+    del db["jobs"]
+    db._table_factory = Table
+    for job in jobs:
+        db["jobs"][job.name] = job
+    list(db["jobs"])
+    db["sist2_admin"]["info"] = {
+        "version": "2"
+    }
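Note: migrate_v1_to_v2() backs up state.db, then re-serializes each table by swapping the PersistentState table factory: rows written by the pickle-based PickleTable are read back, the table is dropped, and the rows are written again through the plain Table implementation. The trailing list(...) iterates the table once so it is materialized even when empty. The same shape, factored out (convert_table and unwrap are illustrative, not part of the codebase):

def convert_table(db, name, old_factory, new_factory, key_attr, unwrap):
    db._table_factory = old_factory
    rows = [unwrap(row) for row in db[name]]    # read with the old serializer
    del db[name]                                # drop the old table
    db._table_factory = new_factory
    for obj in rows:
        db[name][getattr(obj, key_attr)] = obj  # rewrite with the new one
    list(db[name])                              # touch the table so it exists even when empty

# e.g. convert_table(db, "jobs", PickleTable, Table, "name", lambda row: row["job"])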

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1,3 +0,0 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1,user-scalable=no"/><title>sist2</title><script defer="defer" src="js/chunk-vendors.js"></script><script defer="defer" src="js/index.js"></script><link href="css/chunk-vendors.css" rel="stylesheet"><link href="css/index.css" rel="stylesheet"></head><body><noscript><style>body {
-height: initial;
-}</style><div style="text-align: center; margin-top: 100px"><strong>We're sorry but sist2 doesn't work properly without JavaScript enabled. Please enable it to continue.</strong><br/><strong>Nous sommes désolés mais sist2 ne fonctionne pas correctement si JavaScript est activé. Veuillez l'activer pour continuer.</strong></div></noscript><div id="app"></div></body></html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.


@@ -16,7 +16,7 @@
     "d3": "^5.6.1",
     "date-fns": "^2.21.3",
     "dom-to-image": "^2.6.0",
-    "fslightbox-vue": "file:../../../../mnt/Hatchery/projects/sist2/fslightbox-vue-pro-1.3.1.tgz",
+    "fslightbox-vue": "fslightbox-vue.tgz",
     "nouislider": "^15.2.0",
     "underscore": "^1.13.1",
     "vue": "^2.6.12",
@@ -6581,7 +6581,7 @@
     },
     "node_modules/fslightbox-vue": {
       "version": "1.3.1",
-      "resolved": "file:../../../Hatchery/projects/sist2/fslightbox-vue-pro-1.3.1.tgz",
+      "resolved": "file:fslightbox-vue.tgz",
       "integrity": "sha512-dK+X5hH5hpohfqcmBvGmqGvkkoQhjNqWGb7CZAS1Dz6aQ6y7SgdepVf6xiUPveemzpzoeAGO4KoJ+UBAZDRLwQ==",
       "license": "MIT",
       "peerDependencies": {
@@ -16602,7 +16602,7 @@
       "optional": true
     },
     "fslightbox-vue": {
-      "version": "file:../../../Hatchery/projects/sist2/fslightbox-vue-pro-1.3.1.tgz",
+      "version": "file:fslightbox-vue.tgz",
       "integrity": "sha512-dK+X5hH5hpohfqcmBvGmqGvkkoQhjNqWGb7CZAS1Dz6aQ6y7SgdepVf6xiUPveemzpzoeAGO4KoJ+UBAZDRLwQ==",
       "requires": {}
     },


@@ -15,7 +15,7 @@
     "d3": "^5.6.1",
     "date-fns": "^2.21.3",
     "dom-to-image": "^2.6.0",
-    "fslightbox-vue": "file:../../../../mnt/Hatchery/projects/sist2/fslightbox-vue-pro-1.3.1.tgz",
+    "fslightbox-vue": "fslightbox-vue.tgz",
     "nouislider": "^15.2.0",
     "underscore": "^1.13.1",
     "vue": "^2.6.12",


@@ -10,7 +10,7 @@
         <b-spinner type="grow" variant="primary"></b-spinner>
     </div>
     <div class="loading-text">
-        Loading Chargement 装载
+        Loading Chargement 装载 Wird geladen
     </div>
 </div>
 </template>


@@ -61,6 +61,7 @@ export interface EsHit {
     isAudio: boolean
     hasThumbnail: boolean
     hasVidPreview: boolean
+    imageAspectRatio: number
     /** Number of thumbnails available */
     tnNum: number
 }
@@ -155,6 +156,9 @@ class Sist2Api {
                 && hit._source.videoc !== "raw" && hit._source.videoc !== "ppm") {
                 hit._props.isPlayableImage = true;
             }
+            if ("width" in hit._source && "height" in hit._source) {
+                hit._props.imageAspectRatio = hit._source.width / hit._source.height;
+            }
             break;
         case "video":
             if ("videoc" in hit._source) {
@@ -187,30 +191,6 @@
     setHitTags(hit: EsHit): void {
         const tags = [] as Tag[];
-        const mimeCategory = hit._source.mime == null ? null : hit._source.mime.split("/")[0];
-        switch (mimeCategory) {
-            case "image":
-            case "video":
-                if ("videoc" in hit._source && hit._source.videoc) {
-                    tags.push({
-                        style: "video",
-                        text: hit._source.videoc.replace(" ", ""),
-                        userTag: false
-                    } as Tag);
-                }
-                break
-            case "audio":
-                if ("audioc" in hit._source && hit._source.audioc) {
-                    tags.push({
-                        style: "audio",
-                        text: hit._source.audioc,
-                        userTag: false
-                    } as Tag);
-                }
-                break;
-        }
         // User tags
         if ("tag" in hit._source) {
             hit._source.tag.forEach(tag => {


@@ -27,6 +27,11 @@
         <DocFileTitle :doc="doc"></DocFileTitle>
     </div>
+    <!-- Featured line -->
+    <div style="display: flex">
+        <FeaturedFieldsLine :doc="doc"></FeaturedFieldsLine>
+    </div>
     <!-- Tags -->
     <div class="card-text">
         <TagContainer :hit="doc"></TagContainer>
@@ -43,10 +48,11 @@ import DocFileTitle from "@/components/DocFileTitle.vue";
 import DocInfoModal from "@/components/DocInfoModal.vue";
 import ContentDiv from "@/components/ContentDiv.vue";
 import FullThumbnail from "@/components/FullThumbnail";
+import FeaturedFieldsLine from "@/components/FeaturedFieldsLine";
 export default {
-    components: {FullThumbnail, ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
+    components: {FeaturedFieldsLine, FullThumbnail, ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
     props: ["doc", "width"],
     data() {
         return {


@@ -50,6 +50,11 @@
             <span v-if="doc._source.author && doc._source.pages" class="mx-1">-</span>
             <span v-if="doc._source.author">{{ doc._source.author }}</span>
         </div>
+        <!-- Featured line -->
+        <div style="display: flex">
+            <FeaturedFieldsLine :doc="doc"></FeaturedFieldsLine>
+        </div>
     </div>
 </div>
 </b-list-group-item>
@@ -61,10 +66,11 @@ import DocFileTitle from "@/components/DocFileTitle";
 import DocInfoModal from "@/components/DocInfoModal";
 import ContentDiv from "@/components/ContentDiv";
 import FileIcon from "@/components/icons/FileIcon";
+import FeaturedFieldsLine from "@/components/FeaturedFieldsLine";
 export default {
     name: "DocListItem",
-    components: {FileIcon, ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
+    components: {FileIcon, ContentDiv, DocInfoModal, DocFileTitle, TagContainer, FeaturedFieldsLine},
     props: ["doc"],
     data() {
         return {


@@ -0,0 +1,42 @@
+<template>
+    <div class="featured-line" v-html="featuredLineHtml"></div>
+</template>
+<script>
+import {humanDate, humanFileSize} from "@/util";
+function scopedEval(context, expr) {
+    const evaluator = Function.apply(null, [...Object.keys(context), "expr", "return eval(expr)"]);
+    return evaluator.apply(null, [...Object.values(context), expr]);
+}
+export default {
+    name: "FeaturedFieldsLine",
+    props: ["doc"],
+    computed: {
+        featuredLineHtml() {
+            const scope = {doc: this.doc._source, humanDate: humanDate, humanFileSize: humanFileSize};
+            return this.$store.getters.optFeaturedFields
+                .replaceAll(/\$\{([^}]*)}/g, (match, g1) => {
+                    return scopedEval(scope, g1);
+                });
+        }
+    }
+}
+</script>
+<style scoped>
+.featured-line {
+    font-size: 90%;
+    font-family: 'Source Sans Pro', 'Helvetica Neue', Arial, sans-serif;
+    color: #424242;
+    padding-left: 2px;
+}
+.theme-black .featured-line {
+    color: #bebebe;
+}
+</style>
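Note: scopedEval() builds a Function whose parameter names are the keys of the scope object, so that eval() inside it sees doc, humanDate and humanFileSize as locals; each ${...} segment of the user-configured template is evaluated in that scope. The template is the user's own per-browser setting (stored in localStorage), so it only ever evaluates in that user's session. A Python analogue of the substitution, as a sketch only (render_featured_line is an illustrative name):

import re

def render_featured_line(template: str, scope: dict) -> str:
    # Evaluate each ${...} segment with the scope's names visible,
    # mirroring scopedEval() above; builtins are stripped for tidiness.
    return re.sub(r"\$\{([^}]*)\}",
                  lambda m: str(eval(m.group(1), {"__builtins__": {}}, scope)),
                  template)

# e.g. render_featured_line("${doc['pages']} pages", {"doc": {"pages": 3}})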


@@ -6,13 +6,13 @@
     </div>
     <div
-        v-if="doc._props.isImage && !hover && doc._props.tnW / doc._props.tnH < 5"
+        v-if="doc._props.isImage && doc._props.imageAspectRatio < 5"
         class="card-img-overlay"
         :class="{'small-badge': smallBadge}">
         <span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span>
     </div>
-    <div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover"
+    <div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0"
         class="card-img-overlay"
         :class="{'small-badge': smallBadge}">
         <span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
@@ -63,6 +63,11 @@ export default {
     },
     computed: {
         tnSrc() {
+            return this.getThumbnailSrc(this.currentThumbnailNum);
+        },
+    },
+    methods: {
+        getThumbnailSrc(thumbnailNum) {
             const doc = this.doc;
             const props = doc._props;
             if (props.isGif && this.hover) {
@@ -70,10 +75,8 @@ export default {
             }
             return (this.currentThumbnailNum === 0)
                 ? `t/${doc._source.index}/${doc._id}`
-                : `t/${doc._source.index}/${doc._id}${String(this.currentThumbnailNum).padStart(4, "0")}`;
+                : `t/${doc._source.index}/${doc._id}/${String(thumbnailNum).padStart(4, "0")}`;
         },
-    },
-    methods: {
         humanTime: humanTime,
         onThumbnailClick() {
             this.$emit("onThumbnailClick");
@@ -86,9 +89,14 @@ export default {
         },
         onTnEnter() {
             this.hover = true;
+            const start = Date.now()
             if (this.doc._props.hasVidPreview) {
-                this.currentThumbnailNum += 1;
-                this.scheduleNextTnNum();
+                let img = new Image();
+                img.src = this.getThumbnailSrc(this.currentThumbnailNum + 1);
+                img.onload = () => {
+                    this.currentThumbnailNum += 1;
+                    this.scheduleNextTnNum(Date.now() - start);
+                }
             }
         },
         onTnLeave() {
@@ -99,17 +107,23 @@ export default {
                 this.timeoutId = null;
             }
         },
-        scheduleNextTnNum() {
-            const INTERVAL = this.$store.state.optVidPreviewInterval ?? 700;
+        scheduleNextTnNum(offset = 0) {
+            const INTERVAL = (this.$store.state.optVidPreviewInterval ?? 700) - offset;
             this.timeoutId = window.setTimeout(() => {
+                const start = Date.now();
                 if (!this.hover) {
                     return;
                 }
-                this.scheduleNextTnNum();
                 if (this.currentThumbnailNum === this.doc._props.tnNum - 1) {
                     this.currentThumbnailNum = 0;
+                    this.scheduleNextTnNum();
                 } else {
-                    this.currentThumbnailNum += 1;
+                    let img = new Image();
+                    img.src = this.getThumbnailSrc(this.currentThumbnailNum + 1);
+                    img.onload = () => {
+                        this.currentThumbnailNum += 1;
+                        this.scheduleNextTnNum(Date.now() - start);
+                    }
                 }
             }, INTERVAL);
         },
@@ -152,17 +166,18 @@
 }
 .badge-resolution {
-    color: #212529;
-    background-color: #FFC107;
+    color: #c6c6c6;
+    background-color: #272727CC;
+    padding: 2px 3px;
 }
 .card-img-overlay {
     pointer-events: none;
-    padding: 0.75rem;
-    bottom: unset;
-    top: 0;
+    padding: 2px 6px;
+    bottom: 4px;
+    top: unset;
     left: unset;
-    right: unset;
+    right: 0;
 }
 .small-badge {
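Note: instead of advancing the preview frame on a blind timer, the component now preloads the next thumbnail into an off-DOM Image and only advances in its onload callback, subtracting the download time from the next delay so the cadence stays close to optVidPreviewInterval. The timing idea in isolation, as a sketch (preview_loop and load_next_frame are illustrative; load_next_frame is assumed to block until the frame is ready, like waiting for img.onload):

import time

def preview_loop(load_next_frame, interval_s: float = 0.7):
    while True:
        start = time.monotonic()
        load_next_frame()                       # blocks until the frame is downloaded
        elapsed = time.monotonic() - start
        # Subtract the load time so frames still advance roughly every interval_s,
        # which is what scheduleNextTnNum(offset) does above.
        time.sleep(max(0.0, interval_s - elapsed))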


@@ -160,9 +160,13 @@ export default {
     },
     onSlideChange() {
         // Pause all videos when changing slide
-        document.getElementsByTagName("video").forEach((el) => {
+        const videos = document.getElementsByTagName("video");
+        if (videos.length === 0) {
+            return
+        }
+        for (let el of videos) {
             el.pause();
-        });
+        }
     },
 }


@@ -40,6 +40,7 @@
     <template v-for="tag in hit._tags">
+        <!-- User tag-->
         <div v-if="tag.userTag" :key="tag.rawText" style="display: inline-block">
             <span
                 :id="hit._id+tag.rawText"
@@ -51,7 +52,7 @@
             >{{ tag.text.split(".").pop() }}</span>
             <b-popover :target="hit._id+tag.rawText" triggers="focus blur" placement="top">
-                <b-button variant="danger" @click="onTagDeleteClick(tag, $event)">{{$t("deleteTag")}}</b-button>
+                <b-button variant="danger" @click="onTagDeleteClick(tag, $event)">{{ $t("deleteTag") }}</b-button>
             </b-popover>
         </div>
@@ -66,7 +67,7 @@
     <small v-if="showAddButton" class="badge add-tag-button" @click="tagAdd()">{{$t("addTag")}}</small>
     <!-- Size tag-->
-    <small v-else class="text-muted badge-size">{{
+    <small v-else class="text-muted badge-size" style="padding-left: 2px">{{
         humanFileSize(hit._source.size)
     }}</small>
 </div>
@@ -211,7 +212,7 @@ export default Vue.extend({
         return matches.sort().map(match => {
             return {
-                title: match.split(".").slice(0,-1).join("."),
+                title: match.split(".").slice(0, -1).join("."),
                 id: match
             }
         });


@@ -8,7 +8,7 @@ export default {
         advanced: "Advanced search",
         fuzzy: "Fuzzy"
     },
-    addTag: "Add",
+    addTag: "Tag",
     deleteTag: "Delete",
     download: "Download",
     and: "and",
@@ -17,6 +17,7 @@
     mimeTypes: "Media types",
     tags: "Tags",
     tagFilter: "Filter tags",
+    forExample: "For example:",
     help: {
         simpleSearch: "Simple search",
         advancedSearch: "Advanced search",
@@ -75,7 +76,9 @@
         useDatePicker: "Use a Date Picker component rather than a slider",
         vidPreviewInterval: "Video preview frame duration in ms",
         simpleLightbox: "Disable animations in image viewer",
-        showTagPickerFilter: "Display the tag filter bar"
+        showTagPickerFilter: "Display the tag filter bar",
+        featuredFields: "Featured fields Javascript template string. Will appear in the search results.",
+        featuredFieldsList: "Available variables"
     },
     queryMode: {
         simple: "Simple",
@@ -83,6 +86,7 @@
     },
     lang: {
         en: "English",
+        de: "Deutsch",
         fr: "Français",
         "zh-CN": "简体中文",
     },
@@ -168,6 +172,179 @@
         selectedIndices: "selected indices",
     },
 },
+de: {
+    filePage: {
+        notFound: "Nicht gefunden"
+    },
+    searchBar: {
+        simple: "Suche",
+        advanced: "Erweiterte Suche",
+        fuzzy: "Fuzzy"
+    },
+    addTag: "Tag",
+    deleteTag: "Löschen",
+    download: "Herunterladen",
+    and: "und",
+    page: "Seite",
+    pages: "Seiten",
+    mimeTypes: "Medientypen",
+    tags: "Tags",
+    tagFilter: "Tags filtern",
+    forExample: "Zum Beispiel:",
+    help: {
+        simpleSearch: "Einfache Suche",
+        advancedSearch: "Erweiterte Suche",
+        help: "Hilfe",
+        term: "<BEGRIFF>",
+        and: "UND Operator",
+        or: "ODER Operator",
+        not: "negiert einen einzelnen Begriff",
+        quotes: "liefert Treffer, wenn die Abfolge in der genauen Reihenfolge gefunden wird",
+        prefix: "liefert Treffer, wenn die Abfolge einen solchen Präfix hat",
+        parens: "gruppiert Ausdrücke",
+        tildeTerm: "liefert Treffer, im gegebenen 'Editierabstand'",
+        tildePhrase: "liefert Treffer, mit dem Ausdruck. Erfolgt die gegebene Anzahl zwischenstehnde Nicht-Treffer-Wörter.",
+        example1:
+            "Zum Beispiel: <code>\"fried eggs\" +(eggplant | potato) -frittata</code> wird " +
+            "<i>fried eggs</i> und <i>eggplant</i> oder <i>potato</i> finden, aber keine Ergebnisse, " +
+            "die <i>frittata</i> enthalten.",
+        defaultOperator:
+            "Wenn weder <code>+</code> noch <code>|</code> angegeben sind, ist " +
+            "<code>+</code> (and) der Standard.",
+        fuzzy:
+            "Wenn <b>Fuzzy</b> aktiviert ist, werden Teil-Treffer (3-grams) ebenfalls akzeptiert.",
+        moreInfoSimple: "Für weitere Informationen s.<a target=\"_blank\" " +
+            "rel=\"noreferrer\" href=\"//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html\">Elasticsearch Dokumentation</a>",
+        moreInfoAdvanced: "Für die Dokumentation der erweiterten Suche s. <a target=\"_blank\" rel=\"noreferrer\" href=\"//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax\">Elasticsearch Dokumentation</a>"
+    },
+    config: "Konfiguration",
+    configDescription: "Konfiguration wird in Echtzeit für diesen Browser gespeichert.",
+    configReset: "Konfiguration zurücksetzen",
+    searchOptions: "Such-Optionen",
+    treemapOptions: "Kacheldiagramm-Optionen",
+    displayOptions: "Anzeige-Optionen",
+    opt: {
+        lang: "Sprache",
+        highlight: "Aktiviere Hervorhebung von Treffern",
+        fuzzy: "Aktiviere Fuzzy-Suche standardmäßig",
+        searchInPath: "Abgleich der Abfrage mit dem Dokumentpfad aktivieren",
+        suggestPath: "Aktiviere Auto-Vervollständigung in Pfadfilter-Leiste",
+        fragmentSize: "Kontextgröße in Zeichen hervorheben",
+        queryMode: "Such-Modus",
+        displayMode: "Ansicht",
+        columns: "Anzahl Spalten",
+        treemapType: "Kacheldiagramme Typ",
+        treemapTiling: "Kacheldiagramm Tiling",
+        treemapColorGroupingDepth: "Kacheldiagramme Gruppierungsfarbe Tiefe (flach)",
+        treemapColor: "Kacheldiagramme Farbe (kaskadiert)",
+        treemapSize: "Kacheldiagramm Größe",
+        theme: "Theme",
+        lightboxLoadOnlyCurrent: "keine Bilder in voller Größe für benachbachte Slides im Image-Viewer vorab laden.",
+        slideDuration: "Slide Dauer",
+        resultSize: "Anzahl Treffer pro Seite",
+        tagOrOperator: "Verwende ODER Operator bei der Angabe mehrere Tags.",
+        hideDuplicates: "Verstecke Duplikate basierend auf der Prüfsumme",
+        hideLegacy: "Verstecke die 'legacyES' Elasticsearch Notiz",
+        updateMimeMap: "Aktualisiere Medientyp-Baum in Echtzeit",
+        useDatePicker: "Benutze Datumswähler statt Schieber",
+        vidPreviewInterval: "Videovorschau Framedauer in ms",
+        simpleLightbox: "Schalte Animationen im Image-Viewer ab",
+        showTagPickerFilter: "Zeige die Tag-Filter-Leiste",
+        featuredFields: "Ausgewählte Felder Javascript Vorlage String. Wird in den Suchergebnissen angezeigt.",
+        featuredFieldsList: "Verfügbare Variablen"
+    },
+    queryMode: {
+        simple: "Einfach",
+        advanced: "Erweitert",
+    },
+    lang: {
+        en: "English",
+        de: "Deutsch",
+        fr: "Français",
+        "zh-CN": "简体中文",
+    },
+    displayMode: {
+        grid: "Gitter",
+        list: "Liste",
+    },
+    columns: {
+        auto: "Auto"
+    },
+    treemapType: {
+        cascaded: "kaskadiert",
+        flat: "flach (kompakt)"
+    },
+    treemapSize: {
+        small: "klein",
+        medium: "mittel",
+        large: "groß",
+        xLarge: "sehr groß",
+        xxLarge: "riesig",
+        custom: "eigene",
+    },
+    treemapTiling: {
+        binary: "binär",
+        squarify: "quadratisch",
+        slice: "Slice",
+        dice: "Dice",
+        sliceDice: "Slice & Dice",
+    },
+    theme: {
+        light: "Hell",
+        black: "Dunkel"
+    },
+    hit: "Treffer",
+    hits: "Treffer",
+    details: "Details",
+    stats: "Statistiken",
+    queryTime: "Abfragedauer",
+    totalSize: "Gesamtgröße",
+    pathBar: {
+        placeholder: "Filter Pfad",
+        modalTitle: "Wähle Pfad"
+    },
+    debug: "Debug Informationen",
+    debugDescription: "Informationen für das Debugging. Wenn du Bugs gefunden oder Anregungen für " +
+        "neue Features hast, poste sie bitte <a href='https://github.com/simon987/sist2/issues/new/choose'>hier</a>.",
+    tagline: "Tagline",
+    toast: {
+        esConnErrTitle: "Elasticsearch Verbindungsfehler",
+        esConnErr: "sist2 Web-Modul stellte einen Fehler beim Verbinden mit Elasticsearch fest. " +
+            "Schau in die Server-Logs für weitere Informationen.",
+        esQueryErrTitle: "Query Fehler",
+        esQueryErr: "Konnte Query nicht verarbeiten/ausführen, bitte schaue in die Dokumentation zur erweiterten Suche. " +
+            "Schau in die Server-Logs für weitere Informationen.",
+        dupeTagTitle: "Tag Duplikat",
+        dupeTag: "Dieser Tag existiert bereits für das Dokument.",
+        copiedToClipboard: "In die Zwischenablage kopiert."
+    },
+    saveTagModalTitle: "Tag hinzufügen",
+    saveTagPlaceholder: "Tag Name",
+    confirm: "Bestätigen",
+    indexPickerPlaceholder: "Index auswählen",
+    sort: {
+        relevance: "Relevanz",
+        dateAsc: "Datum (älteste zuerst)",
+        dateDesc: "Datum (neuste zuerst)",
+        sizeAsc: "Größe (kleinste zuerst)",
+        sizeDesc: "Größe (größte zuerst)",
+        nameAsc: "Name (A-z)",
+        nameDesc: "Name (Z-a)",
+        random: "zufällig",
+    },
+    d3: {
+        mimeCount: "Anzahlverteilung nach Medientyp",
+        mimeSize: "Größenverteilung nach Medientyp",
+        dateHistogram: "Verteilung der Änderungszeiten",
+        sizeHistogram: "Verteilung der Dateigrößen",
+    },
+    indexPicker: {
+        selectNone: "keinen auswählen",
+        selectAll: "alle auswählen",
+        selectedIndex: "ausgewählter Index",
+        selectedIndices: "ausgewählte Indizes",
+    },
+},
 fr: {
     filePage: {
         notFound: "Ficher introuvable"
@@ -177,7 +354,7 @@
         advanced: "Recherche avancée",
         fuzzy: "Approximatif"
     },
-    addTag: "Ajouter",
+    addTag: "Taguer",
     deleteTag: "Supprimer",
     download: "Télécharger",
     and: "et",
@@ -186,6 +363,7 @@
     mimeTypes: "Types de médias",
     tags: "Tags",
     tagFilter: "Filtrer les tags",
+    forExample: "Par exemple:",
     help: {
         simpleSearch: "Recherche simple",
         advancedSearch: "Recherche avancée",
@@ -245,7 +423,9 @@
         useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider",
         vidPreviewInterval: "Durée des images d'aperçu video en millisecondes",
         simpleLightbox: "Désactiver les animations du visualiseur d'images",
-        showTagPickerFilter: "Afficher le filtre dans l'onglet Tags"
+        showTagPickerFilter: "Afficher le filtre dans l'onglet Tags",
+        featuredFields: "Expression Javascript pour les variables mises en évidence. Sera affiché dans les résultats de recherche.",
+        featuredFieldsList: "Variables disponibles"
     },
     queryMode: {
         simple: "Simple",
@@ -253,6 +433,7 @@
     },
     lang: {
         en: "English",
+        de: "Deutsch",
         fr: "Français",
         "zh-CN": "简体中文",
     },
@@ -348,7 +529,7 @@
         advanced: "高级搜索",
         fuzzy: "模糊搜索"
     },
-    addTag: "添加",
+    addTag: "签条",
     deleteTag: "删除",
     download: "下载",
     and: "与",
@@ -357,6 +538,7 @@
     mimeTypes: "文件类型",
     tags: "标签",
     tagFilter: "筛选标签",
+    forExample: "例如:",
     help: {
         simpleSearch: "简易搜索",
         advancedSearch: "高级搜索",
@@ -415,7 +597,9 @@
         useDatePicker: "使用日期选择器组件而不是滑块",
         vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位",
         simpleLightbox: "在图片查看器中,禁用动画",
-        showTagPickerFilter: "显示标签过滤栏"
+        showTagPickerFilter: "显示标签过滤栏",
+        featuredFields: "特色领域的Javascript模板字符串。将出现在搜索结果中。",
+        featuredFieldsList: "可利用的变量"
    },
     queryMode: {
         simple: "简单",
@@ -423,6 +607,7 @@
     },
     lang: {
         en: "English",
+        de: "Deutsch",
         fr: "Français",
         "zh-CN": "简体中文",
     },


@@ -33,6 +33,7 @@ export default new Vuex.Store({
     optHideDuplicates: true,
     optTheme: "light",
     optDisplay: "grid",
+    optFeaturedFields: "",
     optSize: 60,
     optHighlight: true,
@@ -158,6 +159,7 @@
     setOptQueryMode: (state, val) => state.optQueryMode = val,
     setOptResultSize: (state, val) => state.optSize = val,
     setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
+    setOptFeaturedFields: (state, val) => state.optFeaturedFields = val,
     setOptTreemapType: (state, val) => state.optTreemapType = val,
     setOptTreemapTiling: (state, val) => state.optTreemapTiling = val,
@@ -413,5 +415,6 @@
     optVidPreviewInterval: state => state.optVidPreviewInterval,
     optSimpleLightbox: state => state.optSimpleLightbox,
     optShowTagPickerFilter: state => state.optShowTagPickerFilter,
+    optFeaturedFields: state => state.optFeaturedFields,
 }
 })


@@ -57,6 +57,14 @@ export function humanTime(sec_num: number): string {
     const minutes = Math.floor((sec_num - (hours * 3600)) / 60);
     const seconds = sec_num - (hours * 3600) - (minutes * 60);
+    if (sec_num < 60) {
+        return `${sec_num}s`
+    }
+    if (sec_num < 3600) {
+        return `${minutes < 10 ? "0" : ""}${minutes}:${seconds < 10 ? "0" : ""}${seconds}`;
+    }
     return `${hours < 10 ? "0" : ""}${hours}:${minutes < 10 ? "0" : ""}${minutes}:${seconds < 10 ? "0" : ""}${seconds}`;
 }
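Note: the change above makes short durations render as bare seconds ("42s") and durations under an hour as MM:SS instead of always HH:MM:SS. The same formatting rules in Python, for reference:

def human_time(sec_num: int) -> str:
    hours, rem = divmod(sec_num, 3600)
    minutes, seconds = divmod(rem, 60)
    if sec_num < 60:
        return f"{sec_num}s"          # e.g. 42  -> "42s"
    if sec_num < 3600:
        return f"{minutes:02d}:{seconds:02d}"          # e.g. 75   -> "01:15"
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"  # e.g. 3725 -> "01:02:05"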


@@ -16,7 +16,9 @@
 <b-card>
-    <label><LanguageIcon/><span style="vertical-align: middle">&nbsp;{{ $t("opt.lang") }}</span></label>
+    <label>
+        <LanguageIcon/>
+        <span style="vertical-align: middle">&nbsp;{{ $t("opt.lang") }}</span></label>
     <b-form-select :options="langOptions" :value="optLang" @input="setOptLang"></b-form-select>
     <label>{{ $t("opt.theme") }}</label>
@@ -55,6 +57,62 @@
         $t("opt.showTagPickerFilter")
     }}
     </b-form-checkbox>
+    <br/>
+    <label>{{ $t("opt.featuredFields") }}</label>
+    <br>
+    <b-button v-b-toggle.collapse-1 variant="secondary" class="dropdown-toggle">{{
+        $t("opt.featuredFieldsList")
+    }}
+    </b-button>
+    <b-collapse id="collapse-1" class="mt-2">
+        <ul>
+            <li><code>doc.checksum</code></li>
+            <li><code>doc.path</code></li>
+            <li><code>doc.mime</code></li>
+            <li><code>doc.videoc</code></li>
+            <li><code>doc.audioc</code></li>
+            <li><code>doc.pages</code></li>
+            <li><code>doc.mtime</code></li>
+            <li><code>doc.font_name</code></li>
+            <li><code>doc.album</code></li>
+            <li><code>doc.artist</code></li>
+            <li><code>doc.title</code></li>
+            <li><code>doc.genre</code></li>
+            <li><code>doc.album_artist</code></li>
+            <li><code>doc.exif_make</code></li>
+            <li><code>doc.exif_model</code></li>
+            <li><code>doc.exif_software</code></li>
+            <li><code>doc.exif_exposure_time</code></li>
+            <li><code>doc.exif_fnumber</code></li>
+            <li><code>doc.exif_iso_speed_ratings</code></li>
+            <li><code>doc.exif_focal_length</code></li>
+            <li><code>doc.exif_user_comment</code></li>
+            <li><code>doc.exif_user_comment</code></li>
+            <li><code>doc.exif_gps_longitude_ref</code></li>
+            <li><code>doc.exif_gps_longitude_dms</code></li>
+            <li><code>doc.exif_gps_longitude_dec</code></li>
+            <li><code>doc.exif_gps_latitude_ref</code></li>
+            <li><code>doc.exif_gps_latitude_dec</code></li>
+            <li><code>humanDate()</code></li>
+            <li><code>humanFileSize()</code></li>
+        </ul>
+        <p>{{ $t("forExample") }}</p>
+        <ul>
+            <li>
+                <code>&lt;b&gt;${humanDate(doc.mtime)}&lt;/b&gt; ${doc.videoc || ''}</code>
+            </li>
+            <li>
+                <code>${doc.pages ? (doc.pages + ' pages') : ''}</code>
+            </li>
+        </ul>
+    </b-collapse>
+    <br/>
+    <br/>
+    <b-textarea rows="3" :value="optFeaturedFields" @input="setOptFeaturedFields"></b-textarea>
 </b-card>
 <br/>
@@ -159,6 +217,7 @@ export default {
     {value: "en", text: this.$t("lang.en")},
     {value: "fr", text: this.$t("lang.fr")},
     {value: "zh-CN", text: this.$t("lang.zh-CN")},
+    {value: "de", text: this.$t("lang.de")},
 ],
 queryModeOptions: [
     {value: "simple", text: this.$t("queryMode.simple")},
@@ -251,6 +310,7 @@ export default {
     "optVidPreviewInterval",
     "optSimpleLightbox",
     "optShowTagPickerFilter",
+    "optFeaturedFields",
 ]),
 clientWidth() {
     return window.innerWidth;
@@ -294,6 +354,7 @@ export default {
     "setOptVidPreviewInterval",
     "setOptSimpleLightbox",
     "setOptShowTagPickerFilter",
+    "setOptFeaturedFields",
 ]),
 onResetClick() {
     localStorage.removeItem("sist2_configuration");


@@ -1,12 +1,13 @@
 #ifndef SIST2_AUTH0_C_API_H
 #define SIST2_AUTH0_C_API_H
-#include "stdlib.h"
 #ifdef __cplusplus
 #define EXTERNC extern "C"
+#include "cstdlib"
 #else
 #define EXTERNC
+#include "stdlib.h"
 #endif
 #define AUTH0_OK (0)

src/cli.c (177 changed lines)

@@ -2,16 +2,17 @@
 #include "ctx.h"
 #include <tesseract/capi.h>
-#define DEFAULT_OUTPUT "index.sist2/"
+#define DEFAULT_OUTPUT "index.sist2"
+#define DEFAULT_NAME "index"
 #define DEFAULT_CONTENT_SIZE 32768
-#define DEFAULT_QUALITY 1
-#define DEFAULT_THUMBNAIL_SIZE 500
+#define DEFAULT_QUALITY 2
+#define DEFAULT_THUMBNAIL_SIZE 552
 #define DEFAULT_THUMBNAIL_COUNT 1
 #define DEFAULT_REWRITE_URL ""
 #define DEFAULT_ES_URL "http://localhost:9200"
 #define DEFAULT_ES_INDEX "sist2"
-#define DEFAULT_BATCH_SIZE 100
+#define DEFAULT_BATCH_SIZE 70
 #define DEFAULT_TAGLINE "Lightning-fast file system indexer and search tool"
 #define DEFAULT_LANG "en"
@@ -20,8 +21,6 @@
 #define DEFAULT_MAX_MEM_BUFFER 2000
-#define DEFAULT_THROTTLE_MEMORY_THRESHOLD 0
 const char *TESS_DATAPATHS[] = {
         "/usr/share/tessdata/",
         "/usr/share/tesseract-ocr/tessdata/",
@@ -48,9 +47,6 @@ void scan_args_destroy(scan_args_t *args) {
     if (args->name != NULL) {
         free(args->name);
     }
-    if (args->incremental != NULL) {
-        free(args->incremental);
-    }
     if (args->path != NULL) {
         free(args->path);
     }
@@ -61,7 +57,6 @@
 }
 void index_args_destroy(index_args_t *args) {
-    //todo
     if (args->es_mappings_path) {
         free(args->es_mappings);
     }
@@ -76,7 +71,6 @@
 }
 void web_args_destroy(web_args_t *args) {
-    //todo
     free(args);
 }
@@ -97,23 +91,17 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
     char *abs_path = abspath(argv[1]);
     if (abs_path == NULL) {
-        LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1])
+        LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1]);
     } else {
-        abs_path = realloc(abs_path, strlen(abs_path) + 2);
-        strcat(abs_path, "/");
         args->path = abs_path;
     }
-    if (args->incremental != OPTION_VALUE_UNSPECIFIED) {
-        args->incremental = abspath(args->incremental);
-        if (abs_path == NULL) {
-            sist_log("main.c", LOG_SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
-            args->incremental = NULL;
-        }
-    }
     if (args->tn_quality == OPTION_VALUE_UNSPECIFIED) {
         args->tn_quality = DEFAULT_QUALITY;
-    } else if (args->tn_quality < 1.0f || args->tn_quality > 31.0f) {
-        fprintf(stderr, "Invalid value for --thumbnail-quality argument: %f. Must be within [1.0, 31.0].\n",
+    } else if (args->tn_quality < 2 || args->tn_quality > 31) {
+        fprintf(stderr, "Invalid value for --thumbnail-quality argument: %d. Must be within [2, 31].\n",
                 args->tn_quality);
         return 1;
     }
@@ -140,8 +128,8 @@
     if (args->threads == 0) {
         args->threads = 1;
-    } else if (args->threads < 0) {
-        fprintf(stderr, "Invalid value for --threads: %d. Must be a positive number\n", args->threads);
+    } else if (args->threads < 0 || args->threads > 256) {
+        fprintf(stderr, "Invalid value for --threads: %d. Must be a positive number <= 256\n", args->threads);
         return 1;
     }
@@ -152,20 +140,24 @@
         args->output = expandpath(args->output);
     }
-    int ret = mkdir(args->output, S_IRUSR | S_IWUSR | S_IXUSR);
-    if (ret != 0) {
-        fprintf(stderr, "Invalid output: '%s' (%s).\n", args->output, strerror(errno));
-        return 1;
+    char *abs_output = abspath(args->output);
+    if (args->incremental && abs_output == NULL) {
+        LOG_WARNINGF("main.c", "Could not open original index for incremental scan: %s. Will not perform incremental scan.", args->output);
+        args->incremental = FALSE;
+    } else if (!args->incremental && abs_output != NULL) {
+        LOG_FATALF("main.c", "Index already exists: %s. If you wish to perform incremental scan, you must specify --incremental", abs_output);
     }
+    free(abs_output);
     if (args->depth <= 0) {
-        args->depth = G_MAXINT32;
+        args->depth = 2147483647;
     } else {
         args->depth += 1;
     }
     if (args->name == OPTION_VALUE_UNSPECIFIED) {
-        args->name = g_path_get_basename(args->output);
+        args->name = malloc(strlen(DEFAULT_NAME) + 1);
+        strcpy(args->name, DEFAULT_NAME);
     } else {
         char *tmp = malloc(strlen(args->name) + 1);
         strcpy(tmp, args->name);
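Note: --incremental changes from a path option (pointing at a previous index) to a boolean that reuses --output in place. The validation above enforces a simple contract, summarized here as a sketch (check_output_index is an illustrative name, not a function in the codebase):

def check_output_index(incremental: bool, output_exists: bool) -> str:
    # incremental + missing index  -> warn and fall back to a full scan
    # fresh scan + existing index  -> fatal, refuse to overwrite silently
    if incremental and not output_exists:
        return "warn: no index to reuse, performing a full scan"
    if not incremental and output_exists:
        return "fatal: index already exists, pass --incremental to reuse it"
    return "ok"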
@@ -224,7 +216,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
} }
if (trained_data_path != NULL && path != trained_data_path) { if (trained_data_path != NULL && path != trained_data_path) {
LOG_FATAL("cli.c", "When specifying more than one tesseract language, all the traineddata " LOG_FATAL("cli.c", "When specifying more than one tesseract language, all the traineddata "
"files must be in the same folder") "files must be in the same folder");
} }
trained_data_path = path; trained_data_path = path;
@@ -232,7 +224,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
} }
free(lang); free(lang);
ret = TessBaseAPIInit3(api, trained_data_path, args->tesseract_lang); int ret = TessBaseAPIInit3(api, trained_data_path, args->tesseract_lang);
if (ret != 0) { if (ret != 0) {
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang); fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
return 1; return 1;
@@ -249,12 +241,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0); pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0);
if (error != NULL) { if (error != NULL) {
LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset) LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset);
} }
pcre_extra *re_extra = pcre_study(re, 0, &error); pcre_extra *re_extra = pcre_study(re, 0, &error);
if (error != NULL) { if (error != NULL) {
LOG_FATALF("cli.c", "pcre_study returned error: %s", error) LOG_FATALF("cli.c", "pcre_study returned error: %s", error);
} }
ScanCtx.exclude = re; ScanCtx.exclude = re;
@@ -273,14 +265,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->max_memory_buffer_mib = DEFAULT_MAX_MEM_BUFFER; args->max_memory_buffer_mib = DEFAULT_MAX_MEM_BUFFER;
} }
if (args->scan_mem_limit_mib == OPTION_VALUE_UNSPECIFIED || args->scan_mem_limit_mib == OPTION_VALUE_DISABLE) {
args->scan_mem_limit_mib = DEFAULT_THROTTLE_MEMORY_THRESHOLD;
}
if (args->list_path != OPTION_VALUE_UNSPECIFIED) { if (args->list_path != OPTION_VALUE_UNSPECIFIED) {
if (strcmp(args->list_path, "-") == 0) { if (strcmp(args->list_path, "-") == 0) {
args->list_file = stdin; args->list_file = stdin;
LOG_DEBUG("cli.c", "Using stdin as list file") LOG_DEBUG("cli.c", "Using stdin as list file");
} else { } else {
args->list_file = fopen(args->list_path, "r"); args->list_file = fopen(args->list_path, "r");
@@ -290,27 +278,27 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
} }
} }
LOG_DEBUGF("cli.c", "arg tn_quality=%f", args->tn_quality) LOG_DEBUGF("cli.c", "arg tn_quality=%f", args->tn_quality);
LOG_DEBUGF("cli.c", "arg tn_size=%d", args->tn_size) LOG_DEBUGF("cli.c", "arg tn_size=%d", args->tn_size);
LOG_DEBUGF("cli.c", "arg tn_count=%d", args->tn_count) LOG_DEBUGF("cli.c", "arg tn_count=%d", args->tn_count);
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size) LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size);
LOG_DEBUGF("cli.c", "arg threads=%d", args->threads) LOG_DEBUGF("cli.c", "arg threads=%d", args->threads);
LOG_DEBUGF("cli.c", "arg incremental=%s", args->incremental) LOG_DEBUGF("cli.c", "arg incremental=%d", args->incremental);
LOG_DEBUGF("cli.c", "arg output=%s", args->output) LOG_DEBUGF("cli.c", "arg output=%s", args->output);
LOG_DEBUGF("cli.c", "arg rewrite_url=%s", args->rewrite_url) LOG_DEBUGF("cli.c", "arg rewrite_url=%s", args->rewrite_url);
LOG_DEBUGF("cli.c", "arg name=%s", args->name) LOG_DEBUGF("cli.c", "arg name=%s", args->name);
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth) LOG_DEBUGF("cli.c", "arg depth=%d", args->depth);
LOG_DEBUGF("cli.c", "arg path=%s", args->path) LOG_DEBUGF("cli.c", "arg path=%s", args->path);
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive) LOG_DEBUGF("cli.c", "arg archive=%s", args->archive);
LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase) LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase);
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang) LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang);
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path) LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path);
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex) LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex);
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast) LOG_DEBUGF("cli.c", "arg fast=%d", args->fast);
LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub) LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub);
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold) LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold);
LOG_DEBUGF("cli.c", "arg max_memory_buffer_mib=%d", args->max_memory_buffer_mib) LOG_DEBUGF("cli.c", "arg max_memory_buffer_mib=%d", args->max_memory_buffer_mib);
LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path) LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path);
return 0; return 0;
} }
@@ -320,20 +308,20 @@ int load_external_file(const char *file_path, char **dst) {
int res = stat(file_path, &info); int res = stat(file_path, &info);
if (res == -1) { if (res == -1) {
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno)) LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno));
return 1; return 1;
} }
int fd = open(file_path, O_RDONLY); int fd = open(file_path, O_RDONLY);
if (fd == -1) { if (fd == -1) {
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno)) LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno));
return 1; return 1;
} }
*dst = malloc(info.st_size + 1); *dst = malloc(info.st_size + 1);
res = read(fd, *dst, info.st_size); res = read(fd, *dst, info.st_size);
if (res < 0) { if (res < 0) {
LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno)) LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno));
return 1; return 1;
} }
@@ -361,7 +349,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
char *index_path = abspath(argv[1]); char *index_path = abspath(argv[1]);
if (index_path == NULL) { if (index_path == NULL) {
LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1]) LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1]);
} else { } else {
args->index_path = index_path; args->index_path = index_path;
} }
@@ -396,28 +384,28 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
args->batch_size = DEFAULT_BATCH_SIZE; args->batch_size = DEFAULT_BATCH_SIZE;
} }
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url) LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url);
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index) LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index);
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl) LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl);
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path) LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path);
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path) LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path);
LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script) LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script);
if (args->script) { if (args->script) {
char log_buf[5000]; char log_buf[5000];
strncpy(log_buf, args->script, sizeof(log_buf)); strncpy(log_buf, args->script, sizeof(log_buf));
*(log_buf + sizeof(log_buf) - 1) = '\0'; *(log_buf + sizeof(log_buf) - 1) = '\0';
LOG_DEBUGF("cli.c", "arg script=%s", log_buf) LOG_DEBUGF("cli.c", "arg script=%s", log_buf);
} }
LOG_DEBUGF("cli.c", "arg print=%d", args->print) LOG_DEBUGF("cli.c", "arg print=%d", args->print);
LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path) LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path);
LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings) LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings);
LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path) LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path);
LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings) LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings);
LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size) LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size);
LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset) LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset);
return 0; return 0;
} }
@@ -538,23 +526,24 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
for (int i = 0; i < args->index_count; i++) { for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]); char *abs_path = abspath(args->indices[i]);
if (abs_path == NULL) { if (abs_path == NULL) {
LOG_FATALF("cli.c", "Index not found: %s", args->indices[i]) LOG_FATALF("cli.c", "Index not found: %s", args->indices[i]);
} }
free(abs_path);
} }
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url) LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url);
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index) LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index);
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl) LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl);
LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline) LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline);
LOG_DEBUGF("cli.c", "arg dev=%d", args->dev) LOG_DEBUGF("cli.c", "arg dev=%d", args->dev);
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address) LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address);
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials) LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials);
LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials) LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials);
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user) LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user);
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass) LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass);
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count) LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count);
for (int i = 0; i < args->index_count; i++) { for (int i = 0; i < args->index_count; i++) {
LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i]) LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i]);
} }
return 0; return 0;
@@ -579,7 +568,7 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
    char *index_path = abspath(argv[1]);
    if (index_path == NULL) {
-        LOG_FATALF("cli.c", "Invalid index PATH argument. File not found: %s", argv[1])
+        LOG_FATALF("cli.c", "Invalid index PATH argument. File not found: %s", argv[1]);
    } else {
        args->index_path = index_path;
    }
@@ -600,12 +589,12 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
        return 1;
    }

-    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
+    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path);

    char log_buf[5000];
    strncpy(log_buf, args->script, sizeof(log_buf));
    *(log_buf + sizeof(log_buf) - 1) = '\0';
-    LOG_DEBUGF("cli.c", "arg script=%s", log_buf)
+    LOG_DEBUGF("cli.c", "arg script=%s", log_buf);

    return 0;
}

View File

@@ -9,12 +9,12 @@
#define OPTION_VALUE_UNSPECIFIED (0)

typedef struct scan_args {
-    float tn_quality;
+    int tn_quality;
    int tn_size;
    int content_size;
    int threads;
-    int scan_mem_limit_mib;
-    char *incremental;
+    int incremental;
+    int optimize_database;
    char *output;
    char *rewrite_url;
    char *name;

View File

@@ -3,9 +3,10 @@
ScanCtx_t ScanCtx = {
        .stat_index_size = 0,
        .stat_tn_size = 0,
-        .dbg_current_files = NULL,
-        .pool = NULL
+        .pool = NULL,
+        .index.path = {0,},
};
WebCtx_t WebCtx;
IndexCtx_t IndexCtx;
LogCtx_t LogCtx;
+__thread ProcData_t ProcData;

View File

@@ -16,47 +16,28 @@
#include "libscan/msdoc/msdoc.h" #include "libscan/msdoc/msdoc.h"
#include "libscan/wpd/wpd.h" #include "libscan/wpd/wpd.h"
#include "libscan/json/json.h" #include "libscan/json/json.h"
#include "src/io/store.h" #include "src/database/database.h"
#include "src/index/elastic.h" #include "src/index/elastic.h"
#include "sqlite3.h"
#include <glib.h>
#include <pcre.h> #include <pcre.h>
typedef struct { typedef struct {
struct index_t index; struct index_t index;
GHashTable *mime_table;
GHashTable *ext_table;
tpool_t *pool; tpool_t *pool;
tpool_t *writer_pool;
int threads; int threads;
int depth; int depth;
int calculate_checksums; int calculate_checksums;
size_t mem_limit;
size_t stat_tn_size; size_t stat_tn_size;
size_t stat_index_size; size_t stat_index_size;
GHashTable *original_table;
GHashTable *copy_table;
GHashTable *new_table;
pthread_mutex_t copy_table_mu;
pcre *exclude; pcre *exclude;
pcre_extra *exclude_extra; pcre_extra *exclude_extra;
int fast; int fast;
GHashTable *dbg_current_files;
pthread_mutex_t dbg_current_files_mu;
int dbg_failed_files_count;
int dbg_skipped_files_count;
int dbg_excluded_files_count;
pthread_mutex_t dbg_file_counts_mu;
scan_arc_ctx_t arc_ctx; scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx; scan_comic_ctx_t comic_ctx;
scan_ebook_ctx_t ebook_ctx; scan_ebook_ctx_t ebook_ctx;
@@ -85,10 +66,6 @@ typedef struct {
char *es_index; char *es_index;
int batch_size; int batch_size;
tpool_t *pool; tpool_t *pool;
store_t *tag_store;
GHashTable *tags;
store_t *meta_store;
GHashTable *meta;
/** /**
* Set to false when using --print * Set to false when using --print
*/ */
@@ -118,10 +95,18 @@ typedef struct {
int dev; int dev;
} WebCtx_t; } WebCtx_t;
typedef struct {
int thread_id;
database_t *ipc_db;
database_t *index_db;
} ProcData_t;
extern ScanCtx_t ScanCtx; extern ScanCtx_t ScanCtx;
extern WebCtx_t WebCtx; extern WebCtx_t WebCtx;
extern IndexCtx_t IndexCtx; extern IndexCtx_t IndexCtx;
extern LogCtx_t LogCtx; extern LogCtx_t LogCtx;
extern __thread ProcData_t ProcData;
#endif #endif
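The new ProcData_t context is declared __thread, so every worker in the refactored process pool sees its own database handles. A minimal sketch of the expected setup, based only on the declarations above; worker_init() and its call site are hypothetical, not part of this diff:

#include "src/ctx.h"

// Hypothetical per-thread setup: once filled in, code deeper in the worker can
// reach its own handles (e.g. database_write_document(ProcData.index_db, ...))
// without threading them through every call.
static void worker_init(int thread_id, database_t *ipc_db, database_t *index_db) {
    ProcData.thread_id = thread_id;
    ProcData.ipc_db = ipc_db;
    ProcData.index_db = index_db;
}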

src/database/database.c (new file, 626 lines)
View File

@@ -0,0 +1,626 @@
#include "database.h"
#include "malloc.h"
#include "src/ctx.h"
#include <string.h>
#include <pthread.h>
#include "src/util.h"
#include <time.h>
database_t *database_create(const char *filename, database_type_t type) {
database_t *db = malloc(sizeof(database_t));
strcpy(db->filename, filename);
db->type = type;
db->select_thumbnail_stmt = NULL;
db->ipc_ctx = NULL;
return db;
}
__always_inline
static int sep_rfind(const char *str) {
for (int i = (int) strlen(str); i >= 0; i--) {
if (str[i] == '/') {
return i;
}
}
return -1;
}
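// SQL scalar function: path_parent(path) returns the parent directory of path,
// or NULL when the path contains no '/' separator (see sep_rfind above).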
void path_parent_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
sqlite3_result_error(ctx, "Invalid parameters", -1);
}
const char *value = (const char *) sqlite3_value_text(argv[0]);
int stop = sep_rfind(value);
if (stop == -1) {
sqlite3_result_null(ctx);
return;
}
char parent[PATH_MAX * 3];
strncpy(parent, value, stop);
sqlite3_result_text(ctx, parent, stop, SQLITE_TRANSIENT);
}
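// SQL scalar function: save_current_job_info(filepath) records, per worker thread,
// the file currently being parsed; wiring it into pop_parse_job_stmt below lets the
// queue pop and the bookkeeping happen in a single statement (inferred from usage).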
void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
sqlite3_result_error(ctx, "Invalid parameters", -1);
}
database_ipc_ctx_t *ipc_ctx = sqlite3_user_data(ctx);
const char *current_job = (const char *) sqlite3_value_text(argv[0]);
char buf[PATH_MAX];
strcpy(buf, current_job);
strcpy(ipc_ctx->current_job[ProcData.thread_id], current_job);
sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC);
}
void database_initialize(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db));
LOG_DEBUGF("database.c", "Initializing database %s", db->filename);
if (db->type == INDEX_DATABASE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, IndexDatabaseSchema, NULL, NULL, NULL));
} else if (db->type == IPC_CONSUMER_DATABASE || db->type == IPC_PRODUCER_DATABASE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, IpcDatabaseSchema, NULL, NULL, NULL));
}
sqlite3_close(db->db);
}
void database_open(database_t *db) {
LOG_DEBUGF("database.c", "Opening database %s (%d)", db->filename, db->type);
CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA cache_size = -200000;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA synchronous = OFF;", NULL, NULL, NULL));
if (db->type == INDEX_DATABASE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA temp_store = memory;", NULL, NULL, NULL));
}
if (db->type == INDEX_DATABASE) {
// Prepare statements
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"SELECT data FROM thumbnail WHERE id=? AND num=? LIMIT 1;", -1,
&db->select_thumbnail_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"UPDATE document SET marked=1 WHERE id=? AND mtime=? RETURNING id",
-1,
&db->mark_document_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"REPLACE INTO document_sidecar (id, json_data) VALUES (?,?)", -1,
&db->write_document_sidecar_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"REPLACE INTO document (id, mtime, size, json_data) VALUES (?, ?, ?, ?);", -1,
&db->write_document_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"INSERT INTO thumbnail (id, num, data) VALUES (?,?,?) ON CONFLICT DO UPDATE SET data=excluded.data;",
-1,
&db->write_thumbnail_stmt, NULL));
// Create functions
sqlite3_create_function(
db->db,
"path_parent",
1,
SQLITE_UTF8,
NULL,
path_parent_func,
NULL,
NULL
);
} else if (db->type == IPC_CONSUMER_DATABASE) {
sqlite3_create_function(
db->db,
"save_current_job_info",
1,
SQLITE_UTF8,
db->ipc_ctx,
save_current_job_info,
NULL,
NULL
);
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"DELETE FROM parse_job WHERE id = (SELECT MIN(id) FROM parse_job)"
" RETURNING filepath,mtime,st_size,save_current_job_info(filepath);",
-1, &db->pop_parse_job_stmt, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"DELETE FROM index_job WHERE id = (SELECT MIN(id) FROM index_job)"
" RETURNING doc_id,type,line;",
-1, &db->pop_index_job_stmt, NULL
));
} else if (db->type == IPC_PRODUCER_DATABASE) {
char sql[40];
int max_size_mb = 10; // TODO: read from args.
snprintf(sql, sizeof(sql), "PRAGMA max_page_count=%d", (max_size_mb * 1024 * 1024) / 4096);
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, sql, NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "INSERT INTO parse_job (filepath,mtime,st_size) VALUES (?,?,?);", -1,
&db->insert_parse_job_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "INSERT INTO index_job (doc_id,type,line) VALUES (?,?,?);", -1,
&db->insert_index_job_stmt, NULL));
sqlite3_create_function(
db->db,
"path_parent",
1,
SQLITE_UTF8,
NULL,
path_parent_func,
NULL,
NULL
);
}
}
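A minimal lifecycle sketch using only the functions above, assuming a brand-new index file; since the schema uses plain CREATE TABLE (no IF NOT EXISTS), the database_initialize() call is only valid once per file:

#include "src/database/database.h"

// Sketch: create, initialize and open a fresh index database
// ("my_index/index.sqlite" is a hypothetical path).
database_t *db = database_create("my_index/index.sqlite", INDEX_DATABASE);
database_initialize(db); // runs IndexDatabaseSchema, then closes the handle
database_open(db);       // re-opens with PRAGMAs and prepared statements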
void database_close(database_t *db, int optimize) {
LOG_DEBUGF("database.c", "Closing database %s", db->filename);
if (optimize) {
LOG_DEBUG("database.c", "Optimizing database");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "VACUUM;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA optimize;", NULL, NULL, NULL));
}
sqlite3_close(db->db);
if (db->type == IPC_PRODUCER_DATABASE) {
remove(db->filename);
}
free(db);
db = NULL;
}
void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *return_value_len) {
sqlite3_bind_text(db->select_thumbnail_stmt, 1, id, -1, SQLITE_STATIC);
sqlite3_bind_int(db->select_thumbnail_stmt, 2, num);
int ret = sqlite3_step(db->select_thumbnail_stmt);
if (ret == SQLITE_DONE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->select_thumbnail_stmt));
*return_value_len = 0;
return NULL;
}
CRASH_IF_STMT_FAIL(ret);
const void *blob = sqlite3_column_blob(db->select_thumbnail_stmt, 0);
const int blob_size = sqlite3_column_bytes(db->select_thumbnail_stmt, 0);
*return_value_len = blob_size;
void *return_data = malloc(blob_size);
memcpy(return_data, blob, blob_size);
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->select_thumbnail_stmt));
return return_data;
}
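database_read_thumbnail() returns a malloc'd copy of the blob, or NULL with *return_value_len set to 0 when no row matches; a short usage sketch, with db and doc_id assumed to come from the caller:

size_t tn_len;
void *tn = database_read_thumbnail(db, doc_id, 0, &tn_len); // num=0: first thumbnail
if (tn != NULL) {
    // ... serve tn/tn_len, e.g. as an HTTP response body ...
    free(tn); // the caller owns the copy
}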
void database_write_index_descriptor(database_t *db, index_descriptor_t *desc) {
sqlite3_exec(db->db, "DELETE FROM descriptor;", NULL, NULL, NULL);
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "INSERT INTO descriptor (id, version_major, version_minor, version_patch,"
" root, name, rewrite_url, timestamp) VALUES (?,?,?,?,?,?,?,?);", -1, &stmt, NULL);
sqlite3_bind_text(stmt, 1, desc->id, -1, SQLITE_STATIC);
sqlite3_bind_int(stmt, 2, desc->version_major);
sqlite3_bind_int(stmt, 3, desc->version_minor);
sqlite3_bind_int(stmt, 4, desc->version_patch);
sqlite3_bind_text(stmt, 5, desc->root, -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, 6, desc->name, -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, 7, desc->rewrite_url, -1, SQLITE_STATIC);
sqlite3_bind_int64(stmt, 8, desc->timestamp);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
sqlite3_finalize(stmt);
}
index_descriptor_t *database_read_index_descriptor(database_t *db) {
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "SELECT id, version_major, version_minor, version_patch,"
" root, name, rewrite_url, timestamp FROM descriptor;", -1, &stmt, NULL);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
const char *id = (char *) sqlite3_column_text(stmt, 0);
int v_major = sqlite3_column_int(stmt, 1);
int v_minor = sqlite3_column_int(stmt, 2);
int v_patch = sqlite3_column_int(stmt, 3);
const char *root = (char *) sqlite3_column_text(stmt, 4);
const char *name = (char *) sqlite3_column_text(stmt, 5);
const char *rewrite_url = (char *) sqlite3_column_text(stmt, 6);
int timestamp = sqlite3_column_int(stmt, 7);
index_descriptor_t *desc = malloc(sizeof(index_descriptor_t));
strcpy(desc->id, id);
snprintf(desc->version, sizeof(desc->version), "%d.%d.%d", v_major, v_minor, v_patch);
desc->version_major = v_major;
desc->version_minor = v_minor;
desc->version_patch = v_patch;
strcpy(desc->root, root);
strcpy(desc->name, name);
strcpy(desc->rewrite_url, rewrite_url);
desc->timestamp = timestamp;
CRASH_IF_NOT_SQLITE_OK(sqlite3_finalize(stmt));
return desc;
}
database_iterator_t *database_create_delete_list_iterator(database_t *db) {
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "SELECT id FROM delete_list;", -1, &stmt, NULL);
database_iterator_t *iter = malloc(sizeof(database_iterator_t));
iter->stmt = stmt;
iter->db = db;
return iter;
}
char *database_delete_list_iter(database_iterator_t *iter) {
int ret = sqlite3_step(iter->stmt);
if (ret == SQLITE_ROW) {
const char *id = (const char *) sqlite3_column_text(iter->stmt, 0);
char *id_heap = malloc(strlen(id) + 1);
strcpy(id_heap, id);
return id_heap;
}
if (ret != SQLITE_DONE) {
LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db));
}
if (sqlite3_finalize(iter->stmt) != SQLITE_OK) {
LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db));
}
iter->stmt = NULL;
return NULL;
}
database_iterator_t *database_create_document_iterator(database_t *db) {
sqlite3_stmt *stmt;
// TODO optimization: remove mtime, size, _id from json_data
sqlite3_prepare_v2(db->db, "WITH doc (j) AS (SELECT CASE"
" WHEN sc.json_data IS NULL THEN"
" CASE"
" WHEN t.tag IS NULL THEN"
" document.json_data"
" ELSE"
" json_set(document.json_data, '$.tag', json_group_array(t.tag))"
" END"
" ELSE"
" CASE"
" WHEN t.tag IS NULL THEN"
" json_patch(document.json_data, sc.json_data)"
" ELSE"
// This will overwrite any tags specified in the sidecar file!
// TODO: concatenate the two arrays?
" json_set(json_patch(document.json_data, sc.json_data), '$.tag', json_group_array(t.tag))"
" END"
" END"
" FROM document"
" LEFT JOIN document_sidecar sc ON document.id = sc.id"
" LEFT JOIN tag t ON document.id = t.id"
" GROUP BY document.id)"
" SELECT json_set(j, '$.index', (SELECT id FROM descriptor)) FROM doc", -1, &stmt, NULL);
database_iterator_t *iter = malloc(sizeof(database_iterator_t));
iter->stmt = stmt;
iter->db = db;
return iter;
}
cJSON *database_document_iter(database_iterator_t *iter) {
if (iter->stmt == NULL) {
LOG_ERROR("database.c", "FIXME: database_document_iter() called after iteration stopped");
return NULL;
}
int ret = sqlite3_step(iter->stmt);
if (ret == SQLITE_ROW) {
const char *json_string = (const char *) sqlite3_column_text(iter->stmt, 0);
return cJSON_Parse(json_string);
}
if (ret != SQLITE_DONE) {
LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db));
}
if (sqlite3_finalize(iter->stmt) != SQLITE_OK) {
LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db));
}
iter->stmt = NULL;
return NULL;
}
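The iterator above yields one parsed cJSON object per document, with sidecar JSON patched in, tags attached and '$.index' set. A sketch of the consuming loop; that the consumer frees both the objects and the iterator struct is an assumption:

database_iterator_t *iter = database_create_document_iterator(db);
database_document_iter_foreach(doc, iter) {
    char *line = cJSON_PrintUnformatted(doc); // the merged document, ready for indexing
    // ... hand `line` to the indexer ...
    cJSON_free(line);
    cJSON_Delete(doc);
}
free(iter);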
cJSON *database_incremental_scan_begin(database_t *db) {
LOG_DEBUG("database.c", "Preparing database for incremental scan");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "UPDATE document SET marked=0;", NULL, NULL, NULL));
}
cJSON *database_incremental_scan_end(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM delete_list WHERE id IN (SELECT id FROM document WHERE marked=1);",
NULL, NULL, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM thumbnail WHERE id IN (SELECT id FROM document WHERE marked=0);",
NULL, NULL, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"INSERT INTO delete_list (id) SELECT id FROM document WHERE marked=0;",
NULL, NULL, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM document_sidecar WHERE id IN (SELECT id FROM document WHERE marked=0);",
NULL, NULL, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM document WHERE marked=0;",
NULL, NULL, NULL
));
}
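// The two functions above and database_mark_document() below implement a
// mark-and-sweep incremental scan: _begin() clears every mark, mark_document()
// re-marks files that are unchanged on disk, and _end() moves whatever is still
// unmarked onto delete_list and drops its document, sidecar and thumbnail rows.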
int database_mark_document(database_t *db, const char *id, int mtime) {
sqlite3_bind_text(db->mark_document_stmt, 1, id, -1, SQLITE_STATIC);
sqlite3_bind_int(db->mark_document_stmt, 2, mtime);
pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
int ret = sqlite3_step(db->mark_document_stmt);
if (ret == SQLITE_ROW) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->mark_document_stmt));
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
return TRUE;
}
if (ret == SQLITE_DONE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->mark_document_stmt));
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
return FALSE;
}
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
CRASH_IF_STMT_FAIL(ret);
}
void database_write_document(database_t *db, document_t *doc, const char *json_data) {
sqlite3_bind_text(db->write_document_stmt, 1, doc->doc_id, -1, SQLITE_STATIC);
sqlite3_bind_int(db->write_document_stmt, 2, doc->mtime);
sqlite3_bind_int64(db->write_document_stmt, 3, (long) doc->size);
sqlite3_bind_text(db->write_document_stmt, 4, json_data, -1, SQLITE_STATIC);
pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
CRASH_IF_STMT_FAIL(sqlite3_step(db->write_document_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_document_stmt));
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
}
void database_write_document_sidecar(database_t *db, const char *id, const char *json_data) {
sqlite3_bind_text(db->write_document_sidecar_stmt, 1, id, -1, SQLITE_STATIC);
sqlite3_bind_text(db->write_document_sidecar_stmt, 2, json_data, -1, SQLITE_STATIC);
pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
CRASH_IF_STMT_FAIL(sqlite3_step(db->write_document_sidecar_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_document_sidecar_stmt));
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
}
void database_write_thumbnail(database_t *db, const char *id, int num, void *data, size_t data_size) {
sqlite3_bind_text(db->write_thumbnail_stmt, 1, id, -1, SQLITE_STATIC);
sqlite3_bind_int(db->write_thumbnail_stmt, 2, num);
sqlite3_bind_blob(db->write_thumbnail_stmt, 3, data, (int) data_size, SQLITE_STATIC);
pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
CRASH_IF_STMT_FAIL(sqlite3_step(db->write_thumbnail_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_thumbnail_stmt));
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
}
//void database_create_fts_index(database_t *db, database_t *fts_db) {
// // In a separate file,
//
// // use database_initialize() to create FTS schema
// // if --force-reset, then truncate the tables first
//
// /*
// * create/append fts table
// *
// * create/append scalar index table with
// * id,index,size,mtime,mime
// *
// * create/append path index table with
// * index,path,depth
// *
// * content table is a view with SELECT UNION for all attached tables
// * random_seed column
// */
//
// // INSERT INTO ft(ft) VALUES('optimize');
//}
job_t *database_get_work(database_t *db, job_type_t job_type) {
job_t *job;
pthread_mutex_lock(&db->ipc_ctx->mutex);
while (db->ipc_ctx->job_count == 0 && !db->ipc_ctx->no_more_jobs) {
pthread_cond_timedwait_ms(&db->ipc_ctx->has_work_cond, &db->ipc_ctx->mutex, 10);
}
pthread_mutex_unlock(&db->ipc_ctx->mutex);
pthread_mutex_lock(&db->ipc_ctx->db_mutex);
if (job_type == JOB_PARSE_JOB) {
int ret = sqlite3_step(db->pop_parse_job_stmt);
if (ret == SQLITE_DONE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_parse_job_stmt));
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
return NULL;
} else {
CRASH_IF_STMT_FAIL(ret);
}
job = malloc(sizeof(*job));
job->parse_job = create_parse_job(
(const char *) sqlite3_column_text(db->pop_parse_job_stmt, 0),
sqlite3_column_int(db->pop_parse_job_stmt, 1),
sqlite3_column_int64(db->pop_parse_job_stmt, 2));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_parse_job_stmt));
} else {
int ret = sqlite3_step(db->pop_index_job_stmt);
if (ret == SQLITE_DONE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt));
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
return NULL;
}
CRASH_IF_STMT_FAIL(ret);
job = malloc(sizeof(*job));
const char *line = (const char *) sqlite3_column_text(db->pop_index_job_stmt, 2);
if (line != NULL) {
job->bulk_line = malloc(sizeof(es_bulk_line_t) + strlen(line) + 1);
strcpy(job->bulk_line->line, line);
} else {
job->bulk_line = malloc(sizeof(es_bulk_line_t));
}
strcpy(job->bulk_line->doc_id, (const char *) sqlite3_column_text(db->pop_index_job_stmt, 0));
job->bulk_line->type = sqlite3_column_int(db->pop_index_job_stmt, 1);
job->bulk_line->next = NULL;
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt));
}
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
pthread_mutex_lock(&db->ipc_ctx->mutex);
db->ipc_ctx->job_count -= 1;
pthread_mutex_unlock(&db->ipc_ctx->mutex);
job->type = job_type;
return job;
}
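A sketch of the consumer side of this queue; parse_one() is a hypothetical stand-in for the real worker entry point:

// Consumer thread: database_get_work() waits while the queue is empty and
// returns NULL once it drains after no_more_jobs is set.
static void *consumer_loop(void *arg) {
    database_t *ipc_db = arg;
    job_t *job;
    while ((job = database_get_work(ipc_db, JOB_PARSE_JOB)) != NULL) {
        parse_one(job->parse_job); // hypothetical worker entry point
        free(job);
    }
    return NULL;
}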
void database_add_work(database_t *db, job_t *job) {
int ret;
pthread_mutex_lock(&db->ipc_ctx->db_mutex);
if (job->type == JOB_PARSE_JOB) {
do {
sqlite3_bind_text(db->insert_parse_job_stmt, 1, job->parse_job->filepath, -1, SQLITE_STATIC);
sqlite3_bind_int(db->insert_parse_job_stmt, 2, job->parse_job->vfile.mtime);
sqlite3_bind_int64(db->insert_parse_job_stmt, 3, (long) job->parse_job->vfile.st_size);
ret = sqlite3_step(db->insert_parse_job_stmt);
if (ret == SQLITE_FULL) {
usleep(1000000);
} else {
CRASH_IF_STMT_FAIL(ret);
}
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->insert_parse_job_stmt));
} while (ret != SQLITE_DONE);
} else if (job->type == JOB_BULK_LINE) {
do {
sqlite3_bind_text(db->insert_index_job_stmt, 1, job->bulk_line->doc_id, -1, SQLITE_STATIC);
sqlite3_bind_int(db->insert_index_job_stmt, 2, job->bulk_line->type);
sqlite3_bind_text(db->insert_index_job_stmt, 3, job->bulk_line->line, -1, SQLITE_STATIC);
ret = sqlite3_step(db->insert_index_job_stmt);
if (ret == SQLITE_FULL) {
sqlite3_reset(db->insert_index_job_stmt);
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
usleep(100000);
pthread_mutex_lock(&db->ipc_ctx->db_mutex);
continue;
} else {
CRASH_IF_STMT_FAIL(ret);
}
ret = sqlite3_reset(db->insert_index_job_stmt);
if (ret == SQLITE_FULL) {
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
usleep(100000);
pthread_mutex_lock(&db->ipc_ctx->db_mutex);
}
} while (ret != SQLITE_DONE && ret != SQLITE_OK);
} else {
LOG_FATAL("database.c", "FIXME: invalid job type");
}
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
pthread_mutex_lock(&db->ipc_ctx->mutex);
db->ipc_ctx->job_count += 1;
pthread_cond_signal(&db->ipc_ctx->has_work_cond);
pthread_mutex_unlock(&db->ipc_ctx->mutex);
}
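And the matching producer side. Because of the SQLITE_FULL back-off above, the call can block until consumers drain the size-capped IPC database; filepath, mtime, st_size and ipc_db are assumed to come from the directory walker:

job_t job = {
        .type = JOB_PARSE_JOB,
        .parse_job = create_parse_job(filepath, mtime, st_size),
};
database_add_work(ipc_db, &job); // the INSERT runs before returning, so the stack job_t is safe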

src/database/database.h (new file, 155 lines)
View File

@@ -0,0 +1,155 @@
#ifndef SIST2_DATABASE_H
#define SIST2_DATABASE_H
#include <sqlite3.h>
#include <cjson/cJSON.h>
#include "src/sist.h"
#include "src/index/elastic.h"
typedef struct index_descriptor index_descriptor_t;
extern const char *IpcDatabaseSchema;
extern const char *IndexDatabaseSchema;
typedef enum {
INDEX_DATABASE,
IPC_CONSUMER_DATABASE,
IPC_PRODUCER_DATABASE,
FTS_DATABASE
} database_type_t;
typedef enum {
JOB_UNDEFINED,
JOB_BULK_LINE,
JOB_PARSE_JOB
} job_type_t;
typedef struct {
job_type_t type;
union {
parse_job_t *parse_job;
es_bulk_line_t *bulk_line;
};
} job_t;
typedef struct {
int job_count;
int no_more_jobs;
int completed_job_count;
pthread_mutex_t mutex;
pthread_mutex_t db_mutex;
pthread_mutex_t index_db_mutex;
pthread_cond_t has_work_cond;
char current_job[MAX_THREADS][PATH_MAX * 2];
} database_ipc_ctx_t;
typedef struct database {
char filename[PATH_MAX];
database_type_t type;
sqlite3 *db;
// Prepared statements
sqlite3_stmt *select_thumbnail_stmt;
sqlite3_stmt *treemap_merge_up_update_stmt;
sqlite3_stmt *treemap_merge_up_delete_stmt;
sqlite3_stmt *mark_document_stmt;
sqlite3_stmt *write_document_stmt;
sqlite3_stmt *write_document_sidecar_stmt;
sqlite3_stmt *write_thumbnail_stmt;
sqlite3_stmt *insert_parse_job_stmt;
sqlite3_stmt *insert_index_job_stmt;
sqlite3_stmt *pop_parse_job_stmt;
sqlite3_stmt *pop_index_job_stmt;
database_ipc_ctx_t *ipc_ctx;
} database_t;
typedef struct {
database_t *db;
sqlite3_stmt *stmt;
} database_iterator_t;
typedef struct {
const char *path;
const char *parent;
long size;
} treemap_row_t;
static treemap_row_t null_treemap_row = {0, 0, 0};
database_t *database_create(const char *filename, database_type_t type);
void database_initialize(database_t *db);
void database_open(database_t *db);
void database_close(database_t *, int optimize);
void database_write_thumbnail(database_t *db, const char *id, int num, void *data, size_t data_size);
void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *return_value_len);
void database_write_index_descriptor(database_t *db, index_descriptor_t *desc);
index_descriptor_t *database_read_index_descriptor(database_t *db);
void database_write_document(database_t *db, document_t *doc, const char *json_data);
database_iterator_t *database_create_document_iterator(database_t *db);
cJSON *database_document_iter(database_iterator_t *);
#define database_document_iter_foreach(element, iter) \
for (cJSON *element = database_document_iter(iter); element != NULL; element = database_document_iter(iter))
database_iterator_t *database_create_delete_list_iterator(database_t *db);
char * database_delete_list_iter(database_iterator_t *iter);
#define database_delete_list_iter_foreach(element, iter) \
for (char *element = database_delete_list_iter(iter); element != NULL; element = database_delete_list_iter(iter))
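A sketch of how the delete-list iterator pairs with delete_document() from elastic.h during indexing; the exact call site is an assumption based on this changeset:

database_iterator_t *iter = database_create_delete_list_iterator(db);
database_delete_list_iter_foreach(doc_id, iter) {
    delete_document(doc_id); // queues an ES_BULK_LINE_DELETE job
    free(doc_id);            // ids are heap copies, see database_delete_list_iter()
}
free(iter);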
cJSON *database_incremental_scan_begin(database_t *db);
cJSON *database_incremental_scan_end(database_t *db);
int database_mark_document(database_t *db, const char *id, int mtime);
void database_write_document_sidecar(database_t *db, const char *id, const char *json_data);
database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold);
treemap_row_t database_treemap_iter(database_iterator_t *iter);
#define database_treemap_iter_foreach(element, iter) \
for (treemap_row_t element = database_treemap_iter(iter); element.path != NULL; element = database_treemap_iter(iter))
void database_generate_stats(database_t *db, double treemap_threshold);
job_t *database_get_work(database_t *db, job_type_t job_type);
void database_add_work(database_t *db, job_t *job);
//void database_index(database_t *db);
#define CRASH_IF_STMT_FAIL(x) do { \
int return_value = x; \
if (return_value != SQLITE_DONE && return_value != SQLITE_ROW) { \
LOG_FATALF("database.c", "Sqlite error @ database.c:%d : (%d) %s", __LINE__, return_value, sqlite3_errmsg(db->db)); \
} \
} while (0)
#define CRASH_IF_NOT_SQLITE_OK(x) do { \
int return_value = x; \
if (return_value != SQLITE_OK) { \
LOG_FATALF("database.c", "Sqlite error @ database.c:%d : (%d) %s", __LINE__, return_value, sqlite3_errmsg(db->db)); \
} \
} while (0)
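Both macros expand sqlite3_errmsg(db->db), so they only compile in a scope where a database_t *db exists, and they always report "database.c" in the log message regardless of the including file. A minimal sketch:

void example(database_t *db) {
    // OK: `db` is in scope for the macro's sqlite3_errmsg(db->db) expansion.
    CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA user_version;", NULL, NULL, NULL));
}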
#endif //SIST2_DATABASE_H

View File

@@ -0,0 +1,78 @@
const char *IpcDatabaseSchema =
"CREATE TABLE parse_job ("
" id INTEGER PRIMARY KEY,"
" filepath TEXT NOT NULL,"
" mtime INTEGER NOT NULL,"
" st_size INTEGER NOT NULL"
");"
""
"CREATE TABLE index_job ("
" id INTEGER PRIMARY KEY,"
" doc_id TEXT NOT NULL CHECK ( length(doc_id) = 32 ),"
" type INTEGER NOT NULL,"
" line TEXT"
");";
const char *IndexDatabaseSchema =
"CREATE TABLE thumbnail ("
" id TEXT NOT NULL CHECK ( length(id) = 32 ),"
" num INTEGER NOT NULL,"
" data BLOB NOT NULL,"
" PRIMARY KEY(id, num)"
") WITHOUT ROWID;"
""
"CREATE TABLE document ("
" id TEXT PRIMARY KEY CHECK ( length(id) = 32 ),"
" marked INTEGER NOT NULL DEFAULT (1),"
" mtime INTEGER NOT NULL,"
" size INTEGER NOT NULL,"
" json_data TEXT NOT NULL CHECK ( json_valid(json_data) )"
") WITHOUT ROWID;"
""
"CREATE TABLE delete_list ("
" id TEXT PRIMARY KEY CHECK ( length(id) = 32 )"
") WITHOUT ROWID;"
""
"CREATE TABLE tag ("
" id TEXT NOT NULL,"
" tag TEXT NOT NULL"
");"
""
"CREATE TABLE document_sidecar ("
" id TEXT PRIMARY KEY NOT NULL,"
" json_data TEXT NOT NULL"
") WITHOUT ROWID;"
""
"CREATE TABLE descriptor ("
" id TEXT NOT NULL,"
" version_major INTEGER NOT NULL,"
" version_minor INTEGER NOT NULL,"
" version_patch INTEGER NOT NULL,"
" root TEXT NOT NULL,"
" name TEXT NOT NULL,"
" rewrite_url TEXT,"
" timestamp INTEGER NOT NULL"
");"
""
"CREATE TABLE stats_treemap ("
" path TEXT NOT NULL,"
" size INTEGER NOT NULL"
");"
""
"CREATE TABLE stats_size_agg ("
" bucket INTEGER NOT NULL,"
" count INTEGER NOT NULL"
");"
""
"CREATE TABLE stats_date_agg ("
" bucket INTEGER NOT NULL,"
" count INTEGER NOT NULL"
");"
""
"CREATE TABLE stats_mime_agg ("
" mime TEXT NOT NULL,"
" size INTEGER NOT NULL,"
" count INTEGER NOT NULL"
");";

View File

@@ -0,0 +1,159 @@
#include "database.h"
#include "src/sist.h"
#include "src/ctx.h"
#define TREEMAP_MINIMUM_MERGES_TO_CONTINUE (100)
#define SIZE_BUCKET (long)(5 * 1000 * 1000)
#define DATE_BUCKET (long)(2629800) // ~30 days
database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold) {
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db,
"SELECT path, path_parent(path), size FROM tm"
" WHERE path_parent(path) IN (SELECT path FROM tm)"
" AND size<?",
-1, &stmt, NULL);
sqlite3_bind_int64(stmt, 1, threshold);
database_iterator_t *iter = malloc(sizeof(database_iterator_t));
iter->stmt = stmt;
iter->db = db;
return iter;
}
treemap_row_t database_treemap_iter(database_iterator_t *iter) {
if (iter->stmt == NULL) {
LOG_FATAL("database.c", "FIXME: database_treemap_iter() called after iteration stopped");
}
int ret = sqlite3_step(iter->stmt);
if (ret == SQLITE_ROW) {
treemap_row_t row = {
.path = (const char *) sqlite3_column_text(iter->stmt, 0),
.parent = (const char *) sqlite3_column_text(iter->stmt, 1),
.size = sqlite3_column_int64(iter->stmt, 2)
};
return row;
}
if (ret != SQLITE_DONE) {
LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db));
}
sqlite3_finalize(iter->stmt);
iter->stmt = NULL;
return (treemap_row_t) {NULL, NULL, 0};
}
void database_generate_stats(database_t *db, double treemap_threshold) {
LOG_INFO("database.c", "Generating stats");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_size_agg;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_date_agg;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_mime_agg;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_treemap;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(
sqlite3_exec(db->db, "CREATE TEMP TABLE tm(path TEXT PRIMARY KEY, size INT);", NULL, NULL, NULL));
sqlite3_prepare_v2(db->db, "UPDATE tm SET size=size+? WHERE path=?;", -1, &db->treemap_merge_up_update_stmt, NULL);
sqlite3_prepare_v2(db->db, "DELETE FROM tm WHERE path = ?;", -1, &db->treemap_merge_up_delete_stmt, NULL);
// size aggregation
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "INSERT INTO stats_size_agg"
" SELECT"
" cast(size / ?1 as int) * ?1 as bucket,"
" count(*) as count"
" FROM document"
" GROUP BY bucket", -1, &stmt, NULL);
sqlite3_bind_int(stmt, 1, SIZE_BUCKET);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
sqlite3_finalize(stmt);
// date aggregation
sqlite3_prepare_v2(db->db, "INSERT INTO stats_date_agg"
" SELECT"
" cast(mtime / ?1 as int) * ?1 as bucket,"
" count(*) as count"
" FROM document"
" GROUP BY bucket", -1, &stmt, NULL);
sqlite3_bind_int(stmt, 1, DATE_BUCKET);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
sqlite3_finalize(stmt);
// mime aggregation
sqlite3_prepare_v2(db->db, "INSERT INTO stats_mime_agg"
" SELECT"
" (json_data->>'mime') as bucket,"
" sum(size),"
" count(*)"
" FROM document"
" WHERE bucket IS NOT NULL"
" GROUP BY bucket", -1, &stmt, NULL);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
sqlite3_finalize(stmt);
// Treemap
sqlite3_prepare_v2(db->db, "SELECT SUM(size) FROM document;", -1, &stmt, NULL);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
long total_size = sqlite3_column_int64(stmt, 0);
long threshold = (long) ((double) total_size * treemap_threshold);
sqlite3_finalize(stmt);
// flat map
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
"INSERT INTO tm (path, size) SELECT json_data->>'path' as path, sum(size)"
" FROM document WHERE json_data->>'parent' IS NULL GROUP BY path;",
NULL, NULL, NULL));
// Merge up
int merged_rows = 0;
do {
if (merged_rows) {
LOG_INFOF("database.c", "Treemap merge iteration (%d rows changed)", merged_rows);
}
merged_rows = 0;
sqlite3_prepare_v2(db->db,
"INSERT INTO tm (path, size) SELECT path_parent(path) as parent, 0 "
" FROM tm WHERE parent not IN (SELECT path FROM tm) AND size<?"
" ON CONFLICT DO NOTHING;", -1, &stmt, NULL);
sqlite3_bind_int64(stmt, 1, threshold);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
database_iterator_t *iter = database_create_treemap_iterator(db, threshold);
database_treemap_iter_foreach(row, iter) {
sqlite3_bind_int64(db->treemap_merge_up_update_stmt, 1, row.size);
sqlite3_bind_text(db->treemap_merge_up_update_stmt, 2, row.parent, -1, SQLITE_STATIC);
CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_update_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_update_stmt));
sqlite3_bind_text(db->treemap_merge_up_delete_stmt, 1, row.path, -1, SQLITE_STATIC);
CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_delete_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_delete_stmt));
merged_rows += 1;
}
} while (merged_rows > TREEMAP_MINIMUM_MERGES_TO_CONTINUE);
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
"INSERT INTO stats_treemap (path, size) SELECT path,size FROM tm;",
NULL, NULL, NULL));
LOG_INFO("database.c", "Done!");
}

View File

@@ -0,0 +1,5 @@
#ifndef SIST2_DATABASE_STATS_H
#define SIST2_DATABASE_STATS_H
#endif //SIST2_DATABASE_STATS_H

View File

@@ -29,7 +29,7 @@ void destroy_indexer(es_indexer_t *indexer) {
        return;
    }

-    LOG_DEBUG("elastic.c", "Destroying indexer")
+    LOG_DEBUG("elastic.c", "Destroying indexer");

    if (indexer->es_url != NULL) {
        free(indexer->es_url);

@@ -64,18 +64,21 @@ void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
    cJSON_Delete(line);
}

-void index_json_func(void *arg) {
-    es_bulk_line_t *line = arg;
-    elastic_index_line(line);
+void index_json_func(job_t *job) {
+    elastic_index_line(job->bulk_line);
}

-void delete_document(const char* document_id_str, void* UNUSED(_data)) {
+void delete_document(const char *document_id) {
    es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t));
    bulk_line->type = ES_BULK_LINE_DELETE;
    bulk_line->next = NULL;
+    strcpy(bulk_line->doc_id, document_id);

-    strcpy(bulk_line->doc_id, document_id_str);
-    tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
+    tpool_add_work(IndexCtx.pool, &(job_t) {
+            .type = JOB_BULK_LINE,
+            .bulk_line = bulk_line,
+    });
}

@@ -92,7 +95,10 @@
    bulk_line->next = NULL;
    cJSON_free(json);

-    tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
+    tpool_add_work(IndexCtx.pool, &(job_t) {
+            .type = JOB_BULK_LINE,
+            .bulk_line = bulk_line,
+    });
}

void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) {
@@ -266,7 +272,7 @@ void print_error(response_t *r) {
void _elastic_flush(int max) {
    if (max == 0) {
-        LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue")
+        LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue");
        return;
    }

@@ -279,13 +285,13 @@ void _elastic_flush(int max) {
    response_t *r = web_post(bulk_url, buf, IndexCtx.es_insecure_ssl);

    if (r->status_code == 0) {
-        LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
+        LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url);
    }

    if (r->status_code == 413) {
        if (max <= 1) {
-            LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id)
+            LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id);
            free_response(r);
            free(buf);
            free_queue(1);

@@ -306,7 +312,7 @@ void _elastic_flush(int max) {
        free_response(r);
        free(buf);
-        LOG_WARNING("elastic.c", "Got 429 status, will retry after delay")
+        LOG_WARNING("elastic.c", "Got 429 status, will retry after delay");
        usleep(1000000 * 20);
        _elastic_flush(max);
        return;

@@ -441,7 +447,7 @@ es_version_t *elastic_get_version(const char *es_url, int insecure) {
    }

    if (cJSON_GetObjectItem(response, "error") != NULL) {
-        LOG_WARNING("elastic.c", "Could not get Elasticsearch version")
+        LOG_WARNING("elastic.c", "Could not get Elasticsearch version");
        print_error(r);
        free_response(r);
        return NULL;

@@ -477,7 +483,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
    IndexCtx.es_version = es_version;
    if (es_version == NULL) {
-        LOG_FATAL("elastic.c", "Could not get ES version")
+        LOG_FATAL("elastic.c", "Could not get ES version");
    }

    LOG_INFOF("elastic.c",

@@ -485,7 +491,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
             format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), IS_LEGACY_VERSION(es_version));

    if (!IS_SUPPORTED_ES_VERSION(es_version)) {
-        LOG_FATAL("elastic.c", "This elasticsearch version is not supported!")
+        LOG_FATAL("elastic.c", "This elasticsearch version is not supported!");
    }

    char *settings = NULL;

@@ -512,7 +518,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
    if (r->status_code != 200) {
        print_error(r);
-        LOG_FATAL("elastic.c", "Could not create index")
+        LOG_FATAL("elastic.c", "Could not create index");
    }

    LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);

@@ -533,12 +539,13 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
    LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code);
    if (r->status_code != 200) {
        print_error(r);
-        LOG_FATAL("elastic.c", "Could not update user settings")
+        LOG_FATAL("elastic.c", "Could not update user settings");
    }
    free_response(r);

    if (IS_LEGACY_VERSION(es_version)) {
-        snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
+        snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url,
+                 IndexCtx.es_index);
    } else {
        snprintf(url, sizeof(url), "%s/%s/_mappings", IndexCtx.es_url, IndexCtx.es_index);
    }

@@ -547,7 +554,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
    LOG_INFOF("elastic.c", "Update ES mappings <%d>", r->status_code);
    if (r->status_code != 200) {
        print_error(r);
-        LOG_FATAL("elastic.c", "Could not update user mappings")
+        LOG_FATAL("elastic.c", "Could not update user mappings");
    }

    free_response(r);

View File

@@ -46,7 +46,7 @@ void print_json(cJSON *document, const char index_id_str[SIST_INDEX_ID_LEN]);
void index_json(cJSON *document, const char doc_id[SIST_INDEX_ID_LEN]);

-void delete_document(const char *document_id_str, void* data);
+void delete_document(const char *document_id);

es_indexer_t *create_indexer(const char *url, const char *index);

File diff suppressed because one or more lines are too long

View File

@@ -65,7 +65,7 @@ void web_post_async_poll(subreq_ctx_t *req) {
    curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code);

    if (req->response->status_code == 0) {
-        LOG_ERRORF("web.c", "CURL Error: %s", req->curl_err_buffer)
+        LOG_ERRORF("web.c", "CURL Error: %s", req->curl_err_buffer);
    }

    curl_multi_cleanup(req->multi);

@@ -104,7 +104,7 @@ subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) {
    curl_multi_add_handle(req->multi, curl);
    curl_multi_perform(req->multi, &req->running_handles);

-    LOG_DEBUGF("web.c", "async request POST %s", url)
+    LOG_DEBUGF("web.c", "async request POST %s", url);

    return req;
}

@@ -136,7 +136,7 @@ response_t *web_get(const char *url, int timeout, int insecure) {
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);

    if (resp->status_code == 0) {
-        LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
+        LOG_ERRORF("web.c", "CURL Error: %s", err_buffer);
    }

    curl_easy_cleanup(curl);

@@ -180,7 +180,7 @@ response_t *web_post(const char *url, const char *data, int insecure) {
    resp->size = buffer.cur;

    if (resp->status_code == 0) {
-        LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
+        LOG_ERRORF("web.c", "CURL Error: %s", err_buffer);
    }

    curl_easy_cleanup(curl);

View File

@@ -1,9 +1,7 @@
#include "src/ctx.h" #include "src/ctx.h"
#include "serialize.h" #include "serialize.h"
#include "src/parsing/parse.h"
#include "src/parsing/mime.h" #include "src/parsing/mime.h"
#include <zstd.h>
char *get_meta_key_text(enum metakey meta_key) { char *get_meta_key_text(enum metakey meta_key) {
@@ -79,7 +77,7 @@ char *get_meta_key_text(enum metakey meta_key) {
case MetaChecksum: case MetaChecksum:
return "checksum"; return "checksum";
default: default:
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key) LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key);
} }
} }
@@ -175,7 +173,7 @@ char *build_json_string(document_t *doc) {
break; break;
} }
default: default:
LOG_FATALF("serialize.c", "Invalid meta key: %x %s", meta->key, get_meta_key_text(meta->key)) LOG_FATALF("serialize.c", "Invalid meta key: %x %s", meta->key, get_meta_key_text(meta->key));
} }
meta_line_t *tmp = meta; meta_line_t *tmp = meta;
@@ -189,391 +187,10 @@ char *build_json_string(document_t *doc) {
    return json_str;
}
static struct {
FILE *out_file;
size_t buf_out_size;
void *buf_out;
ZSTD_CCtx *cctx;
} WriterCtx = {
.out_file = NULL
};
#define ZSTD_COMPRESSION_LEVEL 10
void initialize_writer_ctx(const char *file_path) {
WriterCtx.out_file = fopen(file_path, "wb");
WriterCtx.buf_out_size = ZSTD_CStreamOutSize();
WriterCtx.buf_out = malloc(WriterCtx.buf_out_size);
WriterCtx.cctx = ZSTD_createCCtx();
ZSTD_CCtx_setParameter(WriterCtx.cctx, ZSTD_c_compressionLevel, ZSTD_COMPRESSION_LEVEL);
ZSTD_CCtx_setParameter(WriterCtx.cctx, ZSTD_c_checksumFlag, FALSE);
LOG_DEBUGF("serialize.c", "Open index file for writing %s", file_path)
}
void zstd_write_string(const char *string, const size_t len) {
ZSTD_inBuffer input = {string, len, 0};
do {
ZSTD_outBuffer output = {WriterCtx.buf_out, WriterCtx.buf_out_size, 0};
ZSTD_compressStream2(WriterCtx.cctx, &output, &input, ZSTD_e_continue);
if (output.pos > 0) {
ScanCtx.stat_index_size += fwrite(WriterCtx.buf_out, 1, output.pos, WriterCtx.out_file);
}
} while (input.pos != input.size);
}
void write_document_func(void *arg) {
if (WriterCtx.out_file == NULL) {
char dstfile[PATH_MAX];
snprintf(dstfile, PATH_MAX, "%s_index_main.ndjson.zst", ScanCtx.index.path);
initialize_writer_ctx(dstfile);
}
document_t *doc = arg;
char *json_str = build_json_string(doc);
const size_t json_str_len = strlen(json_str);
json_str = realloc(json_str, json_str_len + 1);
*(json_str + json_str_len) = '\n';
zstd_write_string(json_str, json_str_len + 1);
free(json_str);
free(doc->filepath);
}
void zstd_close() {
if (WriterCtx.out_file == NULL) {
LOG_DEBUG("serialize.c", "No zstd stream to close, skipping cleanup")
return;
}
size_t remaining;
do {
ZSTD_outBuffer output = {WriterCtx.buf_out, WriterCtx.buf_out_size, 0};
remaining = ZSTD_endStream(WriterCtx.cctx, &output);
if (output.pos > 0) {
ScanCtx.stat_index_size += fwrite(WriterCtx.buf_out, 1, output.pos, WriterCtx.out_file);
}
} while (remaining != 0);
ZSTD_freeCCtx(WriterCtx.cctx);
free(WriterCtx.buf_out);
fclose(WriterCtx.out_file);
LOG_DEBUG("serialize.c", "End zstd stream & close index file")
}
void writer_cleanup() {
zstd_close();
WriterCtx.out_file = NULL;
}
void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON *json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "id", desc->id);
cJSON_AddStringToObject(json, "version", desc->version);
cJSON_AddStringToObject(json, "root", desc->root);
cJSON_AddStringToObject(json, "name", desc->name);
cJSON_AddStringToObject(json, "type", desc->type);
cJSON_AddStringToObject(json, "rewrite_url", desc->rewrite_url);
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
}
char *str = cJSON_Print(json);
size_t ret = write(fd, str, strlen(str));
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
}
free(str);
close(fd);
cJSON_Delete(json);
}
index_descriptor_t read_index_descriptor(char *path) {
struct stat info;
stat(path, &info);
int fd = open(path, O_RDONLY);
if (fd == -1) {
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
}
char *buf = malloc(info.st_size + 1);
size_t ret = read(fd, buf, info.st_size);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
}
*(buf + info.st_size) = '\0';
close(fd);
cJSON *json = cJSON_Parse(buf);
index_descriptor_t descriptor;
descriptor.timestamp = (long) cJSON_GetObjectItem(json, "timestamp")->valuedouble;
strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring);
strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring);
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (short) strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
if (cJSON_GetObjectItem(json, "type") == NULL) {
strcpy(descriptor.type, INDEX_TYPE_NDJSON);
} else {
strcpy(descriptor.type, cJSON_GetObjectItem(json, "type")->valuestring);
}
cJSON_Delete(json);
free(buf);
return descriptor;
}
void write_document(document_t *doc) {
-    tpool_add_work(ScanCtx.writer_pool, write_document_func, doc);
-}
-
-void thread_cleanup() {
-    cleanup_parse();
-    cleanup_font();
+    char *json_str = build_json_string(doc);
+    database_write_document(ProcData.index_db, doc, json_str);
+    free(doc);
+    free(json_str);
}
void read_index_bin_handle_line(const char *line, const char *index_id, index_func func) {
cJSON *document = cJSON_Parse(line);
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
cJSON_AddStringToObject(document, "index", index_id);
// Load meta from sidecar files
cJSON *meta_obj = NULL;
if (IndexCtx.meta != NULL) {
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
if (meta_string != NULL) {
meta_obj = cJSON_Parse(meta_string);
cJSON *child;
for (child = meta_obj->child; child != NULL; child = child->next) {
char meta_key[4096];
strcpy(meta_key, child->string);
cJSON_DeleteItemFromObject(document, meta_key);
cJSON_AddItemReferenceToObject(document, meta_key, child);
}
}
}
// Load tags from tags DB
if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_DeleteItemFromObject(document, "tag");
cJSON_AddItemToObject(document, "tag", tags_arr);
}
}
func(document, path_md5_str);
cJSON_DeleteItemFromObject(document, "_id");
cJSON_Delete(document);
if (meta_obj) {
cJSON_Delete(meta_obj);
}
}
void read_lines(const char *path, const line_processor_t processor) {
dyn_buffer_t buf = dyn_buffer_create();
// Initialize zstd things
FILE *file = fopen(path, "rb");
size_t const buf_in_size = ZSTD_DStreamInSize();
void *const buf_in = malloc(buf_in_size);
size_t const buf_out_size = ZSTD_DStreamOutSize();
void *const buf_out = malloc(buf_out_size);
ZSTD_DCtx *const dctx = ZSTD_createDCtx();
size_t read;
size_t last_ret = 0;
while ((read = fread(buf_in, 1, buf_in_size, file))) {
ZSTD_inBuffer input = {buf_in, read, 0};
while (input.pos < input.size) {
ZSTD_outBuffer output = {buf_out, buf_out_size, 0};
size_t const ret = ZSTD_decompressStream(dctx, &output, &input);
for (int i = 0; i < output.pos; i++) {
char c = ((char *) output.dst)[i];
if (c == '\n') {
dyn_buffer_write_char(&buf, '\0');
processor.func(buf.buf, processor.data);
buf.cur = 0;
} else {
dyn_buffer_write_char(&buf, c);
}
}
last_ret = ret;
}
}
if (last_ret != 0) {
/* The last return value from ZSTD_decompressStream did not end on a
* frame, but we reached the end of the file! We assume this is an
* error, and the input was truncated.
*/
LOG_FATALF("serialize.c", "EOF before end of stream: %zu", last_ret)
}
ZSTD_freeDCtx(dctx);
free(buf_in);
free(buf_out);
dyn_buffer_destroy(&buf);
fclose(file);
}
void read_index_ndjson(const char *line, void *_data) {
void **data = _data;
const char *index_id = data[0];
index_func func = data[1];
read_index_bin_handle_line(line, index_id, func);
}
void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func func) {
if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
read_lines(path, (line_processor_t) {
.data = (void *[2]) {(void *) index_id, func},
.func = read_index_ndjson,
});
}
}
static __thread GHashTable *IncrementalReadTable = NULL;
void json_put_incremental(cJSON *document, UNUSED(const char doc_id[SIST_DOC_ID_LEN])) {
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
const int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
incremental_put(IncrementalReadTable, path_md5_str, mtime);
}
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc) {
IncrementalReadTable = table;
read_index(filepath, desc->id, desc->type, json_put_incremental);
}
static __thread GHashTable *IncrementalCopyTable = NULL;
static __thread GHashTable *IncrementalNewTable = NULL;
static __thread store_t *IncrementalCopySourceStore = NULL;
static __thread store_t *IncrementalCopyDestinationStore = NULL;
void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get(IncrementalCopyTable, doc_id)) {
// Copy index line
cJSON_DeleteItemFromObject(document, "index");
char *json_str = cJSON_PrintUnformatted(document);
const size_t json_str_len = strlen(json_str);
json_str = realloc(json_str, json_str_len + 1);
*(json_str + json_str_len) = '\n';
// Copy tn store contents
size_t buf_len;
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
if (buf_len != 0) {
store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
free(buf);
}
// Also copy additional thumbnails
if (cJSON_GetObjectItem(document, "thumbnail") != NULL) {
const int thumbnail_count = cJSON_GetObjectItem(document, "thumbnail")->valueint;
for (int i = 1; i < thumbnail_count; i++) {
char tn_key[SIST_DOC_ID_LEN + sizeof(char) * 4];
snprintf(tn_key, sizeof(tn_key), "%s%04d", doc_id, i);
buf = store_read(IncrementalCopySourceStore, tn_key, sizeof(tn_key), &buf_len);
if (buf_len != 0) {
store_write(IncrementalCopyDestinationStore, tn_key, sizeof(tn_key), buf, buf_len);
free(buf);
}
}
}
zstd_write_string(json_str, json_str_len + 1);
free(json_str);
}
}
/**
* Copy items from an index that are in the copy_table. Also copies from
* the store.
*/
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table) {
if (WriterCtx.out_file == NULL) {
initialize_writer_ctx(dst_filepath);
}
IncrementalCopyTable = copy_table;
IncrementalCopySourceStore = store;
IncrementalCopyDestinationStore = dst_store;
read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
}
void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
char doc_id_n[SIST_DOC_ID_LEN + 1];
doc_id_n[SIST_DOC_ID_LEN] = '\0';
doc_id_n[SIST_DOC_ID_LEN - 1] = '\n';
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
// do not delete archive virtual entries
if (cJSON_GetObjectItem(document, "parent") == NULL
&& !incremental_get(IncrementalCopyTable, doc_id)
&& !incremental_get(IncrementalNewTable, doc_id)
) {
memcpy(doc_id_n, doc_id, SIST_DOC_ID_LEN - 1);
zstd_write_string(doc_id, sizeof(doc_id_n));
}
}
void incremental_delete(const char *del_filepath, const char *index_filepath,
GHashTable *copy_table, GHashTable *new_table) {
if (WriterCtx.out_file == NULL) {
initialize_writer_ctx(del_filepath);
}
IncrementalCopyTable = copy_table;
IncrementalNewTable = new_table;
read_index(index_filepath, "", INDEX_TYPE_NDJSON, incremental_delete_handle_doc);
}

View File

@@ -2,55 +2,7 @@
#define SIST2_SERIALIZE_H

#include "src/sist.h"
#include "store.h"
#include <sys/syscall.h>
#include <glib.h>
typedef struct line_processor {
void* data;
void (*func)(const char*, void*);
} line_processor_t;
typedef void(*index_func)(cJSON *, const char[SIST_DOC_ID_LEN]);
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table);
void incremental_delete(const char *del_filepath, const char* index_filepath,
GHashTable *copy_table, GHashTable *new_table);
void write_document(document_t *doc);
void read_lines(const char *path, const line_processor_t processor);
void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func);
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);
/**
* Must be called after write_document
*/
void thread_cleanup();
void writer_cleanup();
void write_index_descriptor(char *path, index_descriptor_t *desc);
index_descriptor_t read_index_descriptor(char *path);
// caller ensures char file_path[PATH_MAX]
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
if (access(file_path, R_OK) == 0) { \
action_ok; \
} else { \
action_main_fail; \
} \
snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
if ((cond_original) && access(file_path, R_OK) == 0) { \
action_ok; \
} \
#endif

View File

@@ -1,195 +0,0 @@
#include "store.h"
#include "src/ctx.h"
store_t *store_create(const char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t));
mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR);
strcpy(store->path, path);
#if (SIST_FAKE_STORE != 1)
store->chunk_size = chunk_size;
pthread_rwlock_init(&store->lock, NULL);
mdb_env_create(&store->env);
int open_ret = mdb_env_open(store->env,
path,
MDB_WRITEMAP | MDB_MAPASYNC,
S_IRUSR | S_IWUSR
);
if (open_ret != 0) {
LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
}
store->size = (size_t) store->chunk_size;
mdb_env_set_mapsize(store->env, store->size);
// Open dbi
MDB_txn *txn;
mdb_txn_begin(store->env, NULL, 0, &txn);
mdb_dbi_open(txn, NULL, 0, &store->dbi);
mdb_txn_commit(txn);
#endif
return store;
}
void store_destroy(store_t *store) {
#if (SIST_FAKE_STORE != 1)
pthread_rwlock_destroy(&store->lock);
mdb_dbi_close(store->env, store->dbi);
mdb_env_close(store->env);
#endif
free(store);
}
void store_flush(store_t *store) {
mdb_env_sync(store->env, TRUE);
}
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
if (LogCtx.very_verbose) {
LOG_DEBUGF("store.c", "Store write %s@{%s} %lu bytes", store->path, key, buf_len)
}
#if (SIST_FAKE_STORE != 1)
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
MDB_val mdb_value;
mdb_value.mv_data = buf;
mdb_value.mv_size = buf_len;
MDB_txn *txn;
pthread_rwlock_rdlock(&store->lock);
mdb_txn_begin(store->env, NULL, 0, &txn);
int put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
ScanCtx.stat_tn_size += buf_len;
int db_full = FALSE;
int should_abort_transaction = FALSE;
if (put_ret == MDB_MAP_FULL) {
db_full = TRUE;
should_abort_transaction = TRUE;
} else {
int commit_ret = mdb_txn_commit(txn);
if (commit_ret == MDB_MAP_FULL) {
db_full = TRUE;
}
}
if (db_full) {
LOG_DEBUGF("store.c", "Updating mdb mapsize to %lu bytes", store->size)
if (should_abort_transaction) {
mdb_txn_abort(txn);
}
pthread_rwlock_unlock(&store->lock);
// Cannot resize while a transaction is open.
// The resize takes effect on the next commit.
pthread_rwlock_wrlock(&store->lock);
store->size += store->chunk_size;
int resize_ret = mdb_env_set_mapsize(store->env, store->size);
if (resize_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
mdb_txn_begin(store->env, NULL, 0, &txn);
int put_ret_retry = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
if (put_ret_retry != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
int ret = mdb_txn_commit(txn);
if (ret != 0) {
LOG_FATALF("store.c", "FIXME: Could not commit to store %s: %s (%d), %d, %d %d",
store->path, mdb_strerror(ret), ret,
put_ret, put_ret_retry);
}
LOG_DEBUGF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
} else if (put_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
pthread_rwlock_unlock(&store->lock);
#endif
}
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) {
char *buf = NULL;
#if (SIST_FAKE_STORE != 1)
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
MDB_val mdb_value;
MDB_txn *txn;
mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
int get_ret = mdb_get(txn, store->dbi, &mdb_key, &mdb_value);
if (get_ret == MDB_NOTFOUND) {
*ret_vallen = 0;
} else {
*ret_vallen = mdb_value.mv_size;
buf = malloc(mdb_value.mv_size);
memcpy(buf, mdb_value.mv_data, mdb_value.mv_size);
}
mdb_txn_abort(txn);
#endif
return buf;
}
GHashTable *store_read_all(store_t *store) {
int count = 0;
GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
MDB_txn *txn = NULL;
mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
MDB_cursor *cur = NULL;
mdb_cursor_open(txn, store->dbi, &cur);
MDB_val key;
MDB_val value;
while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
char *key_str = malloc(key.mv_size);
memcpy(key_str, key.mv_data, key.mv_size);
char *val_str = malloc(value.mv_size);
memcpy(val_str, value.mv_data, value.mv_size);
g_hash_table_insert(table, key_str, val_str);
count += 1;
}
const char *path;
mdb_env_get_path(store->env, &path);
LOG_DEBUGF("store.c", "Read %d entries from %s", count, path);
mdb_cursor_close(cur);
mdb_txn_abort(txn);
return table;
}
void store_copy(store_t *store, const char *destination) {
mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR);
mdb_env_copy(store->env, destination);
}
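
The notable pattern in this (now deleted) store is how store_write() grows the LMDB memory map on demand: a write that fails with MDB_MAP_FULL aborts the transaction, enlarges the map by one chunk, and retries once. A condensed sketch of that pattern, with the rwlock, statistics, and logging stripped out (put_with_resize is a hypothetical name, not a sist2 function):

#include <lmdb.h>

static int put_with_resize(MDB_env *env, MDB_dbi dbi, MDB_val *key, MDB_val *val,
                           size_t *map_size, size_t chunk_size) {
    MDB_txn *txn;
    mdb_txn_begin(env, NULL, 0, &txn);
    int rc = mdb_put(txn, dbi, key, val, 0);
    int txn_open = 1;
    if (rc == 0) {
        rc = mdb_txn_commit(txn);   /* the commit itself can also report MDB_MAP_FULL */
        txn_open = 0;
    }
    if (rc == MDB_MAP_FULL) {
        if (txn_open) {
            mdb_txn_abort(txn);     /* the map cannot be resized while a transaction is open */
        }
        *map_size += chunk_size;
        mdb_env_set_mapsize(env, *map_size);    /* takes effect for the next transaction */
        mdb_txn_begin(env, NULL, 0, &txn);
        rc = mdb_put(txn, dbi, key, val, 0);
        if (rc == 0) {
            rc = mdb_txn_commit(txn);
        } else {
            mdb_txn_abort(txn);
        }
    }
    return rc;
}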

src/io/store.h

@@ -1,37 +0,0 @@
#ifndef SIST2_STORE_H
#define SIST2_STORE_H
#include <pthread.h>
#include <lmdb.h>
#include <glib.h>
#define STORE_SIZE_TN (1024 * 1024 * 5)
#define STORE_SIZE_TAG (1024 * 1024)
#define STORE_SIZE_META STORE_SIZE_TAG
typedef struct store_t {
char path[PATH_MAX];
char *tmp_path;
MDB_dbi dbi;
MDB_env *env;
size_t size;
size_t chunk_size;
pthread_rwlock_t lock;
} store_t;
store_t *store_create(const char *path, size_t chunk_size);
void store_destroy(store_t *store);
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);
void store_flush(store_t *store);
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
GHashTable *store_read_all(store_t *store);
void store_copy(store_t *store, const char *destination);
#endif
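
A hypothetical caller of this API, assuming thumbnail_buf/thumbnail_len hold an encoded image and doc_id is a sist2 document id buffer (illustration only, not code from the repository):

store_t *store = store_create("index.sist2/thumbs", STORE_SIZE_TN);

store_write(store, doc_id, SIST_DOC_ID_LEN, thumbnail_buf, thumbnail_len);
store_flush(store);

size_t read_len;
char *thumbnail = store_read(store, doc_id, SIST_DOC_ID_LEN, &read_len);
/* ... serve the thumbnail ... */
free(thumbnail);
store_destroy(store);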

src/io/walk.c

@@ -1,44 +1,12 @@
#include "walk.h" #include "walk.h"
#include "src/ctx.h" #include "src/ctx.h"
#include "src/parsing/parse.h" #include "src/parsing/fs_util.h"
#include <ftw.h> #include <ftw.h>
#include <pthread.h>
#define STR_STARTS_WITH(x, y) (strncmp(y, x, strlen(y) - 1) == 0) #define STR_STARTS_WITH(x, y) (strncmp(y, x, strlen(y) - 1) == 0)
__always_inline
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
int len = (int) strlen(filepath);
parse_job_t *job = malloc(sizeof(parse_job_t) + len);
strcpy(job->filepath, filepath);
job->base = base;
char *p = strrchr(filepath + base, '.');
if (p != NULL) {
job->ext = (int) (p - filepath + 1);
} else {
job->ext = len;
}
job->vfile.info = *info;
job->parent[0] = '\0';
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;
// Filesystem reads are always rewindable
job->vfile.read_rewindable = fs_read;
job->vfile.reset = fs_reset;
job->vfile.close = fs_close;
job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE;
job->vfile.has_checksum = FALSE;
job->vfile.rewind_buffer_size = 0;
job->vfile.rewind_buffer = NULL;
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
return job;
}
int sub_strings[30]; int sub_strings[30];
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, str, strlen(str), 0, 0, sub_strings, sizeof(sub_strings)) >= 0) #define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, str, strlen(str), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
@@ -53,12 +21,9 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
     }
     if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
-        LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
+        LOG_DEBUGF("walk.c", "Excluded: %s", filepath);
         if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
-            pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
-            ScanCtx.dbg_excluded_files_count += 1;
-            pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
         } else if (typeflag == FTW_D) {
             return FTW_SKIP_SUBTREE;
         }
@@ -67,8 +32,13 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
     }
     if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
-        parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
-        tpool_add_work(ScanCtx.pool, parse, job);
+        parse_job_t *job = create_parse_job(filepath, (int) info->st_mtim.tv_sec, info->st_size);
+        tpool_add_work(ScanCtx.pool, &(job_t) {
+                .type = JOB_PARSE_JOB,
+                .parse_job = job
+        });
+        free(job);
     }
     return FTW_CONTINUE;
@@ -109,14 +79,7 @@ int iterate_file_list(void *input_file) {
     }
     if (ScanCtx.exclude != NULL && EXCLUDED(absolute_path)) {
-        LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path)
-        if (S_ISREG(info.st_mode)) {
-            pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
-            ScanCtx.dbg_excluded_files_count += 1;
-            pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
-        }
+        LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path);
         continue;
     }
@@ -124,11 +87,14 @@ int iterate_file_list(void *input_file) {
LOG_FATALF("walk.c", "File is not a children of root folder (%s): %s", ScanCtx.index.desc.root, buf); LOG_FATALF("walk.c", "File is not a children of root folder (%s): %s", ScanCtx.index.desc.root, buf);
} }
int base = (int) (strrchr(buf, '/') - buf) + 1; parse_job_t *job = create_parse_job(absolute_path, (int) info.st_mtim.tv_sec, info.st_size);
parse_job_t *job = create_fs_parse_job(absolute_path, &info, base);
free(absolute_path); free(absolute_path);
tpool_add_work(ScanCtx.pool, parse, job);
tpool_add_work(ScanCtx.pool, &(job_t) {
.type = JOB_PARSE_JOB,
.parse_job = job
});
free(job);
} }
return 0; return 0;
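
Note the new submission contract: instead of handing the pool a function pointer and a heap pointer it must later free, the caller now passes a small job_t by address and frees its parse_job immediately, which implies tpool_add_work copies the job and its payload into the pool's own storage (consistent with the process-pool refactor in this changeset). A generic sketch of that kind of deep-copying submission, not sist2's actual tpool:

#include <stdlib.h>
#include <string.h>

typedef struct {
    int type;
    void *payload;          /* e.g. a parse_job_t */
    size_t payload_size;
} job_t;

/* Copies the job and its payload, so the caller may free both right away. */
static int submit_copy(job_t *slot, const job_t *job) {
    *slot = *job;
    slot->payload = malloc(job->payload_size);
    if (slot->payload == NULL) {
        return -1;
    }
    memcpy(slot->payload, job->payload, job->payload_size);
    return 0;
}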

src/log.c

@@ -21,8 +21,6 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
     char log_str[LOG_MAX_LENGTH];
-    unsigned long long pid = (unsigned long long) pthread_self();
     char datetime[32];
     time_t t;
     struct tm result;
@@ -42,8 +40,8 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
     log_len = snprintf(
             log_str, sizeof(log_str),
-            "{\"thread\":\"%04llX\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
-            pid, datetime, log_levels[level], filepath_json_str, log_str_json_str
+            "{\"thread\":\"T%d\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
+            ProcData.thread_id, datetime, log_levels[level], filepath_json_str, log_str_json_str
     );
     cJSON_Delete(filepath_json);
@@ -58,15 +56,15 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
     if (is_tty) {
         log_len = snprintf(
                 log_str, sizeof(log_str),
-                "\033[%dm[%04llX]%s [%s] [%s %s] ",
-                31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
+                "\033[%dmT%d%s [%s] [%s %s] ",
+                31 + ProcData.thread_id % 7, ProcData.thread_id, log_colors[level],
                 datetime, log_levels[level], filepath
         );
     } else {
         log_len = snprintf(
                 log_str, sizeof(log_str),
-                "[%04llX] [%s] [%s %s] ",
-                pid, datetime, log_levels[level], filepath
+                "T%d [%s] [%s %s] ",
+                ProcData.thread_id, datetime, log_levels[level], filepath
         );
     }
@@ -112,8 +110,6 @@ void sist_log(const char *filepath, int level, char *str) {
     char log_str[LOG_MAX_LENGTH];
-    unsigned long long pid = (unsigned long long) pthread_self();
     char datetime[32];
     time_t t;
     struct tm result;
@@ -132,8 +128,8 @@ void sist_log(const char *filepath, int level, char *str) {
     log_len = snprintf(
             log_str, sizeof(log_str),
-            "{\"thread\":\"%04llX\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
-            pid, datetime, log_levels[level], filepath_json_str, log_str_json_str
+            "{\"thread\":\"T%d\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
+            ProcData.thread_id, datetime, log_levels[level], filepath_json_str, log_str_json_str
     );
     cJSON_Delete(log_str_json);
@@ -147,16 +143,16 @@ void sist_log(const char *filepath, int level, char *str) {
     if (is_tty) {
         log_len = snprintf(
                 log_str, sizeof(log_str),
-                "\033[%dm[%04llX]%s [%s] [%s %s] %s \033[0m\n",
-                31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
+                "\033[%dmT%d%s [%s] [%s %s] %s \033[0m\n",
+                31 + ProcData.thread_id % 7, ProcData.thread_id, log_colors[level],
                 datetime, log_levels[level], filepath,
                 str
         );
     } else {
         log_len = snprintf(
                 log_str, sizeof(log_str),
-                "[%04llX] [%s] [%s %s] %s \n",
-                pid, datetime, log_levels[level], filepath,
+                "T%d [%s] [%s %s] %s \n",
+                ProcData.thread_id, datetime, log_levels[level], filepath,
                 str
         );
     }

src/log.h

@@ -2,6 +2,7 @@
 #define SIST2_LOG_H
+#include <signal.h>
 #define LOG_MAX_LENGTH 8192
 #define LOG_SIST_DEBUG 0
@@ -10,32 +11,37 @@
 #define LOG_SIST_ERROR 3
 #define LOG_SIST_FATAL 4
-#define LOG_DEBUGF(filepath, fmt, ...) \
-    if (LogCtx.very_verbose) {sist_logf(filepath, LOG_SIST_DEBUG, fmt, __VA_ARGS__);}
+#define LOG_DEBUGF(filepath, fmt, ...) do{\
+    if (LogCtx.very_verbose) {sist_logf(filepath, LOG_SIST_DEBUG, fmt, __VA_ARGS__);}}while(0)
-#define LOG_DEBUG(filepath, str) \
-    if (LogCtx.very_verbose) {sist_log(filepath, LOG_SIST_DEBUG, str);}
+#define LOG_DEBUG(filepath, str) do{\
+    if (LogCtx.very_verbose) {sist_log(filepath, LOG_SIST_DEBUG, str);}}while(0)
-#define LOG_INFOF(filepath, fmt, ...) \
-    if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_INFO, fmt, __VA_ARGS__);}
+#define LOG_INFOF(filepath, fmt, ...) do {\
+    if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_INFO, fmt, __VA_ARGS__);}} while(0)
-#define LOG_INFO(filepath, str) \
-    if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_INFO, str);}
+#define LOG_INFO(filepath, str) do {\
+    if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_INFO, str);}} while(0)
-#define LOG_WARNINGF(filepath, fmt, ...) \
-    if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_WARNING, fmt, __VA_ARGS__);}
+#define LOG_WARNINGF(filepath, fmt, ...) do {\
+    if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_WARNING, fmt, __VA_ARGS__);}}while(0)
-#define LOG_WARNING(filepath, str) \
-    if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_WARNING, str);}
+#define LOG_WARNING(filepath, str) do{\
+    if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_WARNING, str);}}while(0)
-#define LOG_ERRORF(filepath, fmt, ...) \
-    if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_ERROR, fmt, __VA_ARGS__);}
+#define LOG_ERRORF(filepath, fmt, ...) do {\
+    if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_ERROR, fmt, __VA_ARGS__);}}while(0)
-#define LOG_ERROR(filepath, str) \
-    if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_ERROR, str);}
+#define LOG_ERROR(filepath, str) do{\
+    if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_ERROR, str);}}while(0)
-#define LOG_FATALF(filepath, fmt, ...) \
-    sist_logf(filepath, LOG_SIST_FATAL, fmt, __VA_ARGS__);\
-    exit(-1);
+#define LOG_FATALF(filepath, fmt, ...)\
+    sist_logf(filepath, LOG_SIST_FATAL, fmt, __VA_ARGS__);\
+    raise(SIGUSR1)
-#define LOG_FATAL(filepath, str) \
-    sist_log(filepath, LOG_SIST_FATAL, str);\
-    exit(-1);
+#define LOG_FATAL(filepath, str) \
+    sist_log(filepath, LOG_SIST_FATAL, str);\
+    exit(SIGUSR1)
+#define LOG_FATALF_NO_EXIT(filepath, fmt, ...) \
+    sist_logf(filepath, LOG_SIST_FATAL, fmt, __VA_ARGS__)
+#define LOG_FATAL_NO_EXIT(filepath, str) \
+    sist_log(filepath, LOG_SIST_FATAL, str)
 #include "sist.h"

File diff suppressed because one or more lines are too long

src/main.c

@@ -5,8 +5,6 @@
 #include <locale.h>
 #include "cli.h"
-#include "io/serialize.h"
-#include "io/store.h"
 #include "tpool.h"
 #include "io/walk.h"
 #include "index/elastic.h"
@@ -16,13 +14,9 @@
#include "auth0/auth0_c_api.h" #include "auth0/auth0_c_api.h"
#include <signal.h> #include <signal.h>
#include <unistd.h> #include <pthread.h>
#include "stats.h" #include "src/database/database.h"
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const usage[] = { static const char *const usage[] = {
@@ -34,109 +28,62 @@ static const char *const usage[] = {
 };
-static __sighandler_t sigsegv_handler = NULL;
-static __sighandler_t sigabrt_handler = NULL;
-void sig_handler(int signum) {
-    LogCtx.verbose = TRUE;
-    LogCtx.very_verbose = TRUE;
-    LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
-    LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
-    if (ScanCtx.dbg_current_files != NULL) {
-        GHashTableIter iter;
-        g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
-        void *key;
-        void *value;
-        while (g_hash_table_iter_next(&iter, &key, &value)) {
-            parse_job_t *job = value;
-            if (isatty(STDERR_FILENO)) {
-                LOG_DEBUGF(
-                        "*SIGNAL HANDLER*",
-                        "Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
-                        31 + ((unsigned int) key) % 7, key, job->filepath
-                );
-            } else {
-                LOG_DEBUGF(
-                        "*SIGNAL HANDLER*",
-                        "THREAD [%04llX] was working on job %s",
-                        key, job->filepath
-                );
-            }
-        }
-    }
-    if (ScanCtx.pool != NULL) {
-        tpool_dump_debug_info(ScanCtx.pool);
-    }
-    if (IndexCtx.pool != NULL) {
-        tpool_dump_debug_info(IndexCtx.pool);
-    }
-    LOG_INFO(
-            "*SIGNAL HANDLER*",
-            "Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
-    )
-    LOG_INFO(
-            "*SIGNAL HANDLER*",
-            "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
-    )
-#ifndef SIST_DEBUG
-    LOG_WARNING(
-            "*SIGNAL HANDLER*",
-            "You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
-            "releases page to provide additionnal information when submitting a bug report."
-    )
-#endif
-    if (signum == SIGSEGV && sigsegv_handler != NULL) {
-        sigsegv_handler(signum);
-    } else if (signum == SIGABRT && sigabrt_handler != NULL) {
-        sigabrt_handler(signum);
-    }
-    exit(-1);
-}
-void init_dir(const char *dirpath, scan_args_t *args) {
-    char path[PATH_MAX];
-    snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
-    time(&ScanCtx.index.desc.timestamp);
-    strcpy(ScanCtx.index.desc.version, Version);
-    strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
-    if (args->incremental != NULL) {
-        // copy old index id
-        char descriptor_path[PATH_MAX];
-        snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
-        index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
-        memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
+void database_scan_begin(scan_args_t *args) {
+    index_descriptor_t *desc = &ScanCtx.index.desc;
+    database_t *db = database_create(args->output, INDEX_DATABASE);
+    if (args->incremental) {
+        // Update existing descriptor
+        database_open(db);
+        index_descriptor_t *original_desc = database_read_index_descriptor(db);
+        // copy original index id
+        strcpy(desc->id, original_desc->id);
+        if (original_desc->version_major != VersionMajor) {
+            LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc->version, Version);
+        }
+        strcpy(original_desc->root, desc->root);
+        original_desc->root_len = desc->root_len;
+        strcpy(original_desc->rewrite_url, desc->rewrite_url);
+        strcpy(original_desc->name, desc->name);
+        time(&original_desc->timestamp);
+        database_write_index_descriptor(db, original_desc);
+        free(original_desc);
+        database_incremental_scan_begin(db);
     } else {
+        // Create new descriptor
+        time(&desc->timestamp);
+        strcpy(desc->version, Version);
+        desc->version_major = VersionMajor;
+        desc->version_minor = VersionMinor;
+        desc->version_patch = VersionPatch;
         // generate new index id based on timestamp
         unsigned char index_md5[MD5_DIGEST_LENGTH];
         MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
         buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
+        database_initialize(db);
+        database_open(db);
+        database_write_index_descriptor(db, desc);
     }
-    write_index_descriptor(path, &ScanCtx.index.desc);
+    database_close(db, FALSE);
 }
-void scan_print_header() {
-    LOG_INFOF("main.c", "sist2 v%s", Version)
-}
-void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
-    store_write(ScanCtx.index.store, key, key_len, buf, buf_len);
-}
-void _log(const char *filepath, int level, char *str) {
+void write_thumbnail_callback(char *key, int num, void *buf, size_t buf_len) {
+    database_write_thumbnail(ProcData.index_db, key, num, buf, buf_len);
+}
+void log_callback(const char *filepath, int level, char *str) {
     if (level == LEVEL_FATAL) {
         sist_log(filepath, level, str);
         exit(-1);
@@ -153,7 +100,7 @@ void _log(const char *filepath, int level, char *str) {
     }
 }
-void _logf(const char *filepath, int level, char *format, ...) {
+void logf_callback(const char *filepath, int level, char *format, ...) {
     va_list args;
@@ -177,17 +124,12 @@ void _logf(const char *filepath, int level, char *format, ...) {
 void initialize_scan_context(scan_args_t *args) {
-    ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL);
-    pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
-    pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
-    pthread_mutex_init(&ScanCtx.copy_table_mu, NULL);
     ScanCtx.calculate_checksums = args->calculate_checksums;
     // Archive
     ScanCtx.arc_ctx.mode = args->archive_mode;
-    ScanCtx.arc_ctx.log = _log;
-    ScanCtx.arc_ctx.logf = _logf;
+    ScanCtx.arc_ctx.log = log_callback;
+    ScanCtx.arc_ctx.logf = logf_callback;
     ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
     if (args->archive_passphrase != NULL) {
         strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
@@ -196,17 +138,16 @@ void initialize_scan_context(scan_args_t *args) {
     }
     // Comic
-    ScanCtx.comic_ctx.log = _log;
-    ScanCtx.comic_ctx.logf = _logf;
-    ScanCtx.comic_ctx.store = _store;
+    ScanCtx.comic_ctx.log = log_callback;
+    ScanCtx.comic_ctx.logf = logf_callback;
+    ScanCtx.comic_ctx.store = write_thumbnail_callback;
     ScanCtx.comic_ctx.enable_tn = args->tn_count > 0;
     ScanCtx.comic_ctx.tn_size = args->tn_size;
     ScanCtx.comic_ctx.tn_qscale = args->tn_quality;
-    ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
-    ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
+    ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string("application/x-cbr");
+    ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string("application/x-cbz");
     // Ebook
-    pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
     ScanCtx.ebook_ctx.content_size = args->content_size;
     ScanCtx.ebook_ctx.enable_tn = args->tn_count > 0;
     ScanCtx.ebook_ctx.tn_size = args->tn_size;
@@ -214,25 +155,25 @@ void initialize_scan_context(scan_args_t *args) {
     ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
     ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
     }
-    ScanCtx.ebook_ctx.log = _log;
-    ScanCtx.ebook_ctx.logf = _logf;
-    ScanCtx.ebook_ctx.store = _store;
+    ScanCtx.ebook_ctx.log = log_callback;
+    ScanCtx.ebook_ctx.logf = logf_callback;
+    ScanCtx.ebook_ctx.store = write_thumbnail_callback;
     ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
     ScanCtx.ebook_ctx.tn_qscale = args->tn_quality;
     // Font
     ScanCtx.font_ctx.enable_tn = args->tn_count > 0;
-    ScanCtx.font_ctx.log = _log;
-    ScanCtx.font_ctx.logf = _logf;
-    ScanCtx.font_ctx.store = _store;
+    ScanCtx.font_ctx.log = log_callback;
+    ScanCtx.font_ctx.logf = logf_callback;
+    ScanCtx.font_ctx.store = write_thumbnail_callback;
     // Media
     ScanCtx.media_ctx.tn_qscale = args->tn_quality;
     ScanCtx.media_ctx.tn_size = args->tn_size;
     ScanCtx.media_ctx.tn_count = args->tn_count;
-    ScanCtx.media_ctx.log = _log;
-    ScanCtx.media_ctx.logf = _logf;
-    ScanCtx.media_ctx.store = _store;
+    ScanCtx.media_ctx.log = log_callback;
+    ScanCtx.media_ctx.logf = logf_callback;
+    ScanCtx.media_ctx.store = write_thumbnail_callback;
     ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024;
     ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
     ScanCtx.media_ctx.read_subtitles = args->tn_count;
@@ -246,32 +187,29 @@ void initialize_scan_context(scan_args_t *args) {
     // OOXML
     ScanCtx.ooxml_ctx.enable_tn = args->tn_count > 0;
     ScanCtx.ooxml_ctx.content_size = args->content_size;
-    ScanCtx.ooxml_ctx.log = _log;
-    ScanCtx.ooxml_ctx.logf = _logf;
-    ScanCtx.ooxml_ctx.store = _store;
+    ScanCtx.ooxml_ctx.log = log_callback;
+    ScanCtx.ooxml_ctx.logf = logf_callback;
+    ScanCtx.ooxml_ctx.store = write_thumbnail_callback;
     // MOBI
     ScanCtx.mobi_ctx.content_size = args->content_size;
-    ScanCtx.mobi_ctx.log = _log;
-    ScanCtx.mobi_ctx.logf = _logf;
+    ScanCtx.mobi_ctx.log = log_callback;
+    ScanCtx.mobi_ctx.logf = logf_callback;
     // TEXT
     ScanCtx.text_ctx.content_size = args->content_size;
-    ScanCtx.text_ctx.log = _log;
-    ScanCtx.text_ctx.logf = _logf;
+    ScanCtx.text_ctx.log = log_callback;
+    ScanCtx.text_ctx.logf = logf_callback;
     // MSDOC
-    ScanCtx.msdoc_ctx.enable_tn = args->tn_count > 0;
-    ScanCtx.msdoc_ctx.tn_size = args->tn_size;
     ScanCtx.msdoc_ctx.content_size = args->content_size;
-    ScanCtx.msdoc_ctx.log = _log;
-    ScanCtx.msdoc_ctx.logf = _logf;
-    ScanCtx.msdoc_ctx.store = _store;
-    ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/msword");
+    ScanCtx.msdoc_ctx.log = log_callback;
+    ScanCtx.msdoc_ctx.logf = logf_callback;
+    ScanCtx.msdoc_ctx.store = write_thumbnail_callback;
+    ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string("application/msword");
     ScanCtx.threads = args->threads;
     ScanCtx.depth = args->depth;
-    ScanCtx.mem_limit = (size_t) args->scan_mem_limit_mib * 1024 * 1024;
     strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
     strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
@@ -284,176 +222,66 @@ void initialize_scan_context(scan_args_t *args) {
     ScanCtx.raw_ctx.tn_qscale = args->tn_quality;
     ScanCtx.raw_ctx.enable_tn = args->tn_count > 0;
     ScanCtx.raw_ctx.tn_size = args->tn_size;
-    ScanCtx.raw_ctx.log = _log;
-    ScanCtx.raw_ctx.logf = _logf;
-    ScanCtx.raw_ctx.store = _store;
+    ScanCtx.raw_ctx.log = log_callback;
+    ScanCtx.raw_ctx.logf = logf_callback;
+    ScanCtx.raw_ctx.store = write_thumbnail_callback;
     // Wpd
     ScanCtx.wpd_ctx.content_size = args->content_size;
-    ScanCtx.wpd_ctx.log = _log;
-    ScanCtx.wpd_ctx.logf = _logf;
-    ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/wordperfect");
+    ScanCtx.wpd_ctx.log = log_callback;
+    ScanCtx.wpd_ctx.logf = logf_callback;
+    ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string("application/wordperfect");
     // Json
     ScanCtx.json_ctx.content_size = args->content_size;
-    ScanCtx.json_ctx.log = _log;
-    ScanCtx.json_ctx.logf = _logf;
-    ScanCtx.json_ctx.json_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/json");
-    ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson");
+    ScanCtx.json_ctx.log = log_callback;
+    ScanCtx.json_ctx.logf = logf_callback;
+    ScanCtx.json_ctx.json_mime = mime_get_mime_by_string("application/json");
+    ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string("application/ndjson");
 }
-/**
- * Loads an existing index as the baseline for incremental scanning.
- * 1. load old index files (original+main) => original_table
- * 2. allocate empty table => copy_table
- * 3. allocate empty table => new_table
- * the original_table/copy_table/new_table will be populated in parsing/parse.c:parse
- * and consumed in main.c:save_incremental_index
- *
- * Note: the existing index may or may not be of incremental index form.
- */
-void load_incremental_index(const scan_args_t *args) {
-    char file_path[PATH_MAX];
-    ScanCtx.original_table = incremental_get_table();
-    ScanCtx.copy_table = incremental_get_table();
-    ScanCtx.new_table = incremental_get_table();
-    char descriptor_path[PATH_MAX];
-    snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
-    index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
-    if (strcmp(original_desc.version, Version) != 0) {
-        LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
-    }
-    READ_INDICES(
-            file_path,
-            args->incremental,
-            incremental_read(ScanCtx.original_table, file_path, &original_desc),
-            LOG_DEBUG("main.c", "The base index for incremental scan does not have a main index"),
-            TRUE
-    );
-    LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
-}
-/**
- * Saves an incremental index.
- * Before calling this function, the scanner should have finished writing the main index.
- * 1. Build original_table - new_table => delete_table
- * 2. Incrementally copy from old index files [(original+main) /\ copy_table] => index_original.ndjson.zst & store
- */
-void save_incremental_index(scan_args_t *args) {
-    char dst_path[PATH_MAX];
-    char store_path[PATH_MAX];
-    char file_path[PATH_MAX];
-    char del_path[PATH_MAX];
-    snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
-    snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path);
-    store_t *source = store_create(store_path, STORE_SIZE_TN);
-    LOG_INFOF("main.c", "incremental_delete: original size = %u, copy size = %u, new size = %u",
-              g_hash_table_size(ScanCtx.original_table),
-              g_hash_table_size(ScanCtx.copy_table),
-              g_hash_table_size(ScanCtx.new_table));
-    snprintf(del_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path);
-    READ_INDICES(file_path, args->incremental,
-                 incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table),
-                 perror("incremental_delete"), 1);
-    writer_cleanup();
-    READ_INDICES(file_path, args->incremental,
-                 incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table),
-                 perror("incremental_copy"), 1);
-    writer_cleanup();
-    store_destroy(source);
-    snprintf(store_path, PATH_MAX, "%stags", args->incremental);
-    snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
-    store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
-    store_copy(source_tags, dst_path);
-    store_destroy(source_tags);
-}
-/**
- * An index can be either incremental or non-incremental (initial index).
- * For an initial index, there is only the "main" index.
- * For an incremental index, there are, additionally:
- * - An "original" index, referencing all files unchanged since the previous index.
- * - A "delete" index, referencing all files that exist in the previous index, but deleted since then.
- * Therefore, for an incremental index, "main"+"original" covers all the current files in the live filesystem,
- * and is orthogonal to the "delete" index. When building an incremental index upon an old incremental index,
- * the old "delete" index can be safely ignored.
- */
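
The removed comment above boils down to simple set algebra: copy_table holds documents unchanged since the previous index, new_table holds documents (re)scanned this run, and anything from the old index in neither table belongs in the delete list; incremental_delete_handle_doc() in serialize.c applied exactly this test. A minimal sketch of the membership check, using the same GLib tables:

#include <glib.h>

/* A document from the previous index is deleted iff it was neither
   carried over unchanged (copy_table) nor rescanned (new_table). */
static gboolean is_deleted(GHashTable *copy_table, GHashTable *new_table,
                           const char *doc_id) {
    return !g_hash_table_contains(copy_table, doc_id)
           && !g_hash_table_contains(new_table, doc_id);
}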
 void sist2_scan(scan_args_t *args) {
-    ScanCtx.mime_table = mime_get_mime_table();
-    ScanCtx.ext_table = mime_get_ext_table();
     initialize_scan_context(args);
-    init_dir(ScanCtx.index.path, args);
-    char store_path[PATH_MAX];
-    snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
-    ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
-    snprintf(store_path, PATH_MAX, "%smeta", ScanCtx.index.path);
-    ScanCtx.index.meta_store = store_create(store_path, STORE_SIZE_META);
-    scan_print_header();
-    if (args->incremental != NULL) {
-        load_incremental_index(args);
-    }
-    ScanCtx.pool = tpool_create(ScanCtx.threads, thread_cleanup, TRUE, TRUE, ScanCtx.mem_limit);
+    database_scan_begin(args);
+    LOG_INFOF("main.c", "sist2 v%s", Version);
+    ScanCtx.pool = tpool_create(ScanCtx.threads, TRUE);
     tpool_start(ScanCtx.pool);
-    ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE, 0);
-    tpool_start(ScanCtx.writer_pool);
     if (args->list_path) {
         // Scan using file list
         int list_ret = iterate_file_list(args->list_file);
         if (list_ret != 0) {
-            LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret)
+            LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret);
         }
     } else {
         // Scan directory recursively
         int walk_ret = walk_directory_tree(ScanCtx.index.desc.root);
         if (walk_ret == -1) {
-            LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno)
+            LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno);
         }
     }
     tpool_wait(ScanCtx.pool);
     tpool_destroy(ScanCtx.pool);
-    tpool_wait(ScanCtx.writer_pool);
-    tpool_destroy(ScanCtx.writer_pool);
-    LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
-    LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
-    LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
-    LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size)
-    LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size)
-    if (args->incremental != NULL) {
-        save_incremental_index(args);
+    LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size);
+    LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size);
+    database_t *db = database_create(args->output, INDEX_DATABASE);
+    database_open(db);
+    if (args->incremental != FALSE) {
+        database_incremental_scan_end(db);
     }
-    generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
-    store_destroy(ScanCtx.index.store);
-    store_destroy(ScanCtx.index.meta_store);
+    database_generate_stats(db, args->treemap_threshold);
+    database_close(db, args->optimize_database);
 }
 void sist2_index(index_args_t *args) {
-    char file_path[PATH_MAX];
     IndexCtx.es_url = args->es_url;
     IndexCtx.es_index = args->es_index;
     IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
@@ -464,91 +292,72 @@ void sist2_index(index_args_t *args) {
         elastic_init(args->force_reset, args->es_mappings, args->es_settings);
     }
-    char descriptor_path[PATH_MAX];
-    snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
-    index_descriptor_t desc = read_index_descriptor(descriptor_path);
-    LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
-    if (strcmp(desc.version, Version) != 0) {
-        LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", desc.version, Version)
-    }
-    DIR *dir = opendir(args->index_path);
-    if (dir == NULL) {
-        LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
-    }
-    char path_tmp[PATH_MAX];
-    snprintf(path_tmp, sizeof(path_tmp), "%stags", args->index_path);
-    IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
-    IndexCtx.tags = store_read_all(IndexCtx.tag_store);
-    snprintf(path_tmp, sizeof(path_tmp), "%smeta", args->index_path);
-    IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
-    IndexCtx.meta = store_read_all(IndexCtx.meta_store);
-    index_func f;
-    if (args->print) {
-        f = print_json;
-    } else {
-        f = index_json;
-    }
-    IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0, 0);
+    database_t *db = database_create(args->index_path, INDEX_DATABASE);
+    database_open(db);
+    index_descriptor_t *desc = database_read_index_descriptor(db);
+    database_close(db, FALSE);
+    LOG_DEBUGF("main.c", "Index version %s", desc->version);
+    if (desc->version_major != VersionMajor) {
+        LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", desc->version, Version);
+    }
+    IndexCtx.pool = tpool_create(args->threads, args->print == FALSE);
     tpool_start(IndexCtx.pool);
-    READ_INDICES(file_path, args->index_path, {
-        read_index(file_path, desc.id, desc.type, f);
-        LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type);
-    }, {}, !args->incremental);
-    // Only read the _delete index if we're sending data to ES
-    if (!args->print) {
-        snprintf(file_path, PATH_MAX, "%s_index_delete.list.zst", args->index_path);
-        if (0 == access(file_path, R_OK)) {
-            read_lines(file_path, (line_processor_t) {
-                    .data = NULL,
-                    .func = delete_document
-            });
-            LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type)
+    int cnt = 0;
+    db = database_create(args->index_path, INDEX_DATABASE);
+    database_open(db);
+    database_iterator_t *iterator = database_create_document_iterator(db);
+    database_document_iter_foreach(json, iterator) {
+        const char *doc_id = cJSON_GetObjectItem(json, "_id")->valuestring;
+        if (args->print) {
+            print_json(json, doc_id);
+        } else {
+            index_json(json, doc_id);
+            cnt += 1;
         }
     }
-    closedir(dir);
+    free(iterator);
+    database_close(db, FALSE);
+    if (!args->print) {
+        database_iterator_t *del_iter = database_create_delete_list_iterator(db);
+        database_delete_list_iter_foreach(id, del_iter) {
+            delete_document(id);
+            free(id);
+        }
+    }
     tpool_wait(IndexCtx.pool);
     tpool_destroy(IndexCtx.pool);
     if (IndexCtx.needs_es_connection) {
-        finish_indexer(args->script, args->async_script, desc.id);
+        finish_indexer(args->script, args->async_script, desc->id);
     }
-    store_destroy(IndexCtx.tag_store);
-    store_destroy(IndexCtx.meta_store);
-    g_hash_table_remove_all(IndexCtx.tags);
-    g_hash_table_destroy(IndexCtx.tags);
+    free(desc);
 }
 void sist2_exec_script(exec_args_t *args) {
     LogCtx.verbose = TRUE;
-    char descriptor_path[PATH_MAX];
-    snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
-    index_descriptor_t desc = read_index_descriptor(descriptor_path);
     IndexCtx.es_url = args->es_url;
     IndexCtx.es_index = args->es_index;
     IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
     IndexCtx.needs_es_connection = TRUE;
-    LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
-    execute_update_script(args->script, args->async_script, desc.id);
+    database_t *db = database_create(args->index_path, INDEX_DATABASE);
+    database_open(db);
+    index_descriptor_t *desc = database_read_index_descriptor(db);
+    LOG_DEBUGF("main.c", "Index version %s", desc->version);
+    execute_update_script(args->script, args->async_script, desc->id);
     free(args->script);
+    database_close(db, FALSE);
 }
 void sist2_web(web_args_t *args) {
@@ -572,23 +381,17 @@ void sist2_web(web_args_t *args) {
     for (int i = 0; i < args->index_count; i++) {
         char *abs_path = abspath(args->indices[i]);
-        if (abs_path == NULL) {
-            return;
-        }
-        char path_tmp[PATH_MAX];
-        snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
-        WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
-        snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
-        mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
-        WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);
-        snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
-        WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
         strcpy(WebCtx.indices[i].path, abs_path);
-        LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name)
+        WebCtx.indices[i].db = database_create(abs_path, INDEX_DATABASE);
+        database_open(WebCtx.indices[i].db);
+        index_descriptor_t *desc = database_read_index_descriptor(WebCtx.indices[i].db);
+        WebCtx.indices[i].desc = *desc;
+        free(desc);
+        LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name);
         free(abs_path);
     }
@@ -603,7 +406,7 @@ void sist2_web(web_args_t *args) {
  * Negative number -> Raise error
  * Specified a valid number -> Continue as normal
  */
-int set_to_negative_if_value_is_zero(struct argparse *self, const struct argparse_option *option) {
+int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct argparse_option *option) {
     int specified_value = *(int *) option->value;
     if (specified_value == 0) {
@@ -616,11 +419,7 @@ int set_to_negative_if_value_is_zero(struct argparse *self, const struct argpars
     }
 }
 int main(int argc, const char *argv[]) {
-    sigsegv_handler = signal(SIGSEGV, sig_handler);
-    sigabrt_handler = signal(SIGABRT, sig_handler);
     setlocale(LC_ALL, "");
     scan_args_t *scan_args = scan_args_create();
@@ -640,38 +439,37 @@ int main(int argc, const char *argv[]) {
     struct argparse_option options[] = {
             OPT_HELP(),
-            OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
-            OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging"),
-            OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
+            OPT_BOOLEAN('v', "version", &arg_version, "Print version and exit."),
+            OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging."),
+            OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages."),
             OPT_BOOLEAN(0, "json-logs", &LogCtx.json_logs, "Output logs in JSON format."),
             OPT_GROUP("Scan options"),
-            OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
-            OPT_INTEGER(0, "mem-throttle", &scan_args->scan_mem_limit_mib,
-                        "Total memory threshold in MiB for scan throttling. DEFAULT=0",
-                        set_to_negative_if_value_is_zero, (intptr_t) &scan_args->scan_mem_limit_mib),
-            OPT_FLOAT('q', "thumbnail-quality", &scan_args->tn_quality,
-                      "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1",
-                      set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
+            OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"),
+            OPT_INTEGER('q', "thumbnail-quality", &scan_args->tn_quality,
+                        "Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT: 2",
+                        set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
             OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size,
-                        "Thumbnail size, in pixels. DEFAULT=500",
+                        "Thumbnail size, in pixels. DEFAULT: 552",
                         set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size),
             OPT_INTEGER(0, "thumbnail-count", &scan_args->tn_count,
-                        "Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1",
+                        "Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT: 1",
                         set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_count),
             OPT_INTEGER(0, "content-size", &scan_args->content_size,
-                        "Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768",
+                        "Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT: 32768",
                         set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size),
-            OPT_STRING(0, "incremental", &scan_args->incremental,
-                       "Reuse an existing index and only scan modified files."),
-            OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
+            OPT_STRING('o', "output", &scan_args->output, "Output index file path. DEFAULT: index.sist2"),
+            OPT_BOOLEAN(0, "incremental", &scan_args->incremental,
+                        "If the output file path exists, only scan new or modified files."),
+            OPT_BOOLEAN(0, "optimize-index", &scan_args->optimize_database,
+                        "Defragment index file after scan to reduce its file size."),
             OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
-            OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
+            OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: index"),
             OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
                                                        "Use 0 to only scan files in PATH. DEFAULT: -1"),
             OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
-                                                          "skip: Don't parse, list: only get file names as text, "
-                                                          "shallow: Don't parse archives inside archives. DEFAULT: recurse"),
+                                                          "skip: don't scan, list: only save file names as text, "
+                                                          "shallow: don't scan archives inside archives. DEFAULT: recurse"),
             OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
                        "Passphrase for encrypted archive files"),
@@ -680,8 +478,8 @@ int main(int argc, const char *argv[]) {
"which are installed on your machine)"), "which are installed on your machine)"),
OPT_BOOLEAN(0, "ocr-images", &scan_args->ocr_images, "Enable OCR'ing of image files."), OPT_BOOLEAN(0, "ocr-images", &scan_args->ocr_images, "Enable OCR'ing of image files."),
OPT_BOOLEAN(0, "ocr-ebooks", &scan_args->ocr_ebooks, "Enable OCR'ing of ebook files."), OPT_BOOLEAN(0, "ocr-ebooks", &scan_args->ocr_ebooks, "Enable OCR'ing of ebook files."),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"), OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned."),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"), OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type."),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap " OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
"(see USAGE.md). DEFAULT: 0.0005"), "(see USAGE.md). DEFAULT: 0.0005"),
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer_mib, OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer_mib,
@@ -689,47 +487,52 @@ int main(int argc, const char *argv[]) {
"(see USAGE.md). DEFAULT: 2000"), "(see USAGE.md). DEFAULT: 2000"),
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."), OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub, OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
"Faster but less accurate EPUB parsing (no thumbnails, metadata)"), "Faster but less accurate EPUB parsing (no thumbnails, metadata)."),
OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."), OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."),
OPT_STRING(0, "list-file", &scan_args->list_path, "Specify a list of newline-delimited paths to be scanned" OPT_STRING(0, "list-file", &scan_args->list_path, "Specify a list of newline-delimited paths to be scanned"
" instead of normal directory traversal. Use '-' to read" " instead of normal directory traversal. Use '-' to read"
" from stdin."), " from stdin."),
OPT_GROUP("Index options"), OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT: http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."), OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"), "Do not verify SSL connections to Elasticsearch."),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."), OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
OPT_BOOLEAN('p', "print", &index_args->print,
"Print JSON documents to stdout instead of indexing to elasticsearch."),
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental, OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
"Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch."), "Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch."),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."), OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."), OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."), OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."), OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"), OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 70"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. " OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings."),
"(You must use this option the first time you use the index command)"),
OPT_GROUP("Web options"), OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."), OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"), "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"), OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
OPT_STRING(0, "bind", &web_args->listen_address,
"Listen for connections on this address. DEFAULT: localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"), OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
OPT_STRING(0, "auth0-audience", &web_args->auth0_audience, "API audience/identifier"), OPT_STRING(0, "auth0-audience", &web_args->auth0_audience, "API audience/identifier"),
OPT_STRING(0, "auth0-domain", &web_args->auth0_domain, "Application domain"), OPT_STRING(0, "auth0-domain", &web_args->auth0_domain, "Application domain"),
OPT_STRING(0, "auth0-client-id", &web_args->auth0_client_id, "Application client ID"), OPT_STRING(0, "auth0-client-id", &web_args->auth0_client_id, "Application client ID"),
OPT_STRING(0, "auth0-public-key-file", &web_args->auth0_public_key_path, "Path to Auth0 public key file extracted from <domain>/pem"), OPT_STRING(0, "auth0-public-key-file", &web_args->auth0_public_key_path,
"Path to Auth0 public key file extracted from <domain>/pem"),
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"), OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
OPT_STRING(0, "tagline", &web_args->tagline, "Tagline in navbar"), OPT_STRING(0, "tagline", &web_args->tagline, "Tagline in navbar"),
OPT_BOOLEAN(0, "dev", &web_args->dev, "Serve html & js files from disk (for development)"), OPT_BOOLEAN(0, "dev", &web_args->dev, "Serve html & js files from disk (for development)"),
OPT_STRING(0, "lang", &web_args->lang, "Default UI language. Can be changed by the user"), OPT_STRING(0, "lang", &web_args->lang, "Default UI language. Can be changed by the user"),
OPT_GROUP("Exec-script options"), OPT_GROUP("Exec-script options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."), OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"), "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."), OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."), OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
@@ -738,7 +541,11 @@ int main(int argc, const char *argv[]) {
     struct argparse argparse;
     argparse_init(&argparse, options, usage, 0);
-    argparse_describe(&argparse, DESCRIPTION, EPILOG);
+    argparse_describe(
+            &argparse,
+            "\nLightning-fast file system indexer and search tool.",
+            "\nMade by simon987 <me@simon987.net>. Released under GPL-3.0"
+    );
     argc = argparse_parse(&argparse, argc, argv);
     if (arg_version) {
@@ -806,7 +613,7 @@ int main(int argc, const char *argv[]) {
     } else {
         argparse_usage(&argparse);
-        LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
+        LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0]);
     }
     printf("\n");

src/parsing/fs_util.h (new file, 41 lines)

@@ -0,0 +1,41 @@
#ifndef SIST2_FS_UTIL_H
#define SIST2_FS_UTIL_H
#include "src/sist.h"
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
static int fs_read(struct vfile *f, void *buf, size_t size) {
if (f->fd == -1) {
SHA1_Init(&f->sha1_ctx);
f->fd = open(f->filepath, O_RDONLY);
if (f->fd == -1) {
return -1;
}
}
int ret = (int) read(f->fd, buf, size);
if (ret != 0 && f->calculate_checksum) {
f->has_checksum = TRUE;
safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, ret);
}
return ret;
}
static void fs_close(struct vfile *f) {
if (f->fd != -1) {
SHA1_Final(f->sha1_digest, &f->sha1_ctx);
close(f->fd);
}
}
static void fs_reset(struct vfile *f) {
if (f->fd != -1) {
lseek(f->fd, 0, SEEK_SET);
}
}
#endif
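
A minimal usage sketch for these callbacks, assuming a vfile wired up the way the scanner does it (fd = -1 triggers the lazy open on first read; field names are taken from the removed create_fs_parse_job() in walk.c):

struct vfile f = {0};
f.filepath = "/some/file";
f.fd = -1;
f.calculate_checksum = TRUE;

char buf[4096];
int n;
while ((n = fs_read(&f, buf, sizeof(buf))) > 0) {
    /* consume buf[0..n) */
}
fs_close(&f);   /* finalizes f.sha1_digest if the file was actually opened */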

src/parsing/magic_util.c (new file, 32 lines)

@@ -0,0 +1,32 @@
#include "magic_util.h"
#include "src/log.h"
#include "mime.h"
#include <magic.h>
#include "src/magic_generated.c"
char *magic_buffer_embedded(void *buffer, size_t buffer_size) {
magic_t magic = magic_open(MAGIC_MIME_TYPE);
const char *magic_buffers[1] = {magic_database_buffer,};
size_t sizes[1] = {sizeof(magic_database_buffer),};
// TODO optimisation: check if we can reuse the magic instance
int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1);
if (load_ret != 0) {
LOG_FATALF("parse.c", "Could not load libmagic database: (%d)", load_ret);
}
const char *magic_mime_str = magic_buffer(magic, buffer, buffer_size);
char *return_value = NULL;
if (magic_mime_str != NULL) {
return_value = malloc(strlen(magic_mime_str) + 1);
strcpy(return_value, magic_mime_str);
}
magic_close(magic);
return return_value;
}
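
The TODO above notes that opening and loading the embedded libmagic database on every call is wasteful. One possible direction (an assumption, not part of this commit) is a thread-local cached instance, using only documented libmagic calls:

// Hypothetical optimisation sketch: cache one magic_t per thread instead of
// re-loading the embedded database for every buffer.
static __thread magic_t cached_magic = NULL;

static magic_t get_cached_magic() {
    if (cached_magic == NULL) {
        cached_magic = magic_open(MAGIC_MIME_TYPE);
        const char *bufs[1] = {magic_database_buffer};
        size_t sizes[1] = {sizeof(magic_database_buffer)};
        if (magic_load_buffers(cached_magic, (void **) bufs, sizes, 1) != 0) {
            return NULL; // caller falls back to the one-shot path above
        }
    }
    return cached_magic;
}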

src/parsing/magic_util.h Normal file

@@ -0,0 +1,8 @@
#ifndef SIST2_MAGIC_UTIL_H
#define SIST2_MAGIC_UTIL_H
#include <stdio.h>
char *magic_buffer_embedded(void *buffer, size_t buffer_size);
#endif //SIST2_MAGIC_UTIL_H

src/parsing/mime.c

@@ -1,22 +1,30 @@
 #include "mime.h"
+#include <zlib.h>

-unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) {
-    char lower[8];
-    char *p = lower;
+unsigned int mime_get_mime_by_ext(const char *ext) {
+    unsigned char lower[16];
+    unsigned char *p = lower;
     int cnt = 0;
     while ((*ext) != '\0' && cnt + 1 < sizeof(lower)) {
-        *p++ = (char) tolower(*ext++);
+        *p++ = tolower(*ext++);
         cnt++;
     }
     *p = '\0';
-    return (size_t) g_hash_table_lookup(ext_table, lower);
+
+    unsigned long crc = crc32(0, lower, cnt);
+    unsigned int mime = mime_extension_lookup(crc);
+    return mime;
 }

-unsigned int mime_get_mime_by_string(GHashTable *mime_table, const char * str) {
-    const char * ptr = str;
+unsigned int mime_get_mime_by_string(const char *str) {
+    const char *ptr = str;
     while (*ptr == ' ' || *ptr == '[') {
         ptr++;
     }
-    return (size_t) g_hash_table_lookup(mime_table, ptr);
+
+    unsigned long crc = crc32(0, (unsigned char *) ptr, strlen(ptr));
+    return mime_name_lookup(crc);
 }

src/parsing/mime.h

@@ -51,14 +51,14 @@ enum major_mime {
 enum mime;

-GHashTable *mime_get_mime_table();
-GHashTable *mime_get_ext_table();
-char *mime_get_mime_text(unsigned int);
-unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext);
-unsigned int mime_get_mime_by_string(GHashTable *mime_table, const char * str);
+unsigned int mime_name_lookup(unsigned long mime_crc32);
+unsigned int mime_extension_lookup(unsigned long extension_crc32);
+const char *mime_get_mime_text(unsigned int);
+unsigned int mime_get_mime_by_ext(const char *ext);
+unsigned int mime_get_mime_by_string(const char *str);

 #endif

(File diff suppressed because it is too large)

src/parsing/parse.c

@@ -5,235 +5,234 @@
 #include "mime.h"
 #include "src/io/serialize.h"
 #include "src/parsing/sidecar.h"
-#include "src/magic_generated.c"
-#include <magic.h>
+#include "src/parsing/fs_util.h"
+#include "src/parsing/magic_util.h"
+#include <pthread.h>

 #define MIN_VIDEO_SIZE (1024 * 64)
 #define MIN_IMAGE_SIZE (512)
+#define MAGIC_BUF_SIZE (4096 * 6)

-int fs_read(struct vfile *f, void *buf, size_t size) {
-    if (f->fd == -1) {
-        SHA1_Init(&f->sha1_ctx);
-
-        f->fd = open(f->filepath, O_RDONLY);
-        if (f->fd == -1) {
-            return -1;
-        }
-    }
-
-    int ret = (int) read(f->fd, buf, size);
-
-    if (ret != 0 && f->calculate_checksum) {
-        f->has_checksum = TRUE;
-        safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, ret);
-    }
-
-    return ret;
-}
-
-#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
-
-void fs_close(struct vfile *f) {
-    if (f->fd != -1) {
-        SHA1_Final(f->sha1_digest, &f->sha1_ctx);
-        close(f->fd);
-    }
-}
-
-void fs_reset(struct vfile *f) {
-    if (f->fd != -1) {
-        lseek(f->fd, 0, SEEK_SET);
-    }
-}
-
-void set_dbg_current_file(parse_job_t *job) {
-    unsigned long long pid = (unsigned long long) pthread_self();
-    pthread_mutex_lock(&ScanCtx.dbg_current_files_mu);
-    g_hash_table_replace(ScanCtx.dbg_current_files, GINT_TO_POINTER(pid), job);
-    pthread_mutex_unlock(&ScanCtx.dbg_current_files_mu);
-}
-
-void parse(void *arg) {
-    parse_job_t *job = arg;
-
-    document_t *doc = malloc(sizeof(document_t));
-    doc->filepath = malloc(strlen(job->filepath) + 1);
-
-    set_dbg_current_file(job);
-
-    strcpy(doc->filepath, job->filepath);
-    doc->ext = (short) job->ext;
-    doc->base = (short) job->base;
-
-    char *rel_path = doc->filepath + ScanCtx.index.desc.root_len;
-    generate_doc_id(rel_path, doc->doc_id);
-
-    doc->meta_head = NULL;
-    doc->meta_tail = NULL;
-    doc->mime = 0;
-    doc->size = job->vfile.info.st_size;
-    doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;
-
-    int inc_ts = incremental_get(ScanCtx.original_table, doc->doc_id);
-    if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
-        pthread_mutex_lock(&ScanCtx.copy_table_mu);
-        incremental_mark_file(ScanCtx.copy_table, doc->doc_id);
-        pthread_mutex_unlock(&ScanCtx.copy_table_mu);
-
-        pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
-        ScanCtx.dbg_skipped_files_count += 1;
-        pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
-
-        CLOSE_FILE(job->vfile)
-        free(doc->filepath);
-        free(doc);
-        return;
-    }
-
-    if (ScanCtx.new_table != NULL) {
-        pthread_mutex_lock(&ScanCtx.copy_table_mu);
-        incremental_mark_file(ScanCtx.new_table, doc->doc_id);
-        pthread_mutex_unlock(&ScanCtx.copy_table_mu);
-    }
-
-    char *buf[MAGIC_BUF_SIZE];
-
-    if (LogCtx.very_verbose) {
-        LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id)
-    }
-
-    if (job->vfile.info.st_size == 0) {
-        doc->mime = MIME_EMPTY;
-    } else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
-        doc->mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
-    }
-
-    if (doc->mime == 0 && !ScanCtx.fast) {
-        // Get mime type with libmagic
-        if (job->vfile.read_rewindable == NULL) {
-            LOG_WARNING(job->filepath,
-                        "File does not support rewindable reads, cannot guess Media type");
-            goto abort;
-        }
-
-        int bytes_read = job->vfile.read_rewindable(&job->vfile, buf, MAGIC_BUF_SIZE);
-        if (bytes_read < 0) {
-            if (job->vfile.is_fs_file) {
-                LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
-            } else {
-                LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
-            }
-
-            pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
-            ScanCtx.dbg_failed_files_count += 1;
-            pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
-
-            CLOSE_FILE(job->vfile)
-            free(doc->filepath);
-            free(doc);
-            return;
-        }
-
-        magic_t magic = magic_open(MAGIC_MIME_TYPE);
-
-        const char *magic_buffers[1] = {magic_database_buffer,};
-        size_t sizes[1] = {sizeof(magic_database_buffer),};
-
-        int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1);
-        if (load_ret != 0) {
-            LOG_FATALF("parse.c", "Could not load libmagic database: (%d)", load_ret)
-        }
-
-        const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
-        if (magic_mime_str != NULL) {
-            doc->mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
-            LOG_DEBUGF(job->filepath, "libmagic: %s", magic_mime_str);
-            if (doc->mime == 0) {
-                LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
-            }
-        }
-
-        if (job->vfile.reset != NULL) {
-            job->vfile.reset(&job->vfile);
-        }
-
-        magic_close(magic);
-    }
-
-    int mmime = MAJOR_MIME(doc->mime);
-
-    if (!(SHOULD_PARSE(doc->mime))) {
-    } else if (IS_RAW(doc->mime)) {
-        parse_raw(&ScanCtx.raw_ctx, &job->vfile, doc);
-    } else if ((mmime == MimeVideo && doc->size >= MIN_VIDEO_SIZE) ||
-               (mmime == MimeImage && doc->size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
-        parse_media(&ScanCtx.media_ctx, &job->vfile, doc, mime_get_mime_text(doc->mime));
-    } else if (IS_PDF(doc->mime)) {
-        parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc);
-    } else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
-        if (IS_MARKUP(doc->mime)) {
-            parse_markup(&ScanCtx.text_ctx, &job->vfile, doc);
-        } else {
-            parse_text(&ScanCtx.text_ctx, &job->vfile, doc);
-        }
-    } else if (IS_FONT(doc->mime)) {
-        parse_font(&ScanCtx.font_ctx, &job->vfile, doc);
-    } else if (
-            ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
-                    IS_ARC(doc->mime) ||
-                    (IS_ARC_FILTER(doc->mime) && should_parse_filtered_file(doc->filepath, doc->ext))
-            )) {
-        parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc, ScanCtx.exclude, ScanCtx.exclude_extra);
-    } else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc->mime)) {
-        parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, doc);
-    } else if (is_cbr(&ScanCtx.comic_ctx, doc->mime) || is_cbz(&ScanCtx.comic_ctx, doc->mime)) {
-        parse_comic(&ScanCtx.comic_ctx, &job->vfile, doc);
-    } else if (IS_MOBI(doc->mime)) {
-        parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, doc);
-    } else if (doc->mime == MIME_SIST2_SIDECAR) {
-        parse_sidecar(&job->vfile, doc);
-        CLOSE_FILE(job->vfile)
-        free(doc->filepath);
-        free(doc);
-        return;
-    } else if (is_msdoc(&ScanCtx.msdoc_ctx, doc->mime)) {
-        parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc);
-    } else if (is_json(&ScanCtx.json_ctx, doc->mime)) {
-        parse_json(&ScanCtx.json_ctx, &job->vfile, doc);
-    } else if (is_ndjson(&ScanCtx.json_ctx, doc->mime)) {
-        parse_ndjson(&ScanCtx.json_ctx, &job->vfile, doc);
-    }
-
-    abort:
+typedef enum {
+    FILETYPE_DONT_PARSE,
+    FILETYPE_RAW,
+    FILETYPE_MEDIA,
+    FILETYPE_EBOOK,
+    FILETYPE_MARKUP,
+    FILETYPE_TEXT,
+    FILETYPE_FONT,
+    FILETYPE_ARCHIVE,
+    FILETYPE_OOXML,
+    FILETYPE_COMIC,
+    FILETYPE_MOBI,
+    FILETYPE_SIST2_SIDECAR,
+    FILETYPE_MSDOC,
+    FILETYPE_JSON,
+    FILETYPE_NDJSON,
+} file_type_t;
+
+file_type_t get_file_type(unsigned int mime, size_t size, const char *filepath) {
+    int major_mime = MAJOR_MIME(mime);
+
+    if (!(SHOULD_PARSE(mime))) {
+        return FILETYPE_DONT_PARSE;
+    } else if (IS_RAW(mime)) {
+        return FILETYPE_RAW;
+    } else if ((major_mime == MimeVideo && size >= MIN_VIDEO_SIZE) ||
+               (major_mime == MimeImage && size >= MIN_IMAGE_SIZE) || major_mime == MimeAudio) {
+        return FILETYPE_MEDIA;
+    } else if (IS_PDF(mime)) {
+        return FILETYPE_EBOOK;
+    } else if (IS_MARKUP(mime)) {
+        return FILETYPE_MARKUP;
+    } else if (major_mime == MimeText) {
+        return FILETYPE_TEXT;
+    } else if (IS_FONT(mime)) {
+        return FILETYPE_FONT;
+    } else if (ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
+            IS_ARC(mime) ||
+            (IS_ARC_FILTER(mime) && should_parse_filtered_file(filepath))
+    )) {
+        return FILETYPE_ARCHIVE;
+    } else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(mime)) {
+        return FILETYPE_OOXML;
+    } else if (is_cbr(&ScanCtx.comic_ctx, mime) || is_cbz(&ScanCtx.comic_ctx, mime)) {
+        return FILETYPE_COMIC;
+    } else if (IS_MOBI(mime)) {
+        return FILETYPE_MOBI;
+    } else if (mime == MIME_SIST2_SIDECAR) {
+        return FILETYPE_SIST2_SIDECAR;
+    } else if (is_msdoc(&ScanCtx.msdoc_ctx, mime)) {
+        return FILETYPE_MSDOC;
+    } else if (is_json(&ScanCtx.json_ctx, mime)) {
+        return FILETYPE_JSON;
+    } else if (is_ndjson(&ScanCtx.json_ctx, mime)) {
+        return FILETYPE_NDJSON;
+    }
+}
+
+#define GET_MIME_ERROR_FATAL (-1)
+
+int get_mime(parse_job_t *job) {
+    char *extension = job->filepath + job->ext;
+    int mime = 0;
+
+    if (job->vfile.st_size == 0) {
+        return MIME_EMPTY;
+    }
+
+    if (*extension != '\0' && (job->ext - job->base != 1)) {
+        mime = (int) mime_get_mime_by_ext(extension);
+        if (mime != 0) {
+            return mime;
+        }
+    }
+
+    if (ScanCtx.fast) {
+        return 0;
+    }
+
+    // Get mime type with libmagic
+    if (job->vfile.read_rewindable == NULL) {
+        LOG_WARNING(job->filepath,
+                    "File does not support rewindable reads, cannot guess Media type");
+        return 0;
+    }
+
+    char *buf[MAGIC_BUF_SIZE];
+    int bytes_read = job->vfile.read_rewindable(&job->vfile, buf, MAGIC_BUF_SIZE);
+    if (bytes_read < 0) {
+        if (job->vfile.is_fs_file) {
+            LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno));
+        } else {
+            LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc));
+        }
+        return GET_MIME_ERROR_FATAL;
+    }
+
+    char *magic_mime_str = magic_buffer_embedded(buf, bytes_read);
+    if (magic_mime_str != NULL) {
+        mime = (int) mime_get_mime_by_string(magic_mime_str);
+        if (mime == 0) {
+            LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
+            free(magic_mime_str);
+            return 0;
+        }
+        free(magic_mime_str);
+    }
+
+    if (job->vfile.reset != NULL) {
+        job->vfile.reset(&job->vfile);
+    }
+
+    return mime;
+}
+
+void parse(parse_job_t *job) {
+    if (job->vfile.is_fs_file) {
+        job->vfile.read = fs_read;
+        job->vfile.read_rewindable = fs_read;
+        job->vfile.reset = fs_reset;
+        job->vfile.close = fs_close;
+        job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
+    }
+
+    document_t *doc = malloc(sizeof(document_t));
+
+    strcpy(doc->filepath, job->filepath);
+    doc->ext = job->ext;
+    doc->base = job->base;
+    doc->meta_head = NULL;
+    doc->meta_tail = NULL;
+    doc->size = job->vfile.st_size;
+    doc->mtime = job->vfile.mtime;
+    doc->mime = get_mime(job);
+
+    generate_doc_id(doc->filepath + ScanCtx.index.desc.root_len, doc->doc_id);
+
+    if (doc->mime == GET_MIME_ERROR_FATAL) {
+        CLOSE_FILE(job->vfile)
+        free(doc);
+        return;
+    }
+
+    if (database_mark_document(ProcData.index_db, doc->doc_id, doc->mtime)) {
+        CLOSE_FILE(job->vfile)
+        free(doc);
+        return;
+    }
+
+    if (LogCtx.very_verbose) {
+        LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id);
+    }
+
+    switch (get_file_type(doc->mime, doc->size, doc->filepath)) {
+        case FILETYPE_RAW:
+            parse_raw(&ScanCtx.raw_ctx, &job->vfile, doc);
+            break;
+        case FILETYPE_MEDIA:
+            parse_media(&ScanCtx.media_ctx, &job->vfile, doc, mime_get_mime_text(doc->mime));
+            break;
+        case FILETYPE_EBOOK:
+            parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc);
+            break;
+        case FILETYPE_MARKUP:
+            parse_markup(&ScanCtx.text_ctx, &job->vfile, doc);
+            break;
+        case FILETYPE_TEXT:
+            parse_text(&ScanCtx.text_ctx, &job->vfile, doc);
+            break;
+        case FILETYPE_FONT:
+            parse_font(&ScanCtx.font_ctx, &job->vfile, doc);
+            break;
+        case FILETYPE_ARCHIVE:
+            parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc, ScanCtx.exclude, ScanCtx.exclude_extra);
+            break;
+        case FILETYPE_OOXML:
+            parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, doc);
+            break;
+        case FILETYPE_COMIC:
+            parse_comic(&ScanCtx.comic_ctx, &job->vfile, doc);
+            break;
+        case FILETYPE_MOBI:
+            parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, doc);
+            break;
+        case FILETYPE_SIST2_SIDECAR:
+            parse_sidecar(&job->vfile, doc);
+            CLOSE_FILE(job->vfile)
+            free(doc);
+            return;
+        case FILETYPE_MSDOC:
+            parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc);
+            break;
+        case FILETYPE_JSON:
+            parse_json(&ScanCtx.json_ctx, &job->vfile, doc);
+            break;
+        case FILETYPE_NDJSON:
+            parse_ndjson(&ScanCtx.json_ctx, &job->vfile, doc);
+            break;
+        case FILETYPE_DONT_PARSE:
+        default:
+            break;
+    }
+
     //Parent meta
     if (job->parent[0] != '\0') {
         meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
         meta_parent->key = MetaParent;
         strcpy(meta_parent->str_val, job->parent);
-        APPEND_META((doc), meta_parent)
-
-        doc->has_parent = TRUE;
-    } else {
-        doc->has_parent = FALSE;
+        APPEND_META((doc), meta_parent);
     }

     CLOSE_FILE(job->vfile)
@@ -246,7 +245,3 @@ void parse(void *arg) {
     write_document(doc);
 }
-
-void cleanup_parse() {
-    // noop
-}
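
The refactor splits the old monolithic parse() into three stages: get_mime() (extension lookup first, libmagic fallback), get_file_type() (pure dispatch decision), and a switch over per-type parsers; incremental-scan bookkeeping also moves from in-memory GHashTables to the index's SQLite database via database_mark_document(). A condensed view of the new control flow, for orientation only (make_document is a hypothetical stand-in for the field assignments above):

// Condensed sketch of the refactored parse() control flow.
void parse_outline(parse_job_t *job) {
    document_t *doc = make_document(job);        // hypothetical helper
    doc->mime = get_mime(job);                   // extension first, then libmagic
    if (doc->mime == GET_MIME_ERROR_FATAL) return;
    if (database_mark_document(ProcData.index_db, doc->doc_id, doc->mtime))
        return;                                  // unchanged since last scan: skip
    switch (get_file_type(doc->mime, doc->size, doc->filepath)) { /* parsers */ }
    write_document(doc);
}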

src/parsing/parse.h

@@ -2,15 +2,9 @@
 #define SIST2_PARSE_H

 #include "../sist.h"
-#include "src/tpool.h"
-
-#define MAGIC_BUF_SIZE (4096 * 6)
-
-int fs_read(struct vfile *f, void *buf, size_t size);
-void fs_close(struct vfile *f);
-void fs_reset(struct vfile *f);
-
-void parse(void *arg);
-void cleanup_parse();
+void parse(parse_job_t *arg);

 #endif

src/parsing/sidecar.c

@@ -4,12 +4,12 @@
 void parse_sidecar(vfile_t *vfile, document_t *doc) {
-    LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath)
+    LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath);

     size_t size;
     char *buf = read_all(vfile, &size);
     if (buf == NULL) {
-        LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath)
+        LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath);
         return;
     }
@@ -18,7 +18,7 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
     cJSON *json = cJSON_Parse(buf);
     if (json == NULL) {
-        LOG_ERRORF("sidecar.c", "Could not parse JSON sidecar %s", vfile->filepath)
+        LOG_ERRORF("sidecar.c", "Could not parse JSON sidecar %s", vfile->filepath);
         return;
     }
     char *json_str = cJSON_PrintUnformatted(json);
@@ -32,8 +32,7 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
     generate_doc_id(rel_path, assoc_doc_id);

-    store_write(ScanCtx.index.meta_store, assoc_doc_id, sizeof(assoc_doc_id), json_str,
-                strlen(json_str) + 1);
+    database_write_document_sidecar(ProcData.index_db, assoc_doc_id, json_str);

     cJSON_Delete(json);
     free(json_str);

src/sist.h

@@ -27,6 +27,8 @@
 #define UNUSED(x) __attribute__((__unused__)) x

+#define MAX_THREADS (256)
+
 #include "util.h"
 #include "log.h"
 #include "types.h"
@@ -49,8 +51,11 @@
 #include <ctype.h>
 #include "git_hash.h"

-#define VERSION "2.13.1"
+#define VERSION "3.0.0"
 static const char *const Version = VERSION;
+static const int VersionMajor = 3;
+static const int VersionMinor = 0;
+static const int VersionPatch = 0;

 #ifndef SIST_PLATFORM
 #define SIST_PLATFORM unknown

src/stats.c (deleted)

@@ -1,343 +0,0 @@
#include "sist.h"
#include "io/serialize.h"
#include "ctx.h"
static GHashTable *FlatTree;
static GHashTable *BufferTable;
static GHashTable *AggMime;
static GHashTable *AggSize;
static GHashTable *AggDate;
#define SIZE_BUCKET (long)(5 * 1024 * 1024)
#define DATE_BUCKET (long)(2629800)
static long TotalSize = 0;
static long DocumentCount = 0;
typedef struct {
long size;
long count;
} agg_t;
void fill_tables(cJSON *document, UNUSED(const char index_id[SIST_INDEX_ID_LEN])) {
if (cJSON_GetObjectItem(document, "parent") != NULL) {
return;
}
const char *json_path = cJSON_GetObjectItem(document, "path")->valuestring;
char *path = malloc(strlen(json_path) + 1);
strcpy(path, json_path);
const char *json_mime = cJSON_GetObjectItem(document, "mime")->valuestring;
char *mime;
if (json_mime == NULL) {
mime = NULL;
} else {
mime = malloc(strlen(json_mime) + 1);
strcpy(mime, json_mime);
}
long size = (long) cJSON_GetObjectItem(document, "size")->valuedouble;
int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
// treemap
void *existing_path = g_hash_table_lookup(FlatTree, path);
if (existing_path == NULL) {
g_hash_table_insert(FlatTree, path, (gpointer) size);
} else {
g_hash_table_replace(FlatTree, path, (gpointer) ((long) existing_path + size));
}
// mime agg
if (mime != NULL) {
agg_t *orig_agg = g_hash_table_lookup(AggMime, mime);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggMime, mime, agg);
} else {
orig_agg->size += size;
orig_agg->count += 1;
free(mime);
}
}
// size agg
long size_bucket = size - (size % SIZE_BUCKET);
agg_t *orig_agg = g_hash_table_lookup(AggSize, (gpointer) size_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggSize, (gpointer) size_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
// date agg
long date_bucket = mtime - (mtime % DATE_BUCKET);
orig_agg = g_hash_table_lookup(AggDate, (gpointer) date_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggDate, (gpointer) date_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
TotalSize += size;
DocumentCount += 1;
}
void read_index_into_tables(index_t *index) {
char file_path[PATH_MAX];
READ_INDICES(file_path, index->path, read_index(file_path, index->desc.id, index->desc.type, fill_tables), {}, 1);
}
static size_t rfind(const char *str, int c) {
for (int i = (int)strlen(str); i >= 0; i--) {
if (str[i] == c) {
return i;
}
}
return -1;
}
int merge_up(double thresh) {
long min_size = (long) (thresh * (double) TotalSize);
int count = 0;
GHashTableIter iter;
g_hash_table_iter_init(&iter, FlatTree);
void *key;
void *value;
while (g_hash_table_iter_next(&iter, &key, &value)) {
long size = (long) value;
if (size < min_size) {
int stop = rfind(key, '/');
if (stop == -1) {
stop = 0;
}
char *parent = malloc(stop + 1);
strncpy(parent, key, stop);
*(parent + stop) = '\0';
void *existing_parent = g_hash_table_lookup(FlatTree, parent);
if (existing_parent == NULL) {
void *existing_parent2_key;
void *existing_parent2_val;
int found = g_hash_table_lookup_extended(BufferTable, parent, &existing_parent2_key,
&existing_parent2_val);
if (!found) {
g_hash_table_insert(BufferTable, parent, value);
} else {
g_hash_table_replace(BufferTable, parent, (gpointer) ((long) existing_parent2_val + size));
free(existing_parent2_key);
}
} else {
g_hash_table_replace(FlatTree, parent, (gpointer) ((long) existing_parent + size));
}
g_hash_table_iter_remove(&iter);
count += 1;
}
}
g_hash_table_iter_init(&iter, BufferTable);
while (g_hash_table_iter_next(&iter, &key, &value)) {
g_hash_table_insert(FlatTree, key, value);
g_hash_table_iter_remove(&iter);
}
int size = g_hash_table_size(FlatTree);
LOG_DEBUGF("stats.c", "Merge up iteration (%d merged, %d in tree)", count, size)
return count;
}
/**
* Assumes out is at at least PATH_MAX *4
*/
void csv_escape(char *dst, const char *str) {
const char *ptr = str;
char *out = dst;
if (rfind(str, ',') == -1 && rfind(str, '"') == -1) {
strcpy(dst, str);
return;
}
*out++ = '"';
char c;
while ((c = *ptr++) != 0) {
if (c == '"') {
*out++ = '"';
*out++ = '"';
} else {
*out++ = c;
}
}
*out++ = '"';
*out = '\0';
}
int open_or_exit(const char *path) {
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
LOG_FATALF("stats.c", "Error while creating file: %s [%d]\n", strerror(errno), errno)
}
return fd;
}
#define TREEMAP_CSV_HEADER "path,size"
#define MIME_AGG_CSV_HEADER "mime,size,count"
#define SIZE_AGG_CSV_HEADER "bucket,size,count"
#define DATE_AGG_CSV_HEADER "bucket,size,count"
void write_treemap_csv(double thresh, const char *out_path) {
void *key;
void *value;
long min_size = (long) (thresh * (double) TotalSize);
int fd = open_or_exit(out_path);
int ret = write(fd, TREEMAP_CSV_HEADER, sizeof(TREEMAP_CSV_HEADER) - 1);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, FlatTree);
while (g_hash_table_iter_next(&iter, &key, &value)) {
long size = (long) value;
if (size >= min_size) {
char path_buf[PATH_MAX * 4];
char buf[PATH_MAX * 4 + 16];
csv_escape(path_buf, key);
size_t written = sprintf(buf, "\n%s,%ld", path_buf, (long) value);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
}
close(fd);
}
void write_agg_csv_str(const char *out_path, const char *header, GHashTable *table) {
void *key;
void *value;
char buf[4096];
int fd = open_or_exit(out_path);
int ret = write(fd, header, strlen(header));
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
agg_t *agg = value;
size_t written = sprintf(buf, "\n%s,%ld,%ld", (const char*)key, agg->size, agg->count);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
close(fd);
}
void write_agg_csv_long(const char *out_path, const char *header, GHashTable *table) {
void *key;
void *value;
char buf[4096];
int fd = open_or_exit(out_path);
int ret = write(fd, header, strlen(header));
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
agg_t *agg = value;
size_t written = sprintf(buf, "\n%ld,%ld,%ld", (long)key, agg->size, agg->count);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
close(fd);
}
int generate_stats(index_t *index, const double threshold, const char *out_prefix) {
FlatTree = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
BufferTable = g_hash_table_new(g_str_hash, g_str_equal);
AggMime = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
AggSize = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
AggDate = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
LOG_INFO("stats.c", "Generating stats...")
read_index_into_tables(index);
LOG_DEBUG("stats.c", "Read index into tables")
LOG_DEBUGF("stats.c", "Total size is %ld", TotalSize)
LOG_DEBUGF("stats.c", "Document count is %ld", DocumentCount)
LOG_DEBUGF("stats.c", "Merging small directories upwards with a threshold of %f%%", threshold * 100)
while (merge_up(threshold) > 100) {}
char tmp[PATH_MAX];
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "treemap.csv");
write_treemap_csv(threshold, tmp);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "mime_agg.csv");
write_agg_csv_str(tmp, MIME_AGG_CSV_HEADER, AggMime);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "size_agg.csv");
write_agg_csv_long(tmp, SIZE_AGG_CSV_HEADER, AggSize);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "date_agg.csv");
write_agg_csv_long(tmp, DATE_AGG_CSV_HEADER, AggDate);
g_hash_table_remove_all(FlatTree);
g_hash_table_destroy(FlatTree);
g_hash_table_destroy(BufferTable);
g_hash_table_remove_all(AggMime);
g_hash_table_destroy(AggMime);
g_hash_table_remove_all(AggSize);
g_hash_table_destroy(AggSize);
g_hash_table_remove_all(AggDate);
g_hash_table_destroy(AggDate);
return 0;
}

src/stats.h (deleted)

@@ -1,6 +0,0 @@
#ifndef SIST2_STATS_H
#define SIST2_STATS_H
int generate_stats(index_t *index, double threshold, const char* out_prefix);
#endif

src/tpool.c

@@ -2,260 +2,264 @@
 #include "ctx.h"
 #include "sist.h"

 #include <pthread.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include "parsing/parse.h"

-#define MAX_QUEUE_SIZE 1000000
+#define BLANK_STR " "

-typedef void (*thread_func_t)(void *arg);
+typedef struct {
+    int thread_id;
+    tpool_t *pool;
+} start_thread_arg_t;

-typedef struct tpool_work {
-    void *arg;
-    thread_func_t func;
-    struct tpool_work *next;
-} tpool_work_t;
-
 typedef struct tpool {
-    tpool_work_t *work_head;
-    tpool_work_t *work_tail;
-    pthread_mutex_t work_mutex;
-    pthread_cond_t has_work_cond;
-    pthread_cond_t working_cond;
-    pthread_t *threads;
-    int thread_cnt;
-    int work_cnt;
-    int done_cnt;
-    int busy_cnt;
-    int throttle_stuck_cnt;
-    size_t mem_limit;
-    size_t page_size;
-    int free_arg;
-    int stop;
-    int waiting;
+    pthread_t threads[256];
+    int num_threads;
     int print_progress;
-    void (*cleanup_func)();
+    struct {
+        job_type_t job_type;
+        int stop;
+        int waiting;
+        database_ipc_ctx_t ipc_ctx;
+        pthread_mutex_t mutex;
+        pthread_mutex_t data_mutex;
+        pthread_cond_t done_working_cond;
+        pthread_cond_t workers_initialized_cond;
+        int busy_count;
+        int initialized_count;
+        int thread_id_to_pid_mapping[MAX_THREADS];
+        char ipc_database_filepath[128];
+    } *shm;
 } tpool_t;

-/**
- * Create a work object
- */
-static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) {
-    if (func == NULL) {
-        return NULL;
-    }
-
-    tpool_work_t *work = malloc(sizeof(tpool_work_t));
-    work->func = func;
-    work->arg = arg;
-    work->next = NULL;
-
-    return work;
-}
-
-void tpool_dump_debug_info(tpool_t *pool) {
-    LOG_DEBUGF("tpool.c", "pool->thread_cnt = %d", pool->thread_cnt)
-    LOG_DEBUGF("tpool.c", "pool->work_cnt = %d", pool->work_cnt)
-    LOG_DEBUGF("tpool.c", "pool->done_cnt = %d", pool->done_cnt)
-    LOG_DEBUGF("tpool.c", "pool->busy_cnt = %d", pool->busy_cnt)
-    LOG_DEBUGF("tpool.c", "pool->stop = %d", pool->stop)
-}
-
-/**
- * Pop work object from thread pool
- */
-static tpool_work_t *tpool_work_get(tpool_t *pool) {
-    tpool_work_t *work = pool->work_head;
-    if (work == NULL) {
-        return NULL;
-    }
-
-    if (work->next == NULL) {
-        pool->work_head = NULL;
-        pool->work_tail = NULL;
-    } else {
-        pool->work_head = work->next;
-    }
-
-    return work;
-}
+void job_destroy(job_t *job) {
+    if (job->type == JOB_PARSE_JOB) {
+        free(job->parse_job);
+    }
+    free(job);
+}

 /**
  * Push work object to thread pool
  */
-int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
-    tpool_work_t *work = tpool_work_create(func, arg);
-    if (work == NULL) {
-        return 0;
-    }
-
-    while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) {
-        usleep(10000);
-    }
-
-    pthread_mutex_lock(&(pool->work_mutex));
-    if (pool->work_head == NULL) {
-        pool->work_head = work;
-        pool->work_tail = pool->work_head;
-    } else {
-        pool->work_tail->next = work;
-        pool->work_tail = work;
-    }
-
-    pool->work_cnt++;
-    pthread_cond_broadcast(&(pool->has_work_cond));
-    pthread_mutex_unlock(&(pool->work_mutex));
-
-    return 1;
-}
+int tpool_add_work(tpool_t *pool, job_t *job) {
+    if (pool->shm->job_type == JOB_UNDEFINED) {
+        pool->shm->job_type = job->type;
+    } else if (pool->shm->job_type != job->type) {
+        LOG_FATAL("tpool.c", "FIXME: tpool cannot queue jobs with different types!");
+    }
+
+    database_add_work(ProcData.ipc_db, job);
+
+    return TRUE;
+}

-/**
- * see: https://github.com/htop-dev/htop/blob/f782f821f7f8081cb43bbad1c37f32830a260a81/linux/LinuxProcessList.c
- */
-__always_inline
-static size_t _get_total_mem(tpool_t *pool) {
-    FILE *statmfile = fopen("/proc/self/statm", "r");
-    if (!statmfile)
-        return 0;
-
-    long int dummy, dummy2, dummy3, dummy4, dummy5, dummy6;
-    long int m_resident;
-
-    int r = fscanf(statmfile, "%ld %ld %ld %ld %ld %ld %ld",
-                   &dummy, /* m_virt */
-                   &m_resident,
-                   &dummy2, /* m_share */
-                   &dummy3, /* m_trs */
-                   &dummy4, /* unused since Linux 2.6; always 0 */
-                   &dummy5, /* m_drs */
-                   &dummy6); /* unused since Linux 2.6; always 0 */
-    fclose(statmfile);
-
-    if (r == 7) {
-        return m_resident * pool->page_size;
-    } else {
-        return 0;
-    }
-}
+static void worker_thread_loop(tpool_t *pool) {
+    while (TRUE) {
+        if (pool->shm->stop) {
+            break;
+        }
+
+        if (pool->shm->job_type == JOB_UNDEFINED) {
+            // Wait before first job is queued
+            pthread_mutex_lock(&pool->shm->mutex);
+            pthread_cond_timedwait_ms(&pool->shm->ipc_ctx.has_work_cond, &pool->shm->mutex, 1000);
+            pthread_mutex_unlock(&pool->shm->mutex);
+        }
+
+        job_t *job = database_get_work(ProcData.ipc_db, pool->shm->job_type);
+
+        if (job != NULL) {
+            pthread_mutex_lock(&(pool->shm->data_mutex));
+            pool->shm->busy_count += 1;
+            pthread_mutex_unlock(&(pool->shm->data_mutex));
+
+            if (pool->shm->stop) {
+                break;
+            }
+
+            if (job->type == JOB_PARSE_JOB) {
+                parse(job->parse_job);
+            } else if (job->type == JOB_BULK_LINE) {
+                elastic_index_line(job->bulk_line);
+            }
+
+            job_destroy(job);
+
+            pthread_mutex_lock(&(pool->shm->data_mutex));
+            pool->shm->busy_count -= 1;
+            pthread_mutex_unlock(&(pool->shm->data_mutex));
+
+            pthread_mutex_lock(&(pool->shm->ipc_ctx.mutex));
+            pool->shm->ipc_ctx.completed_job_count += 1;
+            pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex));
+        }
+
+        if (pool->print_progress) {
+            int done = pool->shm->ipc_ctx.completed_job_count;
+            int count = pool->shm->ipc_ctx.completed_job_count + pool->shm->ipc_ctx.job_count;
+
+            if (LogCtx.json_logs) {
+                progress_bar_print_json(done, count, ScanCtx.stat_tn_size,
+                                        ScanCtx.stat_index_size, pool->shm->waiting);
+            } else {
+                progress_bar_print((double) done / count,
+                                   ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
+            }
+        }
+
+        if (job == NULL) {
+            pthread_mutex_lock(&pool->shm->mutex);
+            pthread_cond_signal(&pool->shm->done_working_cond);
+            pthread_mutex_unlock(&pool->shm->mutex);
+        }
+    }
+}
+
+static void worker_proc_init(tpool_t *pool, int thread_id) {
+    pthread_mutex_lock(&pool->shm->data_mutex);
+    pool->shm->thread_id_to_pid_mapping[thread_id] = getpid();
+    pthread_mutex_unlock(&pool->shm->data_mutex);
+
+    ProcData.thread_id = thread_id;
+
+    if (ScanCtx.index.path[0] != '\0') {
+        ProcData.index_db = database_create(ScanCtx.index.path, INDEX_DATABASE);
+        ProcData.index_db->ipc_ctx = &pool->shm->ipc_ctx;
+        database_open(ProcData.index_db);
+    }
+
+    pthread_mutex_lock(&pool->shm->mutex);
+    ProcData.ipc_db = database_create(pool->shm->ipc_database_filepath, IPC_CONSUMER_DATABASE);
+    ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx;
+    database_open(ProcData.ipc_db);
+    pthread_mutex_unlock(&pool->shm->mutex);
+}
+
+void worker_proc_cleanup(tpool_t *pool) {
+    if (ProcData.index_db != NULL) {
+        database_close(ProcData.index_db, FALSE);
+    }
+    database_close(ProcData.ipc_db, FALSE);
+}
+
+#ifndef SIST_DEBUG
+#define TPOOL_FORK
+#endif

 /**
  * Thread worker function
  */
 static void *tpool_worker(void *arg) {
-    tpool_t *pool = arg;
-    int stuck_notified = 0;
-    int throttle_ms = 0;
-
-    while (TRUE) {
-        pthread_mutex_lock(&pool->work_mutex);
-        if (pool->stop) {
-            break;
-        }
-
-        if (pool->work_head == NULL) {
-            pthread_cond_wait(&(pool->has_work_cond), &(pool->work_mutex));
-        }
-
-        tpool_work_t *work = tpool_work_get(pool);
-        if (work != NULL) {
-            pool->busy_cnt += 1;
-        }
-
-        pthread_mutex_unlock(&(pool->work_mutex));
-
-        if (work != NULL) {
-            stuck_notified = 0;
-            throttle_ms = 0;
-            while (!pool->stop && pool->mem_limit > 0 && _get_total_mem(pool) >= pool->mem_limit) {
-                if (!stuck_notified && throttle_ms >= 90000) {
-                    // notify the pool that this thread is stuck.
-                    pthread_mutex_lock(&(pool->work_mutex));
-                    pool->throttle_stuck_cnt += 1;
-                    if (pool->throttle_stuck_cnt == pool->thread_cnt) {
-                        LOG_ERROR("tpool.c", "Throttle memory limit too low, cannot proceed!");
-                        pool->stop = TRUE;
-                    }
-                    pthread_mutex_unlock(&(pool->work_mutex));
-                    stuck_notified = 1;
-                }
-                usleep(10000);
-                throttle_ms += 10;
-            }
-
-            if (pool->stop) {
-                break;
-            }
-
-            // we are not stuck anymore. cancel our notification.
-            if (stuck_notified) {
-                pthread_mutex_lock(&(pool->work_mutex));
-                pool->throttle_stuck_cnt -= 1;
-                pthread_mutex_unlock(&(pool->work_mutex));
-            }
-
-            work->func(work->arg);
-            if (pool->free_arg) {
-                free(work->arg);
-            }
-            free(work);
-        }
-
-        pthread_mutex_lock(&(pool->work_mutex));
-        if (work != NULL) {
-            pool->busy_cnt -= 1;
-            pool->done_cnt++;
-        }
-
-        if (pool->print_progress) {
-            if (LogCtx.json_logs) {
-                progress_bar_print_json(pool->done_cnt, pool->work_cnt, ScanCtx.stat_tn_size,
-                                        ScanCtx.stat_index_size, pool->waiting);
-            } else {
-                progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size,
-                                   ScanCtx.stat_index_size);
-            }
-        }
-
-        if (pool->work_head == NULL) {
-            pthread_cond_signal(&(pool->working_cond));
-        }
-        pthread_mutex_unlock(&(pool->work_mutex));
-    }
-
-    if (pool->cleanup_func != NULL) {
-        LOG_INFO("tpool.c", "Executing cleanup function")
-        pool->cleanup_func();
-        LOG_DEBUG("tpool.c", "Done executing cleanup function")
-    }
-
-    pthread_cond_signal(&(pool->working_cond));
-    pthread_mutex_unlock(&(pool->work_mutex));
+    tpool_t *pool = ((start_thread_arg_t *) arg)->pool;
+
+#ifdef TPOOL_FORK
+    while (TRUE) {
+        int pid = fork();
+
+        if (pid == 0) {
+            worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id);
+
+            pthread_mutex_lock(&pool->shm->mutex);
+            pthread_cond_signal(&pool->shm->workers_initialized_cond);
+            pool->shm->initialized_count += 1;
+            pthread_mutex_unlock(&pool->shm->mutex);
+
+            worker_thread_loop(pool);
+
+            pthread_mutex_lock(&pool->shm->mutex);
+            pthread_cond_signal(&pool->shm->done_working_cond);
+            pthread_mutex_unlock(&pool->shm->mutex);
+
+            worker_proc_cleanup(pool);
+            exit(0);
+        } else {
+            int status;
+            waitpid(pid, &status, 0);
+
+            LOG_DEBUGF("tpool.c", "Child process terminated with status code %d", WEXITSTATUS(status));
+
+            pthread_mutex_lock(&(pool->shm->ipc_ctx.mutex));
+            pool->shm->ipc_ctx.completed_job_count += 1;
+            pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex));
+
+            pthread_mutex_lock(&(pool->shm->data_mutex));
+            pool->shm->busy_count -= 1;
+            pthread_mutex_unlock(&(pool->shm->data_mutex));
+
+            if (WIFSIGNALED(status)) {
+                int crashed_thread_id = -1;
+                for (int i = 0; i < MAX_THREADS; i++) {
+                    if (pool->shm->thread_id_to_pid_mapping[i] == pid) {
+                        crashed_thread_id = i;
+                        break;
+                    }
+                }
+
+                const char *job_filepath;
+                if (crashed_thread_id != -1) {
+                    job_filepath = pool->shm->ipc_ctx.current_job[crashed_thread_id];
+                } else {
+                    job_filepath = "unknown";
+                }
+
+                LOG_FATALF_NO_EXIT(
+                        "tpool.c",
+                        "Child process crashed (%s).\n"
+                        BLANK_STR "The process was working on %s\n"
+                        BLANK_STR "Please consider creating a bug report at https://github.com/simon987/sist2/issues !\n"
+                        BLANK_STR "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs.\n",
+                        strsignal(WTERMSIG(status)),
+                        job_filepath
+                );
+                continue;
+            }
+            break;
+        }
+    }
+#else
+    worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id);
+
+    pthread_mutex_lock(&pool->shm->mutex);
+    pthread_cond_signal(&pool->shm->workers_initialized_cond);
+    pool->shm->initialized_count += 1;
+    pthread_mutex_unlock(&pool->shm->mutex);
+
+    worker_thread_loop(pool);
+
+    pthread_mutex_lock(&pool->shm->mutex);
+    pthread_cond_signal(&pool->shm->done_working_cond);
+    pthread_mutex_unlock(&pool->shm->mutex);
+#endif

     return NULL;
 }

 void tpool_wait(tpool_t *pool) {
-    LOG_DEBUG("tpool.c", "Waiting for worker threads to finish")
-    pthread_mutex_lock(&(pool->work_mutex));
-    pool->waiting = TRUE;
+    LOG_DEBUG("tpool.c", "Waiting for worker threads to finish");
+    pthread_mutex_lock(&pool->shm->mutex);
+    pool->shm->waiting = TRUE;
+    pool->shm->ipc_ctx.no_more_jobs = TRUE;

     while (TRUE) {
-        if (pool->done_cnt < pool->work_cnt) {
-            pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
+        if (pool->shm->ipc_ctx.job_count > 0) {
+            pthread_cond_wait(&(pool->shm->done_working_cond), &pool->shm->mutex);
         } else {
-            LOG_INFOF("tpool.c", "Received head=NULL signal, busy_cnt=%d", pool->busy_cnt);
-            if (pool->done_cnt == pool->work_cnt && pool->busy_cnt == 0) {
-                pool->stop = TRUE;
+            if (pool->shm->ipc_ctx.job_count == 0 && pool->shm->busy_count == 0) {
+                pool->shm->stop = TRUE;
                 break;
             }
         }
@@ -263,34 +267,21 @@ void tpool_wait(tpool_t *pool) {
     if (pool->print_progress && !LogCtx.json_logs) {
         progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
     }
-    pthread_mutex_unlock(&(pool->work_mutex));
+    pthread_mutex_unlock(&pool->shm->mutex);

-    LOG_INFO("tpool.c", "Worker threads finished")
+    LOG_INFO("tpool.c", "Worker threads finished");
 }

 void tpool_destroy(tpool_t *pool) {
-    if (pool == NULL) {
-        return;
-    }
-
-    LOG_INFO("tpool.c", "Destroying thread pool")
-
-    pthread_mutex_lock(&(pool->work_mutex));
-    tpool_work_t *work = pool->work_head;
-    int count = 0;
-    while (work != NULL) {
-        tpool_work_t *tmp = work->next;
-        free(work);
-        work = tmp;
-        count += 1;
-    }
-
-    LOG_DEBUGF("tpool.c", "Destroyed %d jobs", count);
-
-    pthread_cond_broadcast(&(pool->has_work_cond));
-    pthread_mutex_unlock(&(pool->work_mutex));
+    LOG_INFO("tpool.c", "Destroying thread pool");
+
+    database_close(ProcData.ipc_db, FALSE);
+
+    pthread_mutex_lock(&pool->shm->mutex);
+    pthread_cond_broadcast(&pool->shm->ipc_ctx.has_work_cond);
+    pthread_mutex_unlock(&pool->shm->mutex);

-    for (size_t i = 0; i < pool->thread_cnt; i++) {
+    for (size_t i = 0; i < pool->num_threads; i++) {
         pthread_t thread = pool->threads[i];
         if (thread != 0) {
             void *_;
@@ -298,53 +289,79 @@ void tpool_destroy(tpool_t *pool) {
         }
     }

-    LOG_INFO("tpool.c", "Final cleanup")
-
-    pthread_mutex_destroy(&(pool->work_mutex));
-    pthread_cond_destroy(&(pool->has_work_cond));
-    pthread_cond_destroy(&(pool->working_cond));
-
-    free(pool->threads);
-    free(pool);
+    pthread_mutex_destroy(&pool->shm->ipc_ctx.mutex);
+    pthread_mutex_destroy(&pool->shm->mutex);
+    pthread_cond_destroy(&pool->shm->ipc_ctx.has_work_cond);
+    pthread_cond_destroy(&pool->shm->done_working_cond);
+
+    munmap(pool->shm, sizeof(*pool->shm));
 }

 /**
  * Create a thread pool
  * @param thread_cnt Worker threads count
  */
-tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress, size_t mem_limit) {
+tpool_t *tpool_create(int thread_cnt, int print_progress) {
     tpool_t *pool = malloc(sizeof(tpool_t));

-    pool->thread_cnt = thread_cnt;
-    pool->work_cnt = 0;
-    pool->done_cnt = 0;
-    pool->busy_cnt = 0;
-    pool->throttle_stuck_cnt = 0;
-    pool->mem_limit = mem_limit;
-    pool->stop = FALSE;
-    pool->waiting = FALSE;
-    pool->free_arg = free_arg;
-    pool->cleanup_func = cleanup_func;
-    pool->threads = calloc(sizeof(pthread_t), thread_cnt);
-    pool->print_progress = print_progress;
-    pool->page_size = getpagesize();
-
-    pthread_mutex_init(&(pool->work_mutex), NULL);
-
-    pthread_cond_init(&(pool->has_work_cond), NULL);
-    pthread_cond_init(&(pool->working_cond), NULL);
-
-    pool->work_head = NULL;
-    pool->work_tail = NULL;
+    pool->shm = mmap(NULL, sizeof(*pool->shm), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+
+    pool->num_threads = thread_cnt;
+    pool->shm->ipc_ctx.job_count = 0;
+    pool->shm->ipc_ctx.no_more_jobs = FALSE;
+    pool->shm->stop = FALSE;
+    pool->shm->waiting = FALSE;
+    pool->shm->job_type = JOB_UNDEFINED;
+    memset(pool->threads, 0, sizeof(pool->threads));
+    pool->print_progress = print_progress;
+
+    sprintf(pool->shm->ipc_database_filepath, "/dev/shm/sist2-ipc-%d.sqlite", getpid());
+
+    pthread_mutexattr_t mutexattr;
+    pthread_mutexattr_init(&mutexattr);
+    pthread_mutexattr_setpshared(&mutexattr, TRUE);
+
+    pthread_mutex_init(&(pool->shm->mutex), &mutexattr);
+    pthread_mutex_init(&(pool->shm->data_mutex), &mutexattr);
+    pthread_mutex_init(&(pool->shm->ipc_ctx.mutex), &mutexattr);
+    pthread_mutex_init(&(pool->shm->ipc_ctx.db_mutex), &mutexattr);
+    pthread_mutex_init(&(pool->shm->ipc_ctx.index_db_mutex), &mutexattr);
+
+    pthread_condattr_t condattr;
+    pthread_condattr_init(&condattr);
+    pthread_condattr_setpshared(&condattr, TRUE);
+
+    pthread_cond_init(&(pool->shm->ipc_ctx.has_work_cond), &condattr);
+    pthread_cond_init(&(pool->shm->done_working_cond), &condattr);
+    pthread_cond_init(&(pool->shm->workers_initialized_cond), &condattr);
+
+    ProcData.ipc_db = database_create(pool->shm->ipc_database_filepath, IPC_PRODUCER_DATABASE);
+    ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx;
+    database_initialize(ProcData.ipc_db);

     return pool;
 }

 void tpool_start(tpool_t *pool) {
-    LOG_INFOF("tpool.c", "Starting thread pool with %d threads", pool->thread_cnt)
+    LOG_INFOF("tpool.c", "Starting thread pool with %d threads", pool->num_threads);
+
+    pthread_mutex_lock(&pool->shm->mutex);

-    for (size_t i = 0; i < pool->thread_cnt; i++) {
-        pthread_create(&pool->threads[i], NULL, tpool_worker, pool);
+    for (int i = 0; i < pool->num_threads; i++) {
+        start_thread_arg_t *arg = malloc(sizeof(start_thread_arg_t));
+        arg->thread_id = i + 1;
+        arg->pool = pool;
+        pthread_create(&pool->threads[i], NULL, tpool_worker, arg);
     }
+
+    // Only open the database when all workers are done initializing
+    while (pool->shm->initialized_count != pool->num_threads) {
+        pthread_cond_wait(&pool->shm->workers_initialized_cond, &pool->shm->mutex);
+    }
+    pthread_mutex_unlock(&pool->shm->mutex);
+
+    database_open(ProcData.ipc_db);
 }
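
The rewritten pool forks one child process per worker (TPOOL_FORK) and pulls jobs from a shared SQLite IPC database, so a crash inside a parser kills only the child; the parent waitpid()s, logs the offending file, and forks a replacement. A stripped-down sketch of that crash-isolation loop using only standard POSIX calls (process_jobs stands in for worker_proc_init + worker_thread_loop):

// Minimal sketch of the fork/waitpid crash-isolation pattern used above.
#include <sys/wait.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>

void worker(void (*process_jobs)(void)) {
    while (1) {
        pid_t pid = fork();
        if (pid == 0) {          // child: do the actual parsing
            process_jobs();
            _exit(0);
        }
        int status;
        waitpid(pid, &status, 0);
        if (WIFSIGNALED(status)) {
            // child crashed (e.g. SIGSEGV in a parser): log and spawn a new one
            fprintf(stderr, "worker died: %s\n", strsignal(WTERMSIG(status)));
            continue;
        }
        break;                   // clean exit: no more jobs
    }
}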

src/tpool.h

@@ -2,20 +2,24 @@
 #define SIST2_TPOOL_H

 #include "sist.h"
+#include "third-party/libscan/libscan/scan.h"
+#include "index/elastic.h"
+#include "src/database/database.h"

 struct tpool;
 typedef struct tpool tpool_t;

-typedef void (*thread_func_t)(void *arg);
-
-tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress, size_t mem_limit);
+tpool_t *tpool_create(int num, int print_progress);
 void tpool_start(tpool_t *pool);
 void tpool_destroy(tpool_t *pool);
-int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);
+int tpool_add_work(tpool_t *pool, job_t *job);
 void tpool_wait(tpool_t *pool);
-void tpool_dump_debug_info(tpool_t *pool);
+void job_destroy(job_t *job);

 #endif

src/types.h

@@ -1,24 +1,26 @@
 #ifndef SIST2_TYPES_H
 #define SIST2_TYPES_H

-#define INDEX_TYPE_NDJSON "ndjson"
+typedef struct database database_t;

 typedef struct index_descriptor {
     char id[SIST_INDEX_ID_LEN];
     char version[64];
+    int version_major;
+    int version_minor;
+    int version_patch;
     long timestamp;
     char root[PATH_MAX];
     char rewrite_url[8192];
-    short root_len;
+    int root_len;
     char name[1024];
-    char type[64];
 } index_descriptor_t;

 typedef struct index_t {
     struct index_descriptor desc;
-    struct store_t *store;
-    struct store_t *tag_store;
-    struct store_t *meta_store;
+    database_t *db;
     char path[PATH_MAX];
 } index_t;
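
Storing the parsed version triple alongside the version string lets the loader compare index compatibility numerically instead of with strcmp. A sketch of such a check; the exact policy sist2 enforces is not shown in this hunk, so treat the rule below as illustrative only:

// Illustrative compatibility check using the new version_major/minor/patch
// fields together with VersionMajor/VersionMinor from sist.h.
int index_is_compatible(const index_descriptor_t *desc) {
    if (desc->version_major != VersionMajor) {
        return FALSE;   // major bump: assume the on-disk format changed
    }
    return desc->version_minor <= VersionMinor; // older minors stay readable
}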

src/util.c

@@ -25,7 +25,6 @@ dyn_buffer_t url_escape(char *str) {
 }

 char *abspath(const char *path) {
     char *expanded = expandpath(path);
-
     char *abs = realpath(expanded, NULL);
@@ -34,8 +33,7 @@ char *abspath(const char *path) {
         return NULL;
     }
     if (strlen(abs) > 1) {
-        abs = realloc(abs, strlen(abs) + 2);
-        strcat(abs, "/");
+        abs = realloc(abs, strlen(abs) + 1);
     }

     return abs;
@@ -76,9 +74,8 @@ char *expandpath(const char *path) {
         }
     }

-    char *expanded = malloc(strlen(tmp) + 2);
+    char *expanded = malloc(strlen(tmp) + 1);
     strcpy(expanded, tmp);
-    strcat(expanded, "/");

     wordfree(&w);
     return expanded;
@@ -103,7 +100,13 @@ void progress_bar_print_json(size_t done, size_t count, size_t tn_size, size_t index_size, int waiting)
 void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
+    if (isnan(percentage)) {
+        return;
+    }
+
+    // TODO: Fix this with shm/ctx
     static int last_val = -1;
     int val = (int) (percentage * 100);
     if (last_val == val || val > 100) {
         return;
@@ -148,10 +151,6 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
     PrintingProgressBar = TRUE;
 }

-GHashTable *incremental_get_table() {
-    GHashTable *file_table = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
-    return file_table;
-}
-
 const char *find_file_in_paths(const char *paths[], const char *filename) {
@@ -165,7 +164,7 @@ const char *find_file_in_paths(const char *paths[], const char *filename) {
     char path[PATH_MAX];
     snprintf(path, sizeof(path), "%s%s", apath, filename);

-    LOG_DEBUGF("util.c", "Looking for '%s' in folder '%s'", filename, apath)
+    LOG_DEBUGF("util.c", "Looking for '%s' in folder '%s'", filename, apath);
     free(apath);

     struct stat info;
@@ -267,3 +266,39 @@ void str_unescape(char *dst, const char *str) {
     }
     *cur = '\0';
 }
#define NSEC_PER_SEC 1000000000
struct timespec timespec_normalise(struct timespec ts) {
while (ts.tv_nsec >= NSEC_PER_SEC) {
ts.tv_sec += 1;
ts.tv_nsec -= NSEC_PER_SEC;
}
while (ts.tv_nsec <= -NSEC_PER_SEC) {
ts.tv_sec -= 1;
ts.tv_nsec += NSEC_PER_SEC;
}
if (ts.tv_nsec < 0) {
ts.tv_sec -= 1;
ts.tv_nsec = (NSEC_PER_SEC + ts.tv_nsec);
}
return ts;
}
struct timespec timespec_add(struct timespec ts1, long usec) {
ts1 = timespec_normalise(ts1);
struct timespec ts2 = timespec_normalise((struct timespec) {
.tv_sec = 0,
.tv_nsec = usec * 1000
});
ts1.tv_sec += ts2.tv_sec;
ts1.tv_nsec += ts2.tv_nsec;
return timespec_normalise(ts1);
}

src/util.h

@@ -5,8 +5,6 @@
 #include <stdlib.h>
 #include <string.h>

-#include <glib.h>
-
 #include "third-party/utf8.h/utf8.h"
 #include "libscan/scan.h"
@@ -22,9 +20,6 @@ extern int PrintingProgressBar;
 void progress_bar_print_json(size_t done, size_t count, size_t tn_size, size_t index_size, int waiting);
 void progress_bar_print(double percentage, size_t tn_size, size_t index_size);

-GHashTable *incremental_get_table();
-
 const char *find_file_in_paths(const char **paths, const char *filename);
@@ -100,31 +95,23 @@ static void generate_doc_id(const char *rel_path, char *doc_id) {
     buf2hex(md, sizeof(md), doc_id);
 }

-__always_inline
-static void incremental_put(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN], int mtime) {
-    char *ptr = malloc(SIST_DOC_ID_LEN);
-    strcpy(ptr, doc_id);
-    g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
-}
-
-__always_inline
-static int incremental_get(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
-    if (table != NULL) {
-        return GPOINTER_TO_INT(g_hash_table_lookup(table, doc_id));
-    } else {
-        return 0;
-    }
-}
-
-/**
- * Marks a file by adding it to a table.
- * !!Not thread safe.
- */
-__always_inline
-static int incremental_mark_file(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
-    char *ptr = malloc(SIST_DOC_ID_LEN);
-    strcpy(ptr, doc_id);
-    return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
-}
+#define MILLISECOND 1000
+
+struct timespec timespec_add(struct timespec ts1, long usec);
+
+#define TIMER_INIT() struct timespec timer_begin
+#define TIMER_START() clock_gettime(CLOCK_REALTIME, &timer_begin)
+#define TIMER_END(x) do { \
+    struct timespec timer_end; \
+    clock_gettime(CLOCK_REALTIME, &timer_end); \
+    x = (timer_end.tv_sec - timer_begin.tv_sec) * 1000000 + (timer_end.tv_nsec - timer_begin.tv_nsec) / 1000; \
+} while (0)
+
+#define pthread_cond_timedwait_ms(cond, mutex, delay_ms) do {\
+    struct timespec now; \
+    clock_gettime(CLOCK_REALTIME, &now); \
+    struct timespec end_time = timespec_add(now, MILLISECOND * delay_ms); \
+    pthread_cond_timedwait(cond, mutex, &end_time); \
+} while (0)

 #endif
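
The TIMER_* macros measure elapsed time in microseconds with CLOCK_REALTIME; note that TIMER_INIT() declares the start-time variable, so it must appear in the same scope as TIMER_START()/TIMER_END(). A usage sketch (do_expensive_work is a hypothetical placeholder):

// Usage sketch for the TIMER_* macros added above.
long took_usec;
TIMER_INIT();
TIMER_START();
do_expensive_work();       // hypothetical workload
TIMER_END(took_usec);      // took_usec now holds elapsed microseconds
LOG_DEBUGF("util.c", "work took %ldus", took_usec);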

src/web/serve.c

@@ -1,15 +1,13 @@
 #include "serve.h"
 #include "src/sist.h"
-#include "src/io/store.h"
-#include "static_generated.c"
 #include "src/index/elastic.h"
 #include "src/index/web.h"
 #include "src/auth0/auth0_c_api.h"
+#include "src/web/web_util.h"

 #include <src/ctx.h>

-#define HTTP_SERVER_HEADER "Server: sist2/" VERSION "\r\n"
 #define HTTP_TEXT_TYPE_HEADER "Content-Type: text/plain;charset=utf-8\r\n"
 #define HTTP_REPLY_NOT_FOUND mg_http_reply(nc, 404, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Not found");
@@ -20,62 +18,6 @@ static struct mg_http_serve_opts DefaultServeOpts = {
     .mime_types = ""
 };
__always_inline
static char *address_to_string(struct mg_addr *addr) {
static char address_to_string_buf[INET6_ADDRSTRLEN];
return mg_ntoa(addr, address_to_string_buf, sizeof(address_to_string_buf));
}
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
mg_printf(
nc,
"HTTP/1.1 %d %s\r\n"
HTTP_SERVER_HEADER
"Content-Length: %d\r\n"
"%s\r\n\r\n",
status_code, "OK",
length,
extra_headers
);
}
index_t *get_index_by_id(const char *index_id) {
for (int i = WebCtx.index_count; i >= 0; i--) {
if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) {
return &WebCtx.indices[i];
}
}
return NULL;
}
store_t *get_store(const char *index_id) {
index_t *idx = get_index_by_id(index_id);
if (idx != NULL) {
return idx->store;
}
return NULL;
}
store_t *get_tag_store(const char *index_id) {
index_t *idx = get_index_by_id(index_id);
if (idx != NULL) {
return idx->tag_store;
}
return NULL;
}
void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
if (WebCtx.dev) {
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &DefaultServeOpts);
} else {
send_response_line(nc, 200, sizeof(index_html), "Content-Type: text/html");
mg_send(nc, index_html, sizeof(index_html));
}
}
 void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
     if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
@@ -87,7 +29,7 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
     memcpy(arg_index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
     *(arg_index_id + SIST_INDEX_ID_LEN - 1) = '\0';

-    index_t *index = get_index_by_id(arg_index_id);
+    index_t *index = web_get_index_by_id(arg_index_id);
     if (index == NULL) {
         HTTP_REPLY_NOT_FOUND
         return;
@@ -123,87 +65,58 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
     mg_http_serve_file(nc, hm, full_path, &opts);
 }
-void javascript(struct mg_connection *nc, struct mg_http_message *hm) {
+void serve_index_html(struct mg_connection *nc, struct mg_http_message *hm) {
+    if (WebCtx.dev) {
+        mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &DefaultServeOpts);
+    } else {
+        web_serve_asset_index_html(nc);
+    }
+}
+
+void serve_index_js(struct mg_connection *nc, struct mg_http_message *hm) {
     if (WebCtx.dev) {
         mg_http_serve_file(nc, hm, "sist2-vue/dist/js/index.js", &DefaultServeOpts);
     } else {
-        send_response_line(nc, 200, sizeof(index_js), "Content-Type: application/javascript");
-        mg_send(nc, index_js, sizeof(index_js));
+        web_serve_asset_index_js(nc);
     }
 }

-void javascript_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
+void serve_chunk_vendors_js(struct mg_connection *nc, struct mg_http_message *hm) {
     if (WebCtx.dev) {
         mg_http_serve_file(nc, hm, "sist2-vue/dist/js/chunk-vendors.js", &DefaultServeOpts);
     } else {
-        send_response_line(nc, 200, sizeof(chunk_vendors_js), "Content-Type: application/javascript");
-        mg_send(nc, chunk_vendors_js, sizeof(chunk_vendors_js));
+        web_serve_asset_chunk_vendors_js(nc);
     }
 }

-void favicon(struct mg_connection *nc, struct mg_http_message *hm) {
-    send_response_line(nc, 200, sizeof(favicon_ico), "Content-Type: image/x-icon");
-    mg_send(nc, favicon_ico, sizeof(favicon_ico));
+void serve_favicon_ico(struct mg_connection *nc, struct mg_http_message *hm) {
+    web_serve_asset_favicon_ico(nc);
 }

-void style(struct mg_connection *nc, struct mg_http_message *hm) {
-    send_response_line(nc, 200, sizeof(index_css), "Content-Type: text/css");
-    mg_send(nc, index_css, sizeof(index_css));
+void serve_style_css(struct mg_connection *nc, struct mg_http_message *hm) {
+    web_serve_asset_style_css(nc);
 }

-void style_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
-    send_response_line(nc, 200, sizeof(chunk_vendors_css), "Content-Type: text/css");
-    mg_send(nc, chunk_vendors_css, sizeof(chunk_vendors_css));
+void serve_chunk_vendors_css(struct mg_connection *nc, struct mg_http_message *hm) {
+    web_serve_asset_chunk_vendors_css(nc);
 }
-void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
-    int has_thumbnail_index = FALSE;
-
-    if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {
-        if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 4) {
-            LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
-            HTTP_REPLY_NOT_FOUND
-            return;
-        }
-        has_thumbnail_index = TRUE;
-    }
-
-    char arg_doc_id[SIST_DOC_ID_LEN];
-    char arg_index[SIST_INDEX_ID_LEN];
-
-    memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
-    *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
-    memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
-    *(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
-
-    store_t *store = get_store(arg_index);
-    if (store == NULL) {
-        LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
-        HTTP_REPLY_NOT_FOUND
-        return;
-    }
-
-    char *data;
-    size_t data_len = 0;
-    if (has_thumbnail_index) {
-        const char *tn_index = hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2;
-        char tn_key[sizeof(arg_doc_id) + sizeof(char) * 4];
-        memcpy(tn_key, arg_doc_id, sizeof(arg_doc_id));
-        memcpy(tn_key + sizeof(arg_doc_id) - 1, tn_index, sizeof(char) * 4);
-        *(tn_key + sizeof(tn_key) - 1) = '\0';
-
-        data = store_read(store, (char *) tn_key, sizeof(tn_key), &data_len);
-    } else {
-        data = store_read(store, (char *) arg_doc_id, sizeof(arg_doc_id), &data_len);
-    }
-
+void serve_thumbnail(struct mg_connection *nc, struct mg_http_message *hm, const char *arg_index,
+                     const char *arg_doc_id, int arg_num) {
+    database_t *db = web_get_database(arg_index);
+    if (db == NULL) {
+        LOG_DEBUGF("serve.c", "Could not get database for index: %s", arg_index);
+        HTTP_REPLY_NOT_FOUND
+        return;
+    }
+
+    size_t data_len = 0;
+    void *data = database_read_thumbnail(db, arg_doc_id, arg_num, &data_len);
+
     if (data_len != 0) {
-        send_response_line(
+        web_send_headers(
                 nc, 200, data_len,
                 "Content-Type: image/jpeg\r\n"
                 "Cache-Control: max-age=31536000"
@@ -216,10 +129,50 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
     }
 }
void search(struct mg_connection *nc, struct mg_http_message *hm) { void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 5) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND
return;
}
char arg_doc_id[SIST_DOC_ID_LEN];
char arg_index[SIST_INDEX_ID_LEN];
char arg_num[5] = {0};
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
memcpy(arg_num, hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 3, 4);
int num = (int) strtol(arg_num, NULL, 10);
serve_thumbnail(nc, hm, arg_index, arg_doc_id, num);
}
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND
return;
}
char arg_doc_id[SIST_DOC_ID_LEN];
char arg_index[SIST_INDEX_ID_LEN];
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
serve_thumbnail(nc, hm, arg_index, arg_doc_id, 0);
}
void search(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len == 0) { if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request") LOG_DEBUG("serve.c", "Client sent empty body, ignoring request");
mg_http_reply(nc, 400, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request"); mg_http_reply(nc, 400, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
return; return;
} }
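
Aside, for readers skimming the hunk above: both new handlers slice fixed-width fields straight out of the URI ("/t/<index>/<doc>" and "/t/<index>/<doc>/<nnnn>"), where each width constant covers the ID characters plus the byte holding the trailing '/' separator, which is then overwritten with a NUL. A standalone sketch of the same parsing pattern; the concrete widths here are hypothetical stand-ins for SIST_INDEX_ID_LEN and SIST_DOC_ID_LEN, whose real values are not shown in this diff:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical widths standing in for SIST_INDEX_ID_LEN / SIST_DOC_ID_LEN.
     * Each counts the ID characters plus one byte for the '/' separator,
     * which the parser overwrites with '\0'. */
    #define INDEX_ID_LEN 9   /* 8 id chars + 1 */
    #define DOC_ID_LEN 37    /* 36 id chars + 1 */

    /* Mirrors thumbnail_with_num(): fixed-offset memcpy, then strtol()
     * on the 4-digit suffix. Returns the thumbnail number. */
    static int parse_thumbnail_uri(const char *uri, char *index_id, char *doc_id) {
        memcpy(index_id, uri + 3, INDEX_ID_LEN);   /* skip "/t/" */
        index_id[INDEX_ID_LEN - 1] = '\0';         /* '/' -> NUL */
        memcpy(doc_id, uri + 3 + INDEX_ID_LEN, DOC_ID_LEN);
        doc_id[DOC_ID_LEN - 1] = '\0';
        return (int) strtol(uri + 3 + INDEX_ID_LEN + DOC_ID_LEN, NULL, 10);
    }

    int main(void) {
        char index_id[INDEX_ID_LEN];
        char doc_id[DOC_ID_LEN];
        int num = parse_thumbnail_uri(
                "/t/deadbeef/0123456789abcdef0123456789abcdef0123/0007",
                index_id, doc_id);
        printf("index=%s doc=%s num=%d\n", index_id, doc_id, num);
        return 0;
    }

The real handlers copy the digits into a 5-byte buffer before calling strtol() because hm->uri is not NUL-terminated; the sketch can pass the suffix directly since its input is a plain C string.
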
@@ -266,7 +219,7 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
     if (strcmp(MG_VERSION, EXPECTED_MONGOOSE_VERSION) != 0) {
         LOG_WARNING("serve.c", "sist2 was not linked with latest mongoose version, "
-                               "serving file from disk might not work as expected.")
+                               "serving file from disk might not work as expected.");
     }
 
     const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
@@ -285,7 +238,7 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
              idx->desc.root, path_unescaped, strlen(path_unescaped) == 0 ? "" : "/",
              name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
 
-    LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path)
+    LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path);
 
     char disposition[8192];
     snprintf(disposition, sizeof(disposition),
@@ -372,7 +325,7 @@ void index_info(struct mg_connection *nc) {
     char *json_str = cJSON_PrintUnformatted(json);
 
-    send_response_line(nc, 200, strlen(json_str), "Content-Type: application/json");
+    web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json");
     mg_send(nc, json_str, strlen(json_str));
 
     free(json_str);
     cJSON_Delete(json);
@@ -382,7 +335,7 @@ void index_info(struct mg_connection *nc) {
 void file(struct mg_connection *nc, struct mg_http_message *hm) {
     if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
-        LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
+        LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr);
         HTTP_REPLY_NOT_FOUND
         return;
     }
@@ -412,7 +365,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
         next = parent->valuestring;
     }
 
-    index_t *idx = get_index_by_id(index_id->valuestring);
+    index_t *idx = web_get_index_by_id(index_id->valuestring);
 
     if (idx == NULL) {
         cJSON_Delete(doc);
@@ -431,9 +384,9 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
 void status(struct mg_connection *nc) {
     char *status = elastic_get_status();
     if (strcmp(status, "open") == 0) {
-        send_response_line(nc, 204, 0, "Content-Type: application/json");
+        web_send_headers(nc, 204, 0, "Content-Type: application/json");
     } else {
-        send_response_line(nc, 500, 0, "Content-Type: application/json");
+        web_send_headers(nc, 500, 0, "Content-Type: application/json");
     }
 
     free(status);
@@ -475,114 +428,114 @@ tag_req_t *parse_tag_request(cJSON *json) {
 }
 
 void tag(struct mg_connection *nc, struct mg_http_message *hm) {
-    if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
+    // if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
-        LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
+    //     LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
-        HTTP_REPLY_NOT_FOUND
+    //     HTTP_REPLY_NOT_FOUND
-        return;
+    //     return;
-    }
+    // }
-
+    //
-    char arg_index[SIST_INDEX_ID_LEN];
+    // char arg_index[SIST_INDEX_ID_LEN];
-    memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
+    // memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
-    *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
+    // *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
-
+    //
-    if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
+    // if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
-        LOG_DEBUG("serve.c", "Invalid tag request")
+    //     LOG_DEBUG("serve.c", "Invalid tag request")
-        HTTP_REPLY_NOT_FOUND
+    //     HTTP_REPLY_NOT_FOUND
-        return;
+    //     return;
-    }
+    // }
-
+    //
-    store_t *store = get_tag_store(arg_index);
+    // store_t *store = get_tag_store(arg_index);
-    if (store == NULL) {
+    // if (store == NULL) {
-        LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
+    //     LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
-        HTTP_REPLY_NOT_FOUND
+    //     HTTP_REPLY_NOT_FOUND
-        return;
+    //     return;
-    }
+    // }
-
+    //
-    char *body = malloc(hm->body.len + 1);
+    // char *body = malloc(hm->body.len + 1);
-    memcpy(body, hm->body.ptr, hm->body.len);
+    // memcpy(body, hm->body.ptr, hm->body.len);
-    *(body + hm->body.len) = '\0';
+    // *(body + hm->body.len) = '\0';
-    cJSON *json = cJSON_Parse(body);
+    // cJSON *json = cJSON_Parse(body);
-
+    //
-    tag_req_t *arg_req = parse_tag_request(json);
+    // tag_req_t *arg_req = parse_tag_request(json);
-    if (arg_req == NULL) {
+    // if (arg_req == NULL) {
-        LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index)
+    //     LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index)
-        cJSON_Delete(json);
+    //     cJSON_Delete(json);
-        free(body);
+    //     free(body);
-        mg_http_reply(nc, 400, "", "Invalid request");
+    //     mg_http_reply(nc, 400, "", "Invalid request");
-        return;
+    //     return;
-    }
+    // }
-
+    //
-    cJSON *arr = NULL;
+    // cJSON *arr = NULL;
-
+    //
-    size_t data_len = 0;
+    // size_t data_len = 0;
-    const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len);
+    // const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len);
-    if (data_len == 0) {
+    // if (data_len == 0) {
-        arr = cJSON_CreateArray();
+    //     arr = cJSON_CreateArray();
-    } else {
+    // } else {
-        arr = cJSON_Parse(data);
+    //     arr = cJSON_Parse(data);
-    }
+    // }
-
+    //
-    if (arg_req->delete) {
+    // if (arg_req->delete) {
-
+    //
-        if (data_len > 0) {
+    //     if (data_len > 0) {
-            cJSON *element = NULL;
+    //         cJSON *element = NULL;
-            int i = 0;
+    //         int i = 0;
-            cJSON_ArrayForEach(element, arr) {
+    //         cJSON_ArrayForEach(element, arr) {
-                if (strcmp(element->valuestring, arg_req->name) == 0) {
+    //             if (strcmp(element->valuestring, arg_req->name) == 0) {
-                    cJSON_DeleteItemFromArray(arr, i);
+    //                 cJSON_DeleteItemFromArray(arr, i);
-                    break;
+    //                 break;
-                }
+    //             }
-                i++;
+    //             i++;
-            }
+    //         }
-        }
+    //     }
-
+    //
-        char *buf = malloc(sizeof(char) * 8192);
+    //     char *buf = malloc(sizeof(char) * 8192);
-        snprintf(buf, 8192,
+    //     snprintf(buf, 8192,
-                 "{"
+    //              "{"
-                 "    \"script\" : {"
+    //              "    \"script\" : {"
-                 "        \"source\": \"if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }\","
+    //              "        \"source\": \"if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }\","
-                 "        \"lang\": \"painless\","
+    //              "        \"lang\": \"painless\","
-                 "        \"params\" : {"
+    //              "        \"params\" : {"
-                 "            \"tag\" : \"%s\""
+    //              "            \"tag\" : \"%s\""
-                 "        }"
+    //              "        }"
-                 "    }"
+    //              "    }"
-                 "}", arg_req->name
+    //              "}", arg_req->name
-        );
+    //     );
-
+    //
-        char url[4096];
+    //     char url[4096];
-        snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
+    //     snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
-        nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
+    //     nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
-
+    //
-    } else {
+    // } else {
-        cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
+    //     cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
-
+    //
-        char *buf = malloc(sizeof(char) * 8192);
+    //     char *buf = malloc(sizeof(char) * 8192);
-        snprintf(buf, 8192,
+    //     snprintf(buf, 8192,
-                 "{"
+    //              "{"
-                 "    \"script\" : {"
+    //              "    \"script\" : {"
-                 "        \"source\": \"if(ctx._source.tag == null) {ctx._source.tag = new ArrayList()} ctx._source.tag.add(params.tag)\","
+    //              "        \"source\": \"if(ctx._source.tag == null) {ctx._source.tag = new ArrayList()} ctx._source.tag.add(params.tag)\","
-                 "        \"lang\": \"painless\","
+    //              "        \"lang\": \"painless\","
-                 "        \"params\" : {"
+    //              "        \"params\" : {"
-                 "            \"tag\" : \"%s\""
+    //              "            \"tag\" : \"%s\""
-                 "        }"
+    //              "        }"
-                 "    }"
+    //              "    }"
-                 "}", arg_req->name
+    //              "}", arg_req->name
-        );
+    //     );
-
+    //
-        char url[4096];
+    //     char url[4096];
-        snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
+    //     snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
-        nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
+    //     nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
-    }
+    // }
-
+    //
-    char *json_str = cJSON_PrintUnformatted(arr);
+    // char *json_str = cJSON_PrintUnformatted(arr);
-    store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1);
+    // store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1);
-    store_flush(store);
+    // store_flush(store);
-
+    //
-    free(arg_req);
+    // free(arg_req);
-    free(json_str);
+    // free(json_str);
-    cJSON_Delete(json);
+    // cJSON_Delete(json);
-    cJSON_Delete(arr);
+    // cJSON_Delete(arr);
-    free(body);
+    // free(body);
 }
 
 int validate_auth(struct mg_connection *nc, struct mg_http_message *hm) {
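
For reference, the escaped painless script in the (now disabled) tag handler is easier to read once the snprintf placeholders are filled in. With a hypothetical tag value of "foo", the delete branch POSTs this body to <es_url>/<es_index>/_update/<doc_id>:

    {
        "script": {
            "source": "if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }",
            "lang": "painless",
            "params": {
                "tag": "foo"
            }
        }
    }

The add branch is symmetric: its script creates ctx._source.tag as a new ArrayList if it is missing, then appends params.tag.
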
@@ -601,7 +554,7 @@ int check_auth0(struct mg_http_message *hm) {
     struct mg_str *cookie = mg_http_get_header(hm, "Cookie");
     if (cookie == NULL) {
-        LOG_WARNING("serve.c", "Unauthorized request (no auth cookie)")
+        LOG_WARNING("serve.c", "Unauthorized request (no auth cookie)");
         return FALSE;
     }
@@ -610,7 +563,7 @@ int check_auth0(struct mg_http_message *hm) {
     token = mg_http_get_header_var(*cookie, mg_str("sist2-auth0"));
     if (token.len == 0) {
-        LOG_WARNING("serve.c", "Unauthorized request (no auth cookie)")
+        LOG_WARNING("serve.c", "Unauthorized request (no auth cookie)");
         return FALSE;
     }
@@ -644,28 +597,31 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
         }
     }
 
+    char uri[256];
+    memcpy(uri, hm->uri.ptr, hm->uri.len);
+    *(uri + hm->uri.len) = '\0';
     LOG_DEBUGF("serve.c", "<%s> GET %s",
-               address_to_string(&(nc->rem)),
-               hm->uri
-    )
+               web_address_to_string(&(nc->rem)),
+               uri
+    );
 
     if (mg_http_match_uri(hm, "/")) {
-        search_index(nc, hm);
+        serve_index_html(nc, hm);
         return;
     } else if (mg_http_match_uri(hm, "/favicon.ico")) {
-        favicon(nc, hm);
+        serve_favicon_ico(nc, hm);
         return;
     } else if (mg_http_match_uri(hm, "/css/index.css")) {
-        style(nc, hm);
+        serve_style_css(nc, hm);
         return;
     } else if (mg_http_match_uri(hm, "/css/chunk-vendors.css")) {
-        style_vendor(nc, hm);
+        serve_chunk_vendors_css(nc, hm);
         return;
     } else if (mg_http_match_uri(hm, "/js/index.js")) {
-        javascript(nc, hm);
+        serve_index_js(nc, hm);
         return;
     } else if (mg_http_match_uri(hm, "/js/chunk-vendors.js")) {
-        javascript_vendor(nc, hm);
+        serve_chunk_vendors_js(nc, hm);
         return;
     } else if (mg_http_match_uri(hm, "/i")) {
         index_info(nc);
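
One detail worth calling out in the hunk above: mongoose's mg_str is a pointer/length pair with no NUL terminator, which is why the new code copies hm->uri into a local buffer before handing it to a "%s" format. A minimal standalone sketch of the same idea; the struct here is a stand-in mirroring the ptr/len layout used by the diff, and the truncation guard for URIs longer than the buffer is my addition, not part of the diff:

    #include <stdio.h>
    #include <string.h>

    /* Stand-in for mongoose's mg_str (ptr + len, not NUL-terminated). */
    struct my_str {
        const char *ptr;
        size_t len;
    };

    /* Copy a length-delimited string into a NUL-terminated buffer,
     * truncating if it does not fit (the bounds check is an addition). */
    static void str_to_cstr(struct my_str s, char *buf, size_t buf_size) {
        size_t n = s.len < buf_size - 1 ? s.len : buf_size - 1;
        memcpy(buf, s.ptr, n);
        buf[n] = '\0';
    }

    int main(void) {
        struct my_str uri = {"/js/index.js", 12};
        char buf[256];
        str_to_cstr(uri, buf, sizeof(buf));
        printf("GET %s\n", buf);
        return 0;
    }
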
@@ -683,6 +639,8 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
         status(nc);
     } else if (mg_http_match_uri(hm, "/f/*")) {
         file(nc, hm);
+    } else if (mg_http_match_uri(hm, "/t/*/*/*")) {
+        thumbnail_with_num(nc, hm);
     } else if (mg_http_match_uri(hm, "/t/*/*")) {
         thumbnail(nc, hm);
     } else if (mg_http_match_uri(hm, "/s/*/*")) {
@@ -706,7 +664,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
         response_t *r = ctx->response;
 
         if (r->status_code == 200) {
-            send_response_line(nc, 200, r->size, "Content-Type: application/json");
+            web_send_headers(nc, 200, r->size, "Content-Type: application/json");
             mg_send(nc, r->body, r->size);
         } else if (r->status_code == 0) {
             sist_log("serve.c", LOG_SIST_ERROR, "Could not connect to elasticsearch!");
@@ -738,7 +696,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
 void serve(const char *listen_address) {
-    LOG_INFOF("serve.c", "Starting web server @ http://%s", listen_address)
+    LOG_INFOF("serve.c", "Starting web server @ http://%s", listen_address);
 
     struct mg_mgr mgr;
     mg_mgr_init(&mgr);
@@ -747,12 +705,12 @@ void serve(const char *listen_address) {
     struct mg_connection *nc = mg_http_listen(&mgr, listen_address, ev_router, NULL);
     if (nc == NULL) {
-        LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address)
+        LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address);
     }
 
     while (ok) {
         mg_mgr_poll(&mgr, 10);
     }
     mg_mgr_free(&mgr);
 
-    LOG_INFO("serve.c", "Finished web event loop")
+    LOG_INFO("serve.c", "Finished web event loop");
 }

File diff suppressed because one or more lines are too long

src/web/web_util.c (new file, 63 additions)

@@ -0,0 +1,63 @@
#include "web_util.h"
#include "static_generated.c"
void web_serve_asset_index_html(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(index_html), "Content-Type: text/html");
mg_send(nc, index_html, sizeof(index_html));
}
void web_serve_asset_index_js(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(index_js), "Content-Type: application/javascript");
mg_send(nc, index_js, sizeof(index_js));
}
void web_serve_asset_chunk_vendors_js(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(chunk_vendors_js), "Content-Type: application/javascript");
mg_send(nc, chunk_vendors_js, sizeof(chunk_vendors_js));
}
void web_serve_asset_favicon_ico(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(favicon_ico), "Content-Type: image/x-icon");
mg_send(nc, favicon_ico, sizeof(favicon_ico));
}
void web_serve_asset_style_css(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(index_css), "Content-Type: text/css");
mg_send(nc, index_css, sizeof(index_css));
}
void web_serve_asset_chunk_vendors_css(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(chunk_vendors_css), "Content-Type: text/css");
mg_send(nc, chunk_vendors_css, sizeof(chunk_vendors_css));
}
index_t *web_get_index_by_id(const char *index_id) {
for (int i = WebCtx.index_count; i >= 0; i--) {
if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) {
return &WebCtx.indices[i];
}
}
return NULL;
}
database_t *web_get_database(const char *index_id) {
index_t *idx = web_get_index_by_id(index_id);
if (idx != NULL) {
return idx->db;
}
return NULL;
}
void web_send_headers(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
mg_printf(
nc,
"HTTP/1.1 %d %s\r\n"
HTTP_SERVER_HEADER
"Content-Length: %d\r\n"
"%s\r\n\r\n",
status_code, "OK",
length,
extra_headers
);
}
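
For context, a handler built on these helpers follows the same two-step pattern as index_info() in serve.c: write the status line and headers with web_send_headers(), then the body with mg_send(). A hypothetical illustration (the endpoint and handler name are made up; this assumes the surrounding sist2 headers and mongoose, as in the rest of this file):

    /* Hypothetical example, not part of the diff: a tiny JSON endpoint
     * built on web_send_headers(). */
    void serve_ping(struct mg_connection *nc) {
        static const char body[] = "{\"ping\": \"pong\"}";
        // sizeof(body) - 1 excludes the terminating NUL from Content-Length.
        web_send_headers(nc, 200, sizeof(body) - 1, "Content-Type: application/json");
        mg_send(nc, body, sizeof(body) - 1);
    }
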

Some files were not shown because too many files have changed in this diff.