Merge pull request #345 from simon987/process-pool

Process pool
simon987 2023-04-10 19:50:23 -04:00 committed by GitHub
commit 3ef675abcf
84 changed files with 4885 additions and 4257 deletions


@ -15,7 +15,6 @@ Makefile
**/*.cbp
VERSION
**/node_modules/
.git/
sist2-*-linux-debug
sist2-*-linux
sist2_debug
@ -33,4 +32,9 @@ tmp_scan/
Dockerfile
Dockerfile.arm64
docker-compose.yml
state.db
state.db
*-journal
build/
__pycache__/
sist2-vue/dist
sist2-admin/frontend/dist

.gitignore (vendored, 3 changes)

@ -41,3 +41,6 @@ build.ninja
src/web/static_generated.c
src/magic_generated.c
src/index/static_generated.c
*.sist2
*-shm
*-journal


@ -5,7 +5,6 @@ set(CMAKE_C_STANDARD 11)
option(SIST_DEBUG "Build a debug executable" on)
option(SIST_FAST "Enable more optimisation flags" off)
option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)
add_compile_definitions(
"SIST_PLATFORM=${SIST_PLATFORM}"
@ -22,29 +21,33 @@ set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)
add_executable(sist2
# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
src/io/store.h src/io/store.c
src/tpool.h src/tpool.c
src/parsing/parse.h src/parsing/parse.c
src/parsing/magic_util.c src/parsing/magic_util.h
src/io/serialize.h src/io/serialize.c
src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
src/index/web.c src/index/web.h
src/web/serve.c src/web/serve.h
src/web/web_util.c src/web/web_util.h
src/index/elastic.c src/index/elastic.h
src/util.c src/util.h
src/ctx.h src/types.h
src/ctx.c src/ctx.h
src/types.h
src/log.c src/log.h
src/cli.c src/cli.h
src/stats.c src/stats.h src/ctx.c
src/parsing/sidecar.c src/parsing/sidecar.h
src/database/database.c src/database/database.h
src/parsing/fs_util.h
src/auth0/auth0_c_api.h src/auth0/auth0_c_api.cpp
# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
)
src/database/database_stats.c src/database/database_stats.h src/database/database_schema.c)
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
@ -52,16 +55,11 @@ set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
find_package(PkgConfig REQUIRED)
pkg_search_module(GLIB REQUIRED glib-2.0)
find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB
NAMES libmagic.so.1 magic
PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
)
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
find_package(unofficial-sqlite3 CONFIG REQUIRED)
target_include_directories(
@ -70,7 +68,6 @@ target_include_directories(
${CMAKE_SOURCE_DIR}/third-party/utf8.h/
${CMAKE_SOURCE_DIR}/third-party/libscan/
${CMAKE_SOURCE_DIR}/
${GLIB_INCLUDE_DIRS}
)
target_compile_options(
@ -120,6 +117,7 @@ else ()
-Ofast
-fno-stack-protector
-fomit-frame-pointer
-w
)
endif ()
@ -133,20 +131,16 @@ target_link_libraries(
sist2
z
lmdb
cjson
argparse
${GLIB_LDFLAGS}
unofficial::mongoose::mongoose
CURL::libcurl
pthread
c
scan
${MAGIC_LIB}
unofficial::sqlite3::sqlite3
)
add_custom_target(


@ -19,9 +19,9 @@ COPY sist2-admin sist2-admin
RUN cd sist2-vue/ && npm install && npm run build
RUN cd sist2-admin/frontend/ && npm install && npm run build
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2 || mv sist2_debug sist2
RUN mkdir build && cd build && cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
RUN cd build && make -j$(nproc)
RUN strip build/sist2 || mv build/sist2_debug build/sist2
FROM --platform="linux/amd64" ubuntu@sha256:965fbcae990b0467ed5657caceaec165018ef44a4d2d46c7cdea80a9dff0d1ea
@ -33,7 +33,7 @@ ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"]
RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libasan5 libmagic1 python3 \
python3-pip git tesseract-ocr libpq-dev && rm -rf /var/lib/apt/lists/*
python3-pip git tesseract-ocr && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
@ -49,7 +49,7 @@ RUN mkdir -p /usr/share/tessdata && \
curl -o /usr/share/tessdata/chi_sim.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/chi_sim.traineddata
# sist2
COPY --from=build /build/sist2 /root/sist2
COPY --from=build /build/build/sist2 /root/sist2
# sist2-admin
COPY sist2-admin/requirements.txt sist2-admin/


@ -3,13 +3,20 @@ MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/
ADD . /build/
RUN cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2
RUN mkdir build && cd build && cmake -DSIST_PLATFORM=arm64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake ..
RUN cd build && make -j$(nproc)
RUN strip build/sist2 || mv build/sist2_debug build/sist2
FROM --platform="linux/arm64/v8" ubuntu:20.04
FROM --platform=linux/arm64/v8 ubuntu@sha256:537da24818633b45fcb65e5285a68c3ec1f3db25f5ae5476a7757bc8dfae92a3
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
WORKDIR /root
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
ENTRYPOINT ["/root/sist2"]
RUN apt update && apt install -y curl libasan5 libmagic1 tesseract-ocr python3-pip python3 git && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \
@ -18,11 +25,16 @@ RUN mkdir -p /usr/share/tessdata && \
curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
curl -o /usr/share/tessdata/osd.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/osd.traineddata &&\
curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata &&\
curl -o /usr/share/tessdata/deu.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/deu.traineddata &&\
curl -o /usr/share/tessdata/equ.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/equ.traineddata &&\
curl -o /usr/share/tessdata/chi_sim.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/chi_sim.traineddata
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
# sist2
COPY --from=build /build/build/sist2 /root/sist2
ENTRYPOINT ["/root/sist2"]
COPY --from=build /build/sist2 /root/sist2
# sist2-admin
COPY sist2-admin/requirements.txt sist2-admin/
RUN python3 -m pip install --no-cache -r sist2-admin/requirements.txt
COPY --from=build /build/sist2-admin/ sist2-admin/


@ -37,12 +37,12 @@ sist2 (Simple incremental search tool)
1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
1. *(or)* Run using docker:
```bash
docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.14.0
docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.17.9
```
1. *(or)* Run using docker-compose:
```yaml
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
image: docker.elastic.co/elasticsearch/elasticsearch:7.17.9
environment:
- discovery.type=single-node
- "ES_JAVA_OPTS=-Xms1G -Xmx2G"
@ -149,8 +149,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
1. Install vcpkg dependencies
```bash
vcpkg install curl[core,openssl]
vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw jasper lcms gumbo
vcpkg install curl[core,openssl] sqlite3 cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample]
```
1. Build


@ -2,7 +2,7 @@ version: "3"
services:
elasticsearch:
image: elasticsearch:7.14.0
image: elasticsearch:7.17.9
container_name: sist2-es
environment:
- "discovery.type=single-node"
@ -15,9 +15,9 @@ services:
- /mnt/array/sist2-admin-data/:/sist2-admin/
- /:/host
ports:
- 4090:4090
# NOTE: Don't export this port publicly!
- 8080:8080
- 4090:4090
working_dir: /root/sist2-admin/
entrypoint: python3
command:


@ -164,6 +164,8 @@ that is about `8000000 * 36kB = 288GB`.
![thumbnail_size](thumbnail_size.png)
// TODO: add note about LMDB page size 4096
### Scan examples
Simple scan


@ -1,10 +1,13 @@
#!/usr/bin/env bash
rm -rf index.sist2/
(
cd ..
rm -rf index.sist2
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c
python3 scripts/index_static.py > src/index/static_generated.c
python3 scripts/magic_static.py > src/magic_generated.c
python3 scripts/mime.py > src/parsing/mime_generated.c
python3 scripts/serve_static.py > src/web/static_generated.c
python3 scripts/index_static.py > src/index/static_generated.c
python3 scripts/magic_static.py > src/magic_generated.c
printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h
printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h
)


@ -1,3 +1,4 @@
application/x-matlab-data,mat
application/arj, arj
application/base64, mme
application/binhex, hqx
@ -29,7 +30,7 @@ application/mime, aps
application/mspowerpoint, ppz
application/msword, doc|dot|w6w|wiz|word
application/netmc, mcp
application/octet-stream, bin|dump|gpg
application/octet-stream, bin|dump|gpg|pack|idx
application/oda, oda
application/ogg, ogv
application/pdf, pdf
@ -243,7 +244,7 @@ audio/make, funk|my|pfunk
audio/midi, kar
audio/mid, rmi
audio/mp4, m4b
audio/mpeg, m2a|mpa
audio/mpeg, m2a|mpa|mpga
audio/ogg, ogg
audio/s3m, s3m
audio/tsp-audio, tsi
@ -346,6 +347,8 @@ text/mcf, mcf
text/pascal, pas
text/PGP,
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
text/x-script.python, pyx
text/csv,
application/vnd.coffeescript, coffee
text/richtext, rt|rtf|rtx
text/rtf,
@ -382,7 +385,7 @@ text/x-pascal, p
text/x-perl, pl
text/x-php, php
text/x-po, po
text/x-python, py
text/x-python, py|pyi
text/x-ruby, rb
text/x-sass, sass
text/x-scss, scss



@ -1,3 +1,5 @@
import zlib
mimes = {}
noparse = set()
ext_in_hash = set()
@ -135,24 +137,40 @@ def clean(t):
return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")
def crc(s):
return zlib.crc32(s.encode()) & 0xffffffff
with open("scripts/mime.csv") as f:
for l in f:
mime, ext_list = l.split(",")
if l.startswith("!"):
mime = mime[1:]
noparse.add(mime)
ext = [x.strip() for x in ext_list.split("|")]
ext = [x.strip() for x in ext_list.split("|") if x.strip() != ""]
mimes[mime] = ext
seen_crc = set()
for ext in mimes.values():
for e in ext:
if crc(e) in seen_crc:
raise Exception("CRC32 collision")
seen_crc.add(crc(e))
seen_crc = set()
for mime in mimes.keys():
if crc(mime) in seen_crc:
raise Exception("CRC32 collision")
seen_crc.add(crc(mime))
print("// **Generated by mime.py**")
print("#ifndef MIME_GENERATED_C")
print("#define MIME_GENERATED_C")
print("#include <glib.h>\n")
print("#include <stdlib.h>\n")
# Enum
print("enum mime {")
for mime, ext in sorted(mimes.items()):
print(" " + clean(mime) + "=" + mime_id(mime) + ",")
print(f"{clean(mime)}={mime_id(mime)},")
print("};")
# Enum -> string
@ -163,20 +181,20 @@ with open("scripts/mime.csv") as f:
print("default: return NULL;}}")
# Ext -> Enum
print("GHashTable *mime_get_ext_table() {"
"GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);")
print("unsigned int mime_extension_lookup(unsigned long extension_crc32) {"
"switch (extension_crc32) {")
for mime, ext in mimes.items():
for e in [e for e in ext if e]:
print("g_hash_table_insert(ext_table, \"" + e + "\", (gpointer)" + clean(mime) + ");")
if e in ext_in_hash:
raise Exception("extension already in hash: " + e)
ext_in_hash.add(e)
print("return ext_table;}")
if len(ext) > 0:
for e in ext:
print(f"case {crc(e)}:", end="")
print(f"return {clean(mime)};")
print("default: return 0;}}")
# string -> Enum
print("GHashTable *mime_get_mime_table() {"
"GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);")
for mime, ext in mimes.items():
print("g_hash_table_insert(mime_table, \"" + mime + "\", (gpointer)" + clean(mime) + ");")
print("return mime_table;}")
print("unsigned int mime_name_lookup(unsigned long mime_crc32) {"
"switch (mime_crc32) {")
for mime in mimes.keys():
print(f"case {crc(mime)}: return {clean(mime)};")
print("default: return 0;}}")
print("#endif")


@ -1,3 +1,3 @@
docker run --rm -it --name "sist2-dev-es"\
-p 9200:9200 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.14.0
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.17.9


@ -4,7 +4,7 @@
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width,initial-scale=1.0">
<link rel="icon" href="<%= BASE_URL %>favicon.ico">
<link rel="icon" href="<%= BASE_URL %>serve_favicon_ico.ico">
<title>sist2-admin</title>
</head>
<body>


@ -28,16 +28,22 @@ export default {
return this.$store.state.jobDesktopNotificationMap[this.job.name];
}
},
methods: {
mounted() {
this.cronValid = this.checkCron(this.job.cron_expression)
},
methods: {
checkCron(expression) {
return /((((\d+,)+\d+|(\d+([/-])\d+)|\d+|\*) ?){5,7})/.test(expression);
},
updateNotifications(value) {
this.$store.dispatch("setJobDesktopNotification", {
job: this.job.name,
enabled: value
})
});
},
update() {
if (this.job.schedule_enabled) {
this.cronValid = /((((\d+,)+\d+|(\d+([/-])\d+)|\d+|\*) ?){5,7})/.test(this.job.cron_expression);
this.cronValid = this.checkCron(this.job.cron_expression);
} else {
this.cronValid = undefined;
}


@ -6,9 +6,6 @@
<label>{{ $t("scanOptions.threads") }}</label>
<b-form-input type="number" min="1" v-model="options.threads" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.memThrottle") }}</label>
<b-form-input type="number" min="0" v-model="options.mem_throttle" @change="update()"></b-form-input>
<label>{{ $t("scanOptions.thumbnailQuality") }}</label>
<b-form-input type="number" min="1" max="31" v-model="options.thumbnail_quality" @change="update()"></b-form-input>
@ -70,8 +67,9 @@
{{ $t("scanOptions.readSubtitles") }}
</b-form-checkbox>
<label>{{ $t("scanOptions.memBuffer") }}</label>
<b-form-input type="number" min="0" v-model="options.mem_buffer" @change="update()"></b-form-input>
<b-form-checkbox v-model="options.optimize_index" @change="update()">
{{ $t("scanOptions.optimizeIndex") }}
</b-form-checkbox>
<label>{{ $t("scanOptions.treemapThreshold") }}</label>
<b-form-input type="number" min="0" v-model="options.treemap_threshold" @change="update()"></b-form-input>


@ -56,6 +56,10 @@ export default {
tagline: "Tagline in navbar",
auth: "Basic auth in user:password format",
tagAuth: "Basic auth in user:password format for tagging",
auth0Audience: "Auth0 audience",
auth0Domain: "Auth0 domain",
auth0ClientId: "Auth0 client ID",
auth0PublicKey: "Auth0 public key",
},
scanOptions: {
title: "Scanning options",
@ -80,7 +84,8 @@ export default {
checksums: "Calculate file checksums when scanning",
readSubtitles: "Read subtitles from media files",
memBuffer: "Maximum memory buffer size per thread in MiB for files inside archives",
treemapThreshold: "Relative size threshold for treemap"
treemapThreshold: "Relative size threshold for treemap",
optimizeIndex: "Defragment index file after scan to reduce its file size."
},
indexOptions: {
title: "Indexing options",

File diff suppressed because it is too large.


@ -251,7 +251,7 @@ def check_es_version(es_url: str, insecure: bool):
def start_frontend_(frontend: Sist2Frontend):
frontend.web_options.indices = list(map(lambda j: db["jobs"][j].last_index, frontend.jobs))
frontend.web_options.indices = list(map(lambda j: db["jobs"][j].index_path, frontend.jobs))
pid = sist2.web(frontend.web_options, frontend.name)
RUNNING_FRONTENDS[frontend.name] = pid
@ -378,6 +378,9 @@ if __name__ == '__main__':
if db["sist2_admin"]["info"]["version"] == "1":
logger.info("Migrating to v2 database schema")
migrate_v1_to_v2(db)
if db["sist2_admin"]["info"]["version"] == "2":
logger.error("Cannot migrate database from v2 to v3. Delete state.db to proceed.")
exit(-1)
start_frontends()
cron.initialize(db, _run_job)


@ -1,23 +1,21 @@
import json
import logging
import os.path
import shutil
import signal
import uuid
from datetime import datetime
from enum import Enum
from hashlib import md5
from logging import FileHandler
from threading import Lock, Thread
from time import sleep
from uuid import uuid4, UUID
from hexlib.db import PersistentState
from pydantic import BaseModel, validator
from pydantic import BaseModel
from config import logger, LOG_FOLDER
from notifications import Notifications
from sist2 import ScanOptions, IndexOptions, Sist2, Sist2Index
from sist2 import ScanOptions, IndexOptions, Sist2
from state import RUNNING_FRONTENDS
from web import Sist2Frontend
@ -38,7 +36,8 @@ class Sist2Job(BaseModel):
schedule_enabled: bool = False
previous_index: str = None
last_index: str = None
index_path: str = None
previous_index_path: str = None
last_index_date: datetime = None
status: JobStatus = JobStatus("created")
last_modified: datetime
@ -124,10 +123,10 @@ class Sist2ScanTask(Sist2Task):
self.job.scan_options.name = self.job.name
if self.job.last_index and os.path.exists(self.job.last_index) and not self.job.do_full_scan:
self.job.scan_options.incremental = self.job.last_index
if self.job.index_path is not None and not self.job.do_full_scan:
self.job.scan_options.output = self.job.index_path
else:
self.job.scan_options.incremental = None
self.job.scan_options.output = None
def set_pid(pid):
self.pid = pid
@ -139,19 +138,26 @@ class Sist2ScanTask(Sist2Task):
self._logger.error(json.dumps({"sist2-admin": f"Process returned non-zero exit code ({return_code})"}))
logger.info(f"Task {self.display_name} failed ({return_code})")
else:
index = Sist2Index(self.job.scan_options.output)
# Save latest index
self.job.previous_index = self.job.last_index
self.job.last_index = index.path
self.job.index_path = self.job.scan_options.output
self.job.last_index_date = datetime.now()
self.job.do_full_scan = False
db["jobs"][self.job.name] = self.job
self._logger.info(json.dumps({"sist2-admin": f"Save last_index={self.job.last_index}"}))
self._logger.info(json.dumps({"sist2-admin": f"Save last_index_date={self.job.last_index_date}"}))
logger.info(f"Completed {self.display_name} ({return_code=})")
# Remove old index
if return_code == 0:
if self.job.previous_index_path is not None and self.job.previous_index_path != self.job.index_path:
self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index_path=}"}))
try:
os.remove(self.job.previous_index_path)
except FileNotFoundError:
pass
self.job.previous_index_path = self.job.index_path
db["jobs"][self.job.name] = self.job
return return_code
@ -173,18 +179,11 @@ class Sist2IndexTask(Sist2Task):
ok = return_code == 0
if ok:
# Remove old index
if self.job.previous_index is not None:
self._logger.info(json.dumps({"sist2-admin": f"Remove {self.job.previous_index=}"}))
try:
shutil.rmtree(self.job.previous_index)
except FileNotFoundError:
pass
self.restart_running_frontends(db, sist2)
# Update status
self.job.status = JobStatus("indexed") if ok else JobStatus("failed")
self.job.previous_index_path = self.job.index_path
db["jobs"][self.job.name] = self.job
self._logger.info(json.dumps({"sist2-admin": f"Sist2Scan task finished {return_code=}, {duration=}"}))
@ -198,13 +197,16 @@ class Sist2IndexTask(Sist2Task):
frontend = db["frontends"][frontend_name]
frontend: Sist2Frontend
os.kill(pid, signal.SIGTERM)
try:
os.kill(pid, signal.SIGTERM)
except ProcessLookupError:
pass
try:
os.wait()
except ChildProcessError:
pass
frontend.web_options.indices = map(lambda j: db["jobs"][j].last_index, frontend.jobs)
frontend.web_options.indices = map(lambda j: db["jobs"][j].index_path, frontend.jobs)
pid = sist2.web(frontend.web_options, frontend.name)
RUNNING_FRONTENDS[frontend_name] = pid


@ -2,7 +2,6 @@ import datetime
import json
import logging
import os.path
import traceback
from datetime import datetime
from io import TextIOWrapper
from logging import FileHandler
@ -78,10 +77,10 @@ class IndexOptions(BaseModel):
es_url: str = "http://elasticsearch:9200"
es_insecure_ssl: bool = False
es_index: str = "sist2"
incremental_index: bool = False
incremental_index: bool = True
script: str = ""
script_file: str = None
batch_size: int = 100
batch_size: int = 70
def __init__(self, **kwargs):
super().__init__(**kwargs)
@ -110,9 +109,8 @@ ARCHIVE_RECURSE = "recurse"
class ScanOptions(BaseModel):
path: str
threads: int = 1
mem_throttle: int = 0
thumbnail_quality: int = 2
thumbnail_size: int = 500
thumbnail_size: int = 552
thumbnail_count: int = 1
content_size: int = 32768
depth: int = -1
@ -128,7 +126,8 @@ class ScanOptions(BaseModel):
read_subtitles: bool = False
fast_epub: bool = False
checksums: bool = False
incremental: str = None
incremental: bool = True
optimize_index: bool = False
output: str = None
name: str = None
rewrite_url: str = None
@ -138,14 +137,15 @@ class ScanOptions(BaseModel):
super().__init__(**kwargs)
def args(self):
args = ["scan", self.path, f"--threads={self.threads}", f"--mem-throttle={self.mem_throttle}",
f"--thumbnail-quality={self.thumbnail_quality}", f"--thumbnail-count={self.thumbnail_count}",
f"--thumbnail-size={self.thumbnail_size}", f"--content-size={self.content_size}",
f"--output={self.output}", f"--depth={self.depth}", f"--archive={self.archive}",
f"--mem-buffer={self.mem_buffer}"]
args = ["scan", self.path, f"--threads={self.threads}", f"--thumbnail-quality={self.thumbnail_quality}",
f"--thumbnail-count={self.thumbnail_count}", f"--thumbnail-size={self.thumbnail_size}",
f"--content-size={self.content_size}", f"--output={self.output}", f"--depth={self.depth}",
f"--archive={self.archive}", f"--mem-buffer={self.mem_buffer}"]
if self.incremental:
args.append(f"--incremental={self.incremental}")
args.append(f"--incremental")
if self.optimize_index:
args.append(f"--optimize-index")
if self.rewrite_url:
args.append(f"--rewrite-url={self.rewrite_url}")
if self.name:
@ -235,11 +235,11 @@ class Sist2:
def scan(self, options: ScanOptions, logs_cb, set_pid_cb):
output_dir = os.path.join(
self._data_dir,
f"scan-{options.name.replace('/', '_')}-{datetime.now()}.sist2"
)
options.output = output_dir
if options.output is None:
options.output = os.path.join(
self._data_dir,
f"scan-{options.name.replace('/', '_')}-{datetime.now()}.sist2"
)
args = [
self._bin_path,


@ -10,7 +10,7 @@ RUNNING_FRONTENDS: Dict[str, int] = {}
TESSERACT_LANGS = get_tesseract_langs()
DB_SCHEMA_VERSION = "2"
DB_SCHEMA_VERSION = "3"
from pydantic import BaseModel
@ -76,4 +76,4 @@ def migrate_v1_to_v2(db: PersistentState):
db["sist2_admin"]["info"] = {
"version": "2"
}
}


@ -75,7 +75,7 @@ export default {
}
return (this.currentThumbnailNum === 0)
? `t/${doc._source.index}/${doc._id}`
: `t/${doc._source.index}/${doc._id}${String(thumbnailNum).padStart(4, "0")}`;
: `t/${doc._source.index}/${doc._id}/${String(thumbnailNum).padStart(4, "0")}`;
},
humanTime: humanTime,
onThumbnailClick() {


@ -1,12 +1,13 @@
#ifndef SIST2_AUTH0_C_API_H
#define SIST2_AUTH0_C_API_H
#include "stdlib.h"
#ifdef __cplusplus
#define EXTERNC extern "C"
#include "cstdlib"
#else
#define EXTERNC
#include "stdlib.h"
#endif
#define AUTH0_OK (0)
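
Aside: `EXTERNC` here is the standard C/C++ linkage-bridging pattern, so the C++ implementation (`auth0_c_api.cpp`) exports functions callable from the C scanner. A hedged illustration; `auth0_verify` is a made-up name, not a declaration from this diff:

```c
/* Illustration only; auth0_verify is a hypothetical declaration. */
EXTERNC int auth0_verify(const char *token);
/* In C++ this expands to:  extern "C" int auth0_verify(const char *token);
   In C it expands to:      int auth0_verify(const char *token);           */
```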

src/cli.c (171 changes)

@ -2,16 +2,17 @@
#include "ctx.h"
#include <tesseract/capi.h>
#define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_OUTPUT "index.sist2"
#define DEFAULT_NAME "index"
#define DEFAULT_CONTENT_SIZE 32768
#define DEFAULT_QUALITY 2
#define DEFAULT_THUMBNAIL_SIZE 500
#define DEFAULT_THUMBNAIL_SIZE 552
#define DEFAULT_THUMBNAIL_COUNT 1
#define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200"
#define DEFAULT_ES_INDEX "sist2"
#define DEFAULT_BATCH_SIZE 100
#define DEFAULT_BATCH_SIZE 70
#define DEFAULT_TAGLINE "Lightning-fast file system indexer and search tool"
#define DEFAULT_LANG "en"
@ -20,8 +21,6 @@
#define DEFAULT_MAX_MEM_BUFFER 2000
#define DEFAULT_THROTTLE_MEMORY_THRESHOLD 0
const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
@ -48,9 +47,6 @@ void scan_args_destroy(scan_args_t *args) {
if (args->name != NULL) {
free(args->name);
}
if (args->incremental != NULL) {
free(args->incremental);
}
if (args->path != NULL) {
free(args->path);
}
@ -61,7 +57,6 @@ void scan_args_destroy(scan_args_t *args) {
}
void index_args_destroy(index_args_t *args) {
//todo
if (args->es_mappings_path) {
free(args->es_mappings);
}
@ -76,7 +71,6 @@ void index_args_destroy(index_args_t *args) {
}
void web_args_destroy(web_args_t *args) {
//todo
free(args);
}
@ -97,19 +91,13 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char *abs_path = abspath(argv[1]);
if (abs_path == NULL) {
LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1])
LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1]);
} else {
abs_path = realloc(abs_path, strlen(abs_path) + 2);
strcat(abs_path, "/");
args->path = abs_path;
}
if (args->incremental != OPTION_VALUE_UNSPECIFIED) {
args->incremental = abspath(args->incremental);
if (abs_path == NULL) {
sist_log("main.c", LOG_SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
args->incremental = NULL;
}
}
if (args->tn_quality == OPTION_VALUE_UNSPECIFIED) {
args->tn_quality = DEFAULT_QUALITY;
} else if (args->tn_quality < 2 || args->tn_quality > 31) {
@ -140,8 +128,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->threads == 0) {
args->threads = 1;
} else if (args->threads < 0) {
fprintf(stderr, "Invalid value for --threads: %d. Must be a positive number\n", args->threads);
} else if (args->threads < 0 || args->threads > 256) {
fprintf(stderr, "Invalid value for --threads: %d. Must be a positive number <= 256\n", args->threads);
return 1;
}
@ -152,20 +140,24 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->output = expandpath(args->output);
}
int ret = mkdir(args->output, S_IRUSR | S_IWUSR | S_IXUSR);
if (ret != 0) {
fprintf(stderr, "Invalid output: '%s' (%s).\n", args->output, strerror(errno));
return 1;
char *abs_output = abspath(args->output);
if (args->incremental && abs_output == NULL) {
LOG_WARNINGF("main.c", "Could not open original index for incremental scan: %s. Will not perform incremental scan.", args->output);
args->incremental = FALSE;
} else if (!args->incremental && abs_output != NULL) {
LOG_FATALF("main.c", "Index already exists: %s. If you wish to perform incremental scan, you must specify --incremental", abs_output);
}
free(abs_output);
if (args->depth <= 0) {
args->depth = G_MAXINT32;
args->depth = 2147483647;
} else {
args->depth += 1;
}
if (args->name == OPTION_VALUE_UNSPECIFIED) {
args->name = g_path_get_basename(args->output);
args->name = malloc(strlen(DEFAULT_NAME) + 1);
strcpy(args->name, DEFAULT_NAME);
} else {
char *tmp = malloc(strlen(args->name) + 1);
strcpy(tmp, args->name);
@ -224,7 +216,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
}
if (trained_data_path != NULL && path != trained_data_path) {
LOG_FATAL("cli.c", "When specifying more than one tesseract language, all the traineddata "
"files must be in the same folder")
"files must be in the same folder");
}
trained_data_path = path;
@ -232,7 +224,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
}
free(lang);
ret = TessBaseAPIInit3(api, trained_data_path, args->tesseract_lang);
int ret = TessBaseAPIInit3(api, trained_data_path, args->tesseract_lang);
if (ret != 0) {
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
return 1;
@ -249,12 +241,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0);
if (error != NULL) {
LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset)
LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset);
}
pcre_extra *re_extra = pcre_study(re, 0, &error);
if (error != NULL) {
LOG_FATALF("cli.c", "pcre_study returned error: %s", error)
LOG_FATALF("cli.c", "pcre_study returned error: %s", error);
}
ScanCtx.exclude = re;
@ -273,14 +265,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->max_memory_buffer_mib = DEFAULT_MAX_MEM_BUFFER;
}
if (args->scan_mem_limit_mib == OPTION_VALUE_UNSPECIFIED || args->scan_mem_limit_mib == OPTION_VALUE_DISABLE) {
args->scan_mem_limit_mib = DEFAULT_THROTTLE_MEMORY_THRESHOLD;
}
if (args->list_path != OPTION_VALUE_UNSPECIFIED) {
if (strcmp(args->list_path, "-") == 0) {
args->list_file = stdin;
LOG_DEBUG("cli.c", "Using stdin as list file")
LOG_DEBUG("cli.c", "Using stdin as list file");
} else {
args->list_file = fopen(args->list_path, "r");
@ -290,27 +278,27 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
}
}
LOG_DEBUGF("cli.c", "arg tn_quality=%f", args->tn_quality)
LOG_DEBUGF("cli.c", "arg tn_size=%d", args->tn_size)
LOG_DEBUGF("cli.c", "arg tn_count=%d", args->tn_count)
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
LOG_DEBUGF("cli.c", "arg threads=%d", args->threads)
LOG_DEBUGF("cli.c", "arg incremental=%s", args->incremental)
LOG_DEBUGF("cli.c", "arg output=%s", args->output)
LOG_DEBUGF("cli.c", "arg rewrite_url=%s", args->rewrite_url)
LOG_DEBUGF("cli.c", "arg name=%s", args->name)
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
LOG_DEBUGF("cli.c", "arg path=%s", args->path)
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase)
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
LOG_DEBUGF("cli.c", "arg max_memory_buffer_mib=%d", args->max_memory_buffer_mib)
LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path)
LOG_DEBUGF("cli.c", "arg tn_quality=%f", args->tn_quality);
LOG_DEBUGF("cli.c", "arg tn_size=%d", args->tn_size);
LOG_DEBUGF("cli.c", "arg tn_count=%d", args->tn_count);
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size);
LOG_DEBUGF("cli.c", "arg threads=%d", args->threads);
LOG_DEBUGF("cli.c", "arg incremental=%d", args->incremental);
LOG_DEBUGF("cli.c", "arg output=%s", args->output);
LOG_DEBUGF("cli.c", "arg rewrite_url=%s", args->rewrite_url);
LOG_DEBUGF("cli.c", "arg name=%s", args->name);
LOG_DEBUGF("cli.c", "arg depth=%d", args->depth);
LOG_DEBUGF("cli.c", "arg path=%s", args->path);
LOG_DEBUGF("cli.c", "arg archive=%s", args->archive);
LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase);
LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang);
LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path);
LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex);
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast);
LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub);
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold);
LOG_DEBUGF("cli.c", "arg max_memory_buffer_mib=%d", args->max_memory_buffer_mib);
LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path);
return 0;
}
@ -320,20 +308,20 @@ int load_external_file(const char *file_path, char **dst) {
int res = stat(file_path, &info);
if (res == -1) {
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno));
return 1;
}
int fd = open(file_path, O_RDONLY);
if (fd == -1) {
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno));
return 1;
}
*dst = malloc(info.st_size + 1);
res = read(fd, *dst, info.st_size);
if (res < 0) {
LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno))
LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno));
return 1;
}
@ -361,7 +349,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
char *index_path = abspath(argv[1]);
if (index_path == NULL) {
LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1])
LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1]);
} else {
args->index_path = index_path;
}
@ -396,28 +384,28 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
args->batch_size = DEFAULT_BATCH_SIZE;
}
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script)
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url);
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index);
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl);
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path);
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path);
LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script);
if (args->script) {
char log_buf[5000];
strncpy(log_buf, args->script, sizeof(log_buf));
*(log_buf + sizeof(log_buf) - 1) = '\0';
LOG_DEBUGF("cli.c", "arg script=%s", log_buf)
LOG_DEBUGF("cli.c", "arg script=%s", log_buf);
}
LOG_DEBUGF("cli.c", "arg print=%d", args->print)
LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path)
LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings)
LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path)
LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings)
LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset)
LOG_DEBUGF("cli.c", "arg print=%d", args->print);
LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path);
LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings);
LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path);
LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings);
LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size);
LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset);
return 0;
}
@ -538,23 +526,24 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
if (abs_path == NULL) {
LOG_FATALF("cli.c", "Index not found: %s", args->indices[i])
LOG_FATALF("cli.c", "Index not found: %s", args->indices[i]);
}
free(abs_path);
}
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl)
LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline)
LOG_DEBUGF("cli.c", "arg dev=%d", args->dev)
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials)
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url);
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index);
LOG_DEBUGF("cli.c", "arg es_insecure_ssl=%d", args->es_insecure_ssl);
LOG_DEBUGF("cli.c", "arg tagline=%s", args->tagline);
LOG_DEBUGF("cli.c", "arg dev=%d", args->dev);
LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address);
LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials);
LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials);
LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user);
LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass);
LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count);
for (int i = 0; i < args->index_count; i++) {
LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i])
LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i]);
}
return 0;
@ -579,7 +568,7 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
char *index_path = abspath(argv[1]);
if (index_path == NULL) {
LOG_FATALF("cli.c", "Invalid index PATH argument. File not found: %s", argv[1])
LOG_FATALF("cli.c", "Invalid index PATH argument. File not found: %s", argv[1]);
} else {
args->index_path = index_path;
}
@ -600,12 +589,12 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
return 1;
}
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path);
char log_buf[5000];
strncpy(log_buf, args->script, sizeof(log_buf));
*(log_buf + sizeof(log_buf) - 1) = '\0';
LOG_DEBUGF("cli.c", "arg script=%s", log_buf)
LOG_DEBUGF("cli.c", "arg script=%s", log_buf);
return 0;
}


@ -13,8 +13,8 @@ typedef struct scan_args {
int tn_size;
int content_size;
int threads;
int scan_mem_limit_mib;
char *incremental;
int incremental;
int optimize_database;
char *output;
char *rewrite_url;
char *name;


@ -3,9 +3,10 @@
ScanCtx_t ScanCtx = {
.stat_index_size = 0,
.stat_tn_size = 0,
.dbg_current_files = NULL,
.pool = NULL
.pool = NULL,
.index.path = {0,},
};
WebCtx_t WebCtx;
IndexCtx_t IndexCtx;
LogCtx_t LogCtx;
__thread ProcData_t ProcData;


@ -16,47 +16,28 @@
#include "libscan/msdoc/msdoc.h"
#include "libscan/wpd/wpd.h"
#include "libscan/json/json.h"
#include "src/io/store.h"
#include "src/database/database.h"
#include "src/index/elastic.h"
#include "sqlite3.h"
#include <glib.h>
#include <pcre.h>
typedef struct {
struct index_t index;
GHashTable *mime_table;
GHashTable *ext_table;
tpool_t *pool;
tpool_t *writer_pool;
int threads;
int depth;
int calculate_checksums;
size_t mem_limit;
size_t stat_tn_size;
size_t stat_index_size;
GHashTable *original_table;
GHashTable *copy_table;
GHashTable *new_table;
pthread_mutex_t copy_table_mu;
pcre *exclude;
pcre_extra *exclude_extra;
int fast;
GHashTable *dbg_current_files;
pthread_mutex_t dbg_current_files_mu;
int dbg_failed_files_count;
int dbg_skipped_files_count;
int dbg_excluded_files_count;
pthread_mutex_t dbg_file_counts_mu;
scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx;
scan_ebook_ctx_t ebook_ctx;
@ -85,10 +66,6 @@ typedef struct {
char *es_index;
int batch_size;
tpool_t *pool;
store_t *tag_store;
GHashTable *tags;
store_t *meta_store;
GHashTable *meta;
/**
* Set to false when using --print
*/
@ -118,10 +95,18 @@ typedef struct {
int dev;
} WebCtx_t;
typedef struct {
int thread_id;
database_t *ipc_db;
database_t *index_db;
} ProcData_t;
extern ScanCtx_t ScanCtx;
extern WebCtx_t WebCtx;
extern IndexCtx_t IndexCtx;
extern LogCtx_t LogCtx;
extern __thread ProcData_t ProcData;
#endif
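
Since `ProcData` is declared `__thread`, every worker thread (or forked process) sees its own copy of these database handles. A plausible setup sketch, inferred from the struct fields; the actual initialization code is not part of this hunk, and `worker_id` and `ipc_filename` are assumptions:

```c
/* Hypothetical worker initialization (names are assumptions, not from the diff). */
ProcData.thread_id = worker_id;
ProcData.ipc_db = database_create(ipc_filename, IPC_CONSUMER_DATABASE);
database_open(ProcData.ipc_db);
```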

src/database/database.c (new file, 626 lines)

@ -0,0 +1,626 @@
#include "database.h"
#include "malloc.h"
#include "src/ctx.h"
#include <string.h>
#include <pthread.h>
#include "src/util.h"
#include <time.h>
database_t *database_create(const char *filename, database_type_t type) {
database_t *db = malloc(sizeof(database_t));
strcpy(db->filename, filename);
db->type = type;
db->select_thumbnail_stmt = NULL;
db->ipc_ctx = NULL;
return db;
}
__always_inline
static int sep_rfind(const char *str) {
for (int i = (int) strlen(str); i >= 0; i--) {
if (str[i] == '/') {
return i;
}
}
return -1;
}
void path_parent_func(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
sqlite3_result_error(ctx, "Invalid parameters", -1);
}
const char *value = (const char *) sqlite3_value_text(argv[0]);
int stop = sep_rfind(value);
if (stop == -1) {
sqlite3_result_null(ctx);
return;
}
char parent[PATH_MAX * 3];
strncpy(parent, value, stop);
sqlite3_result_text(ctx, parent, stop, SQLITE_TRANSIENT);
}
void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
if (argc != 1 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
sqlite3_result_error(ctx, "Invalid parameters", -1);
}
database_ipc_ctx_t *ipc_ctx = sqlite3_user_data(ctx);
const char *current_job = (const char *) sqlite3_value_text(argv[0]);
char buf[PATH_MAX];
strcpy(buf, current_job);
strcpy(ipc_ctx->current_job[ProcData.thread_id], current_job);
sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC);
}
void database_initialize(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db));
LOG_DEBUGF("database.c", "Initializing database %s", db->filename);
if (db->type == INDEX_DATABASE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, IndexDatabaseSchema, NULL, NULL, NULL));
} else if (db->type == IPC_CONSUMER_DATABASE || db->type == IPC_PRODUCER_DATABASE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, IpcDatabaseSchema, NULL, NULL, NULL));
}
sqlite3_close(db->db);
}
void database_open(database_t *db) {
LOG_DEBUGF("database.c", "Opening database %s (%d)", db->filename, db->type);
CRASH_IF_NOT_SQLITE_OK(sqlite3_open(db->filename, &db->db));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA cache_size = -200000;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA synchronous = OFF;", NULL, NULL, NULL));
if (db->type == INDEX_DATABASE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA temp_store = memory;", NULL, NULL, NULL));
}
if (db->type == INDEX_DATABASE) {
// Prepare statements;
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"SELECT data FROM thumbnail WHERE id=? AND num=? LIMIT 1;", -1,
&db->select_thumbnail_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"UPDATE document SET marked=1 WHERE id=? AND mtime=? RETURNING id",
-1,
&db->mark_document_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"REPLACE INTO document_sidecar (id, json_data) VALUES (?,?)", -1,
&db->write_document_sidecar_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"REPLACE INTO document (id, mtime, size, json_data) VALUES (?, ?, ?, ?);", -1,
&db->write_document_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"INSERT INTO thumbnail (id, num, data) VALUES (?,?,?) ON CONFLICT DO UPDATE SET data=excluded.data;",
-1,
&db->write_thumbnail_stmt, NULL));
// Create functions
sqlite3_create_function(
db->db,
"path_parent",
1,
SQLITE_UTF8,
NULL,
path_parent_func,
NULL,
NULL
);
} else if (db->type == IPC_CONSUMER_DATABASE) {
sqlite3_create_function(
db->db,
"save_current_job_info",
1,
SQLITE_UTF8,
db->ipc_ctx,
save_current_job_info,
NULL,
NULL
);
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"DELETE FROM parse_job WHERE id = (SELECT MIN(id) FROM parse_job)"
" RETURNING filepath,mtime,st_size,save_current_job_info(filepath);",
-1, &db->pop_parse_job_stmt, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db,
"DELETE FROM index_job WHERE id = (SELECT MIN(id) FROM index_job)"
" RETURNING doc_id,type,line;",
-1, &db->pop_index_job_stmt, NULL
));
} else if (db->type == IPC_PRODUCER_DATABASE) {
char sql[40];
int max_size_mb = 10; // TODO: read from args.
snprintf(sql, sizeof(sql), "PRAGMA max_page_count=%d", (max_size_mb * 1024 * 1024) / 4096);
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, sql, NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "INSERT INTO parse_job (filepath,mtime,st_size) VALUES (?,?,?);", -1,
&db->insert_parse_job_stmt, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_prepare_v2(
db->db, "INSERT INTO index_job (doc_id,type,line) VALUES (?,?,?);", -1,
&db->insert_index_job_stmt, NULL));
sqlite3_create_function(
db->db,
"path_parent",
1,
SQLITE_UTF8,
NULL,
path_parent_func,
NULL,
NULL
);
}
}
void database_close(database_t *db, int optimize) {
LOG_DEBUGF("database.c", "Closing database %s", db->filename);
if (optimize) {
LOG_DEBUG("database.c", "Optimizing database");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "VACUUM;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "PRAGMA optimize;", NULL, NULL, NULL));
}
sqlite3_close(db->db);
if (db->type == IPC_PRODUCER_DATABASE) {
remove(db->filename);
}
free(db);
db = NULL;
}
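
Taken together, `database_create()`, `database_initialize()`, `database_open()` and `database_close()` imply the lifecycle below. This is a usage sketch inferred from the functions above, not code from the repository:

```c
database_t *db = database_create("index.sist2", INDEX_DATABASE);
database_initialize(db); // creates the schema, then closes its temporary connection
database_open(db);       // reopens with pragmas applied and statements prepared
/* ... reads and writes ... */
database_close(db, 1);   // non-zero flag: VACUUM + PRAGMA optimize before closing
```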
void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *return_value_len) {
sqlite3_bind_text(db->select_thumbnail_stmt, 1, id, -1, SQLITE_STATIC);
sqlite3_bind_int(db->select_thumbnail_stmt, 2, num);
int ret = sqlite3_step(db->select_thumbnail_stmt);
if (ret == SQLITE_DONE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->select_thumbnail_stmt));
*return_value_len = 0;
return NULL;
}
CRASH_IF_STMT_FAIL(ret);
const void *blob = sqlite3_column_blob(db->select_thumbnail_stmt, 0);
const int blob_size = sqlite3_column_bytes(db->select_thumbnail_stmt, 0);
*return_value_len = blob_size;
void *return_data = malloc(blob_size);
memcpy(return_data, blob, blob_size);
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->select_thumbnail_stmt));
return return_data;
}
void database_write_index_descriptor(database_t *db, index_descriptor_t *desc) {
sqlite3_exec(db->db, "DELETE FROM descriptor;", NULL, NULL, NULL);
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "INSERT INTO descriptor (id, version_major, version_minor, version_patch,"
" root, name, rewrite_url, timestamp) VALUES (?,?,?,?,?,?,?,?);", -1, &stmt, NULL);
sqlite3_bind_text(stmt, 1, desc->id, -1, SQLITE_STATIC);
sqlite3_bind_int(stmt, 2, desc->version_major);
sqlite3_bind_int(stmt, 3, desc->version_minor);
sqlite3_bind_int(stmt, 4, desc->version_patch);
sqlite3_bind_text(stmt, 5, desc->root, -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, 6, desc->name, -1, SQLITE_STATIC);
sqlite3_bind_text(stmt, 7, desc->rewrite_url, -1, SQLITE_STATIC);
sqlite3_bind_int64(stmt, 8, desc->timestamp);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
sqlite3_finalize(stmt);
}
index_descriptor_t *database_read_index_descriptor(database_t *db) {
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "SELECT id, version_major, version_minor, version_patch,"
" root, name, rewrite_url, timestamp FROM descriptor;", -1, &stmt, NULL);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
const char *id = (char *) sqlite3_column_text(stmt, 0);
int v_major = sqlite3_column_int(stmt, 1);
int v_minor = sqlite3_column_int(stmt, 2);
int v_patch = sqlite3_column_int(stmt, 3);
const char *root = (char *) sqlite3_column_text(stmt, 4);
const char *name = (char *) sqlite3_column_text(stmt, 5);
const char *rewrite_url = (char *) sqlite3_column_text(stmt, 6);
int timestamp = sqlite3_column_int(stmt, 7);
index_descriptor_t *desc = malloc(sizeof(index_descriptor_t));
strcpy(desc->id, id);
snprintf(desc->version, sizeof(desc->version), "%d.%d.%d", v_major, v_minor, v_patch);
desc->version_major = v_major;
desc->version_minor = v_minor;
desc->version_patch = v_patch;
strcpy(desc->root, root);
strcpy(desc->name, name);
strcpy(desc->rewrite_url, rewrite_url);
desc->timestamp = timestamp;
CRASH_IF_NOT_SQLITE_OK(sqlite3_finalize(stmt));
return desc;
}
database_iterator_t *database_create_delete_list_iterator(database_t *db) {
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "SELECT id FROM delete_list;", -1, &stmt, NULL);
database_iterator_t *iter = malloc(sizeof(database_iterator_t));
iter->stmt = stmt;
iter->db = db;
return iter;
}
char *database_delete_list_iter(database_iterator_t *iter) {
int ret = sqlite3_step(iter->stmt);
if (ret == SQLITE_ROW) {
const char *id = (const char *) sqlite3_column_text(iter->stmt, 0);
char *id_heap = malloc(strlen(id) + 1);
strcpy(id_heap, id);
return id_heap;
}
if (ret != SQLITE_DONE) {
LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db));
}
if (sqlite3_finalize(iter->stmt) != SQLITE_OK) {
LOG_FATALF("database.c", "FIXME: delete iter returned %s", sqlite3_errmsg(iter->db->db));
}
iter->stmt = NULL;
return NULL;
}
database_iterator_t *database_create_document_iterator(database_t *db) {
sqlite3_stmt *stmt;
// TODO optimization: remove mtime, size, _id from json_data
sqlite3_prepare_v2(db->db, "WITH doc (j) AS (SELECT CASE"
" WHEN sc.json_data IS NULL THEN"
" CASE"
" WHEN t.tag IS NULL THEN"
" document.json_data"
" ELSE"
" json_set(document.json_data, '$.tag', json_group_array(t.tag))"
" END"
" ELSE"
" CASE"
" WHEN t.tag IS NULL THEN"
" json_patch(document.json_data, sc.json_data)"
" ELSE"
// This will overwrite any tags specified in the sidecar file!
// TODO: concatenate the two arrays?
" json_set(json_patch(document.json_data, sc.json_data), '$.tag', json_group_array(t.tag))"
" END"
" END"
" FROM document"
" LEFT JOIN document_sidecar sc ON document.id = sc.id"
" LEFT JOIN tag t ON document.id = t.id"
" GROUP BY document.id)"
" SELECT json_set(j, '$.index', (SELECT id FROM descriptor)) FROM doc", -1, &stmt, NULL);
database_iterator_t *iter = malloc(sizeof(database_iterator_t));
iter->stmt = stmt;
iter->db = db;
return iter;
}
cJSON *database_document_iter(database_iterator_t *iter) {
if (iter->stmt == NULL) {
LOG_ERROR("database.c", "FIXME: database_document_iter() called after iteration stopped");
return NULL;
}
int ret = sqlite3_step(iter->stmt);
if (ret == SQLITE_ROW) {
const char *json_string = (const char *) sqlite3_column_text(iter->stmt, 0);
return cJSON_Parse(json_string);
}
if (ret != SQLITE_DONE) {
LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db));
}
if (sqlite3_finalize(iter->stmt) != SQLITE_OK) {
LOG_FATALF("database.c", "FIXME: doc iter returned %s", sqlite3_errmsg(iter->db->db));
}
iter->stmt = NULL;
return NULL;
}
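
A hedged usage sketch for the iterator pair above: `database_document_iter()` yields one merged cJSON document per row and returns NULL once the statement has been finalized.

```c
database_iterator_t *iter = database_create_document_iterator(db);
cJSON *doc;
while ((doc = database_document_iter(iter)) != NULL) {
    char *json = cJSON_PrintUnformatted(doc); // one merged document per row
    puts(json);
    free(json);
    cJSON_Delete(doc);
}
free(iter); // assumption: the caller owns the iterator allocation
```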
cJSON *database_incremental_scan_begin(database_t *db) {
LOG_DEBUG("database.c", "Preparing database for incremental scan");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "UPDATE document SET marked=0;", NULL, NULL, NULL));
}
cJSON *database_incremental_scan_end(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM delete_list WHERE id IN (SELECT id FROM document WHERE marked=1);",
NULL, NULL, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM thumbnail WHERE id IN (SELECT id FROM document WHERE marked=0);",
NULL, NULL, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"INSERT INTO delete_list (id) SELECT id FROM document WHERE marked=0;",
NULL, NULL, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM document_sidecar WHERE id IN (SELECT id FROM document WHERE marked=0);",
NULL, NULL, NULL
));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db,
"DELETE FROM document WHERE marked=0;",
NULL, NULL, NULL
));
}
int database_mark_document(database_t *db, const char *id, int mtime) {
sqlite3_bind_text(db->mark_document_stmt, 1, id, -1, SQLITE_STATIC);
sqlite3_bind_int(db->mark_document_stmt, 2, mtime);
pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
int ret = sqlite3_step(db->mark_document_stmt);
if (ret == SQLITE_ROW) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->mark_document_stmt));
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
return TRUE;
}
if (ret == SQLITE_DONE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->mark_document_stmt));
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
return FALSE;
}
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
CRASH_IF_STMT_FAIL(ret);
}
void database_write_document(database_t *db, document_t *doc, const char *json_data) {
sqlite3_bind_text(db->write_document_stmt, 1, doc->doc_id, -1, SQLITE_STATIC);
sqlite3_bind_int(db->write_document_stmt, 2, doc->mtime);
sqlite3_bind_int64(db->write_document_stmt, 3, (long) doc->size);
sqlite3_bind_text(db->write_document_stmt, 4, json_data, -1, SQLITE_STATIC);
pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
CRASH_IF_STMT_FAIL(sqlite3_step(db->write_document_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_document_stmt));
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
}
void database_write_document_sidecar(database_t *db, const char *id, const char *json_data) {
sqlite3_bind_text(db->write_document_sidecar_stmt, 1, id, -1, SQLITE_STATIC);
sqlite3_bind_text(db->write_document_sidecar_stmt, 2, json_data, -1, SQLITE_STATIC);
pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
CRASH_IF_STMT_FAIL(sqlite3_step(db->write_document_sidecar_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_document_sidecar_stmt));
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
}
void database_write_thumbnail(database_t *db, const char *id, int num, void *data, size_t data_size) {
sqlite3_bind_text(db->write_thumbnail_stmt, 1, id, -1, SQLITE_STATIC);
sqlite3_bind_int(db->write_thumbnail_stmt, 2, num);
sqlite3_bind_blob(db->write_thumbnail_stmt, 3, data, (int) data_size, SQLITE_STATIC);
pthread_mutex_lock(&db->ipc_ctx->index_db_mutex);
CRASH_IF_STMT_FAIL(sqlite3_step(db->write_thumbnail_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->write_thumbnail_stmt));
pthread_mutex_unlock(&db->ipc_ctx->index_db_mutex);
}
//void database_create_fts_index(database_t *db, database_t *fts_db) {
// // In a separate file,
//
// // use database_initialize() to create FTS schema
// // if --force-reset, then truncate the tables first
//
// /*
// * create/append fts table
// *
// * create/append scalar index table with
// * id,index,size,mtime,mime
// *
// * create/append path index table with
// * index,path,depth
// *
// * content table is a view with SELECT UNION for all attached tables
// * random_seed column
// */
//
// // INSERT INTO ft(ft) VALUES('optimize');
//}
job_t *database_get_work(database_t *db, job_type_t job_type) {
job_t *job;
pthread_mutex_lock(&db->ipc_ctx->mutex);
while (db->ipc_ctx->job_count == 0 && !db->ipc_ctx->no_more_jobs) {
pthread_cond_timedwait_ms(&db->ipc_ctx->has_work_cond, &db->ipc_ctx->mutex, 10);
}
pthread_mutex_unlock(&db->ipc_ctx->mutex);
pthread_mutex_lock(&db->ipc_ctx->db_mutex);
if (job_type == JOB_PARSE_JOB) {
int ret = sqlite3_step(db->pop_parse_job_stmt);
if (ret == SQLITE_DONE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_parse_job_stmt));
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
return NULL;
} else {
CRASH_IF_STMT_FAIL(ret);
}
job = malloc(sizeof(*job));
job->parse_job = create_parse_job(
(const char *) sqlite3_column_text(db->pop_parse_job_stmt, 0),
sqlite3_column_int(db->pop_parse_job_stmt, 1),
sqlite3_column_int64(db->pop_parse_job_stmt, 2));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_parse_job_stmt));
} else {
int ret = sqlite3_step(db->pop_index_job_stmt);
if (ret == SQLITE_DONE) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt));
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
return NULL;
}
CRASH_IF_STMT_FAIL(ret);
job = malloc(sizeof(*job));
const char *line = (const char *) sqlite3_column_text(db->pop_index_job_stmt, 2);
if (line != NULL) {
job->bulk_line = malloc(sizeof(es_bulk_line_t) + strlen(line) + 1);
strcpy(job->bulk_line->line, line);
} else {
job->bulk_line = malloc(sizeof(es_bulk_line_t));
}
strcpy(job->bulk_line->doc_id, (const char *) sqlite3_column_text(db->pop_index_job_stmt, 0));
job->bulk_line->type = sqlite3_column_int(db->pop_index_job_stmt, 1);
job->bulk_line->next = NULL;
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->pop_index_job_stmt));
}
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
pthread_mutex_lock(&db->ipc_ctx->mutex);
db->ipc_ctx->job_count -= 1;
pthread_mutex_unlock(&db->ipc_ctx->mutex);
job->type = job_type;
return job;
}
void database_add_work(database_t *db, job_t *job) {
int ret;
pthread_mutex_lock(&db->ipc_ctx->db_mutex);
if (job->type == JOB_PARSE_JOB) {
do {
sqlite3_bind_text(db->insert_parse_job_stmt, 1, job->parse_job->filepath, -1, SQLITE_STATIC);
sqlite3_bind_int(db->insert_parse_job_stmt, 2, job->parse_job->vfile.mtime);
sqlite3_bind_int64(db->insert_parse_job_stmt, 3, (long) job->parse_job->vfile.st_size);
ret = sqlite3_step(db->insert_parse_job_stmt);
if (ret == SQLITE_FULL) {
// sqlite3_reset() would report SQLITE_FULL again here; clear the
// statement without crashing, then retry after a delay
sqlite3_reset(db->insert_parse_job_stmt);
usleep(1000000);
} else {
CRASH_IF_STMT_FAIL(ret);
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->insert_parse_job_stmt));
}
} while (ret != SQLITE_DONE);
} else if (job->type == JOB_BULK_LINE) {
do {
sqlite3_bind_text(db->insert_index_job_stmt, 1, job->bulk_line->doc_id, -1, SQLITE_STATIC);
sqlite3_bind_int(db->insert_index_job_stmt, 2, job->bulk_line->type);
sqlite3_bind_text(db->insert_index_job_stmt, 3, job->bulk_line->line, -1, SQLITE_STATIC);
ret = sqlite3_step(db->insert_index_job_stmt);
if (ret == SQLITE_FULL) {
sqlite3_reset(db->insert_index_job_stmt);
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
usleep(100000);
pthread_mutex_lock(&db->ipc_ctx->db_mutex);
continue;
} else {
CRASH_IF_STMT_FAIL(ret);
}
ret = sqlite3_reset(db->insert_index_job_stmt);
if (ret == SQLITE_FULL) {
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
usleep(100000);
pthread_mutex_lock(&db->ipc_ctx->db_mutex);
}
} while (ret != SQLITE_DONE && ret != SQLITE_OK);
} else {
LOG_FATAL("database.c", "FIXME: invalid job type");
}
pthread_mutex_unlock(&db->ipc_ctx->db_mutex);
pthread_mutex_lock(&db->ipc_ctx->mutex);
db->ipc_ctx->job_count += 1;
pthread_cond_signal(&db->ipc_ctx->has_work_cond);
pthread_mutex_unlock(&db->ipc_ctx->mutex);
}
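// Minimal usage sketch for the IPC queue above, assuming one producer and one
// worker sharing the same IPC database (thread setup and error handling are
// elided; handle_parse_job() is hypothetical):
//
// // Producer:
// database_add_work(db, &(job_t) {.type = JOB_PARSE_JOB, .parse_job = job});
// db->ipc_ctx->no_more_jobs = TRUE; // once everything is queued
//
// // Worker:
// job_t *j;
// while ((j = database_get_work(db, JOB_PARSE_JOB)) != NULL) {
//     handle_parse_job(j->parse_job);
//     free(j->parse_job);
//     free(j);
// }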

155
src/database/database.h Normal file
View File

@ -0,0 +1,155 @@
#ifndef SIST2_DATABASE_H
#define SIST2_DATABASE_H
#include <sqlite3.h>
#include <cjson/cJSON.h>
#include "src/sist.h"
#include "src/index/elastic.h"
typedef struct index_descriptor index_descriptor_t;
extern const char *IpcDatabaseSchema;
extern const char *IndexDatabaseSchema;
typedef enum {
INDEX_DATABASE,
IPC_CONSUMER_DATABASE,
IPC_PRODUCER_DATABASE,
FTS_DATABASE
} database_type_t;
typedef enum {
JOB_UNDEFINED,
JOB_BULK_LINE,
JOB_PARSE_JOB
} job_type_t;
typedef struct {
job_type_t type;
union {
parse_job_t *parse_job;
es_bulk_line_t *bulk_line;
};
} job_t;
typedef struct {
int job_count;
int no_more_jobs;
int completed_job_count;
pthread_mutex_t mutex;
pthread_mutex_t db_mutex;
pthread_mutex_t index_db_mutex;
pthread_cond_t has_work_cond;
char current_job[MAX_THREADS][PATH_MAX * 2];
} database_ipc_ctx_t;
typedef struct database {
char filename[PATH_MAX];
database_type_t type;
sqlite3 *db;
// Prepared statements
sqlite3_stmt *select_thumbnail_stmt;
sqlite3_stmt *treemap_merge_up_update_stmt;
sqlite3_stmt *treemap_merge_up_delete_stmt;
sqlite3_stmt *mark_document_stmt;
sqlite3_stmt *write_document_stmt;
sqlite3_stmt *write_document_sidecar_stmt;
sqlite3_stmt *write_thumbnail_stmt;
sqlite3_stmt *insert_parse_job_stmt;
sqlite3_stmt *insert_index_job_stmt;
sqlite3_stmt *pop_parse_job_stmt;
sqlite3_stmt *pop_index_job_stmt;
database_ipc_ctx_t *ipc_ctx;
} database_t;
typedef struct {
database_t *db;
sqlite3_stmt *stmt;
} database_iterator_t;
typedef struct {
const char *path;
const char *parent;
long size;
} treemap_row_t;
static treemap_row_t null_treemap_row = {0, 0, 0};
database_t *database_create(const char *filename, database_type_t type);
void database_initialize(database_t *db);
void database_open(database_t *db);
void database_close(database_t *, int optimize);
void database_write_thumbnail(database_t *db, const char *id, int num, void *data, size_t data_size);
void *database_read_thumbnail(database_t *db, const char *id, int num, size_t *return_value_len);
void database_write_index_descriptor(database_t *db, index_descriptor_t *desc);
index_descriptor_t *database_read_index_descriptor(database_t *db);
void database_write_document(database_t *db, document_t *doc, const char *json_data);
database_iterator_t *database_create_document_iterator(database_t *db);
cJSON *database_document_iter(database_iterator_t *);
#define database_document_iter_foreach(element, iter) \
for (cJSON *element = database_document_iter(iter); element != NULL; element = database_document_iter(iter))
database_iterator_t *database_create_delete_list_iterator(database_t *db);
char * database_delete_list_iter(database_iterator_t *iter);
#define database_delete_list_iter_foreach(element, iter) \
for (char *element = database_delete_list_iter(iter); element != NULL; element = database_delete_list_iter(iter))
cJSON *database_incremental_scan_begin(database_t *db);
cJSON *database_incremental_scan_end(database_t *db);
int database_mark_document(database_t *db, const char *id, int mtime);
void database_write_document_sidecar(database_t *db, const char *id, const char *json_data);
database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold);
treemap_row_t database_treemap_iter(database_iterator_t *iter);
#define database_treemap_iter_foreach(element, iter) \
for (treemap_row_t element = database_treemap_iter(iter); element.path != NULL; element = database_treemap_iter(iter))
void database_generate_stats(database_t *db, double treemap_threshold);
job_t *database_get_work(database_t *db, job_type_t job_type);
void database_add_work(database_t *db, job_t *job);
//void database_index(database_t *db);
#define CRASH_IF_STMT_FAIL(x) do { \
int return_value = x; \
if (return_value != SQLITE_DONE && return_value != SQLITE_ROW) { \
LOG_FATALF("database.c", "Sqlite error @ database.c:%d : (%d) %s", __LINE__, return_value, sqlite3_errmsg(db->db)); \
} \
} while (0)
#define CRASH_IF_NOT_SQLITE_OK(x) do { \
int return_value = x; \
if (return_value != SQLITE_OK) { \
LOG_FATALF("database.c", "Sqlite error @ database.c:%d : (%d) %s", __LINE__, return_value, sqlite3_errmsg(db->db)); \
} \
} while (0)
#endif //SIST2_DATABASE_H

78
src/database/database_schema.c Normal file
View File

@ -0,0 +1,78 @@
const char *IpcDatabaseSchema =
"CREATE TABLE parse_job ("
" id INTEGER PRIMARY KEY,"
" filepath TEXT NOT NULL,"
" mtime INTEGER NOT NULL,"
" st_size INTEGER NOT NULL"
");"
""
"CREATE TABLE index_job ("
" id INTEGER PRIMARY KEY,"
" doc_id TEXT NOT NULL CHECK ( length(doc_id) = 32 ),"
" type INTEGER NOT NULL,"
" line TEXT"
");";
const char *IndexDatabaseSchema =
"CREATE TABLE thumbnail ("
" id TEXT NOT NULL CHECK ( length(id) = 32 ),"
" num INTEGER NOT NULL,"
" data BLOB NOT NULL,"
" PRIMARY KEY(id, num)"
") WITHOUT ROWID;"
""
"CREATE TABLE document ("
" id TEXT PRIMARY KEY CHECK ( length(id) = 32 ),"
" marked INTEGER NOT NULL DEFAULT (1),"
" mtime INTEGER NOT NULL,"
" size INTEGER NOT NULL,"
" json_data TEXT NOT NULL CHECK ( json_valid(json_data) )"
") WITHOUT ROWID;"
""
"CREATE TABLE delete_list ("
" id TEXT PRIMARY KEY CHECK ( length(id) = 32 )"
") WITHOUT ROWID;"
""
"CREATE TABLE tag ("
" id TEXT NOT NULL,"
" tag TEXT NOT NULL"
");"
""
"CREATE TABLE document_sidecar ("
" id TEXT PRIMARY KEY NOT NULL,"
" json_data TEXT NOT NULL"
") WITHOUT ROWID;"
""
"CREATE TABLE descriptor ("
" id TEXT NOT NULL,"
" version_major INTEGER NOT NULL,"
" version_minor INTEGER NOT NULL,"
" version_patch INTEGER NOT NULL,"
" root TEXT NOT NULL,"
" name TEXT NOT NULL,"
" rewrite_url TEXT,"
" timestamp INTEGER NOT NULL"
");"
""
"CREATE TABLE stats_treemap ("
" path TEXT NOT NULL,"
" size INTEGER NOT NULL"
");"
""
"CREATE TABLE stats_size_agg ("
" bucket INTEGER NOT NULL,"
" count INTEGER NOT NULL"
");"
""
"CREATE TABLE stats_date_agg ("
" bucket INTEGER NOT NULL,"
" count INTEGER NOT NULL"
");"
""
"CREATE TABLE stats_mime_agg ("
" mime TEXT NOT NULL,"
" size INTEGER NOT NULL,"
" count INTEGER NOT NULL"
");";

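// Example query against the document table above, once an index has been
// written (the ->> JSON operator requires SQLite 3.38+; illustrative only):
//
// SELECT id, json_data->>'mime' FROM document WHERE marked=1;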
159
src/database/database_stats.c Normal file
View File

@ -0,0 +1,159 @@
#include "database.h"
#include "src/sist.h"
#include "src/ctx.h"
#define TREEMAP_MINIMUM_MERGES_TO_CONTINUE (100)
#define SIZE_BUCKET (long)(5 * 1000 * 1000)
#define DATE_BUCKET (long)(2629800) // ~30 days
database_iterator_t *database_create_treemap_iterator(database_t *db, long threshold) {
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db,
"SELECT path, path_parent(path), size FROM tm"
" WHERE path_parent(path) IN (SELECT path FROM tm)"
" AND size<?",
-1, &stmt, NULL);
sqlite3_bind_int64(stmt, 1, threshold);
database_iterator_t *iter = malloc(sizeof(database_iterator_t));
iter->stmt = stmt;
iter->db = db;
return iter;
}
treemap_row_t database_treemap_iter(database_iterator_t *iter) {
if (iter->stmt == NULL) {
LOG_FATAL("database.c", "FIXME: database_treemap_iter() called after iteration stopped");
}
int ret = sqlite3_step(iter->stmt);
if (ret == SQLITE_ROW) {
treemap_row_t row = {
.path = (const char *) sqlite3_column_text(iter->stmt, 0),
.parent = (const char *) sqlite3_column_text(iter->stmt, 1),
.size = sqlite3_column_int64(iter->stmt, 2)
};
return row;
}
if (ret != SQLITE_DONE) {
LOG_FATALF("database.c", "FIXME: treemap iter returned %s", sqlite3_errmsg(iter->db->db));
}
sqlite3_finalize(iter->stmt);
iter->stmt = NULL;
return (treemap_row_t) {NULL, NULL, 0};
}
void database_generate_stats(database_t *db, double treemap_threshold) {
LOG_INFO("database.c", "Generating stats");
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_size_agg;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_date_agg;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_mime_agg;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db, "DELETE FROM stats_treemap;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(
sqlite3_exec(db->db, "CREATE TEMP TABLE tm(path TEXT PRIMARY KEY, size INT);", NULL, NULL, NULL));
sqlite3_prepare_v2(db->db, "UPDATE tm SET size=size+? WHERE path=?;", -1, &db->treemap_merge_up_update_stmt, NULL);
sqlite3_prepare_v2(db->db, "DELETE FROM tm WHERE path = ?;", -1, &db->treemap_merge_up_delete_stmt, NULL);
// size aggregation
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "INSERT INTO stats_size_agg"
" SELECT"
" cast(size / ?1 as int) * ?1 as bucket,"
" count(*) as count"
" FROM document"
" GROUP BY bucket", -1, &stmt, NULL);
sqlite3_bind_int(stmt, 1, SIZE_BUCKET);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
sqlite3_finalize(stmt);
// date aggregation
sqlite3_prepare_v2(db->db, "INSERT INTO stats_date_agg"
" SELECT"
" cast(mtime / ?1 as int) * ?1 as bucket,"
" count(*) as count"
" FROM document"
" GROUP BY bucket", -1, &stmt, NULL);
sqlite3_bind_int(stmt, 1, DATE_BUCKET);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
sqlite3_finalize(stmt);
// mime aggregation
sqlite3_prepare_v2(db->db, "INSERT INTO stats_mime_agg"
" SELECT"
" (json_data->>'mime') as bucket,"
" sum(size),"
" count(*)"
" FROM document"
" WHERE bucket IS NOT NULL"
" GROUP BY bucket", -1, &stmt, NULL);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
sqlite3_finalize(stmt);
// Treemap
sqlite3_prepare_v2(db->db, "SELECT SUM(size) FROM document;", -1, &stmt, NULL);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
long total_size = sqlite3_column_int64(stmt, 0);
long threshold = (long) ((double) total_size * treemap_threshold);
sqlite3_finalize(stmt);
// flat map
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
"INSERT INTO tm (path, size) SELECT json_data->>'path' as path, sum(size)"
" FROM document WHERE json_data->>'parent' IS NULL GROUP BY path;",
NULL, NULL, NULL));
// Merge up
int merged_rows = 0;
do {
if (merged_rows) {
LOG_INFOF("database.c", "Treemap merge iteration (%d rows changed)", merged_rows);
}
merged_rows = 0;
sqlite3_prepare_v2(db->db,
"INSERT INTO tm (path, size) SELECT path_parent(path) as parent, 0 "
" FROM tm WHERE parent not IN (SELECT path FROM tm) AND size<?"
" ON CONFLICT DO NOTHING;", -1, &stmt, NULL);
sqlite3_bind_int64(stmt, 1, threshold);
CRASH_IF_STMT_FAIL(sqlite3_step(stmt));
database_iterator_t *iter = database_create_treemap_iterator(db, threshold);
database_treemap_iter_foreach(row, iter) {
sqlite3_bind_int64(db->treemap_merge_up_update_stmt, 1, row.size);
sqlite3_bind_text(db->treemap_merge_up_update_stmt, 2, row.parent, -1, SQLITE_STATIC);
CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_update_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_update_stmt));
sqlite3_bind_text(db->treemap_merge_up_delete_stmt, 1, row.path, -1, SQLITE_STATIC);
CRASH_IF_STMT_FAIL(sqlite3_step(db->treemap_merge_up_delete_stmt));
CRASH_IF_NOT_SQLITE_OK(sqlite3_reset(db->treemap_merge_up_delete_stmt));
merged_rows += 1;
}
} while (merged_rows > TREEMAP_MINIMUM_MERGES_TO_CONTINUE);
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(db->db,
"INSERT INTO stats_treemap (path, size) SELECT path,size FROM tm;",
NULL, NULL, NULL));
LOG_INFO("database.c", "Done!");
}
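// Bucket arithmetic, worked through: with SIZE_BUCKET = 5,000,000, a
// 12,345,678-byte document lands in bucket
// cast(12345678 / 5000000 as int) * 5000000 = 10,000,000.
// The merge-up loop above folds rows smaller than `threshold` into their
// parent paths and stops once an iteration merges no more than
// TREEMAP_MINIMUM_MERGES_TO_CONTINUE rows.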

5
src/database/database_stats.h Normal file
View File

@ -0,0 +1,5 @@
#ifndef SIST2_DATABASE_STATS_H
#define SIST2_DATABASE_STATS_H
#endif //SIST2_DATABASE_STATS_H

src/index/elastic.c
View File

@ -29,7 +29,7 @@ void destroy_indexer(es_indexer_t *indexer) {
return;
}
LOG_DEBUG("elastic.c", "Destroying indexer")
LOG_DEBUG("elastic.c", "Destroying indexer");
if (indexer->es_url != NULL) {
free(indexer->es_url);
@ -64,18 +64,21 @@ void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
cJSON_Delete(line);
}
void index_json_func(void *arg) {
es_bulk_line_t *line = arg;
elastic_index_line(line);
void index_json_func(job_t *job) {
elastic_index_line(job->bulk_line);
}
void delete_document(const char* document_id_str, void* UNUSED(_data)) {
void delete_document(const char *document_id) {
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t));
bulk_line->type = ES_BULK_LINE_DELETE;
bulk_line->next = NULL;
strcpy(bulk_line->doc_id, document_id);
strcpy(bulk_line->doc_id, document_id_str);
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
tpool_add_work(IndexCtx.pool, &(job_t) {
.type = JOB_BULK_LINE,
.bulk_line = bulk_line,
});
}
@ -92,7 +95,10 @@ void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
bulk_line->next = NULL;
cJSON_free(json);
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
tpool_add_work(IndexCtx.pool, &(job_t) {
.type = JOB_BULK_LINE,
.bulk_line = bulk_line,
});
}
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) {
@ -266,7 +272,7 @@ void print_error(response_t *r) {
void _elastic_flush(int max) {
if (max == 0) {
LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue")
LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue");
return;
}
@ -279,13 +285,13 @@ void _elastic_flush(int max) {
response_t *r = web_post(bulk_url, buf, IndexCtx.es_insecure_ssl);
if (r->status_code == 0) {
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url);
}
if (r->status_code == 413) {
if (max <= 1) {
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id)
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id);
free_response(r);
free(buf);
free_queue(1);
@ -306,7 +312,7 @@ void _elastic_flush(int max) {
free_response(r);
free(buf);
LOG_WARNING("elastic.c", "Got 429 status, will retry after delay")
LOG_WARNING("elastic.c", "Got 429 status, will retry after delay");
usleep(1000000 * 20);
_elastic_flush(max);
return;
@ -441,7 +447,7 @@ es_version_t *elastic_get_version(const char *es_url, int insecure) {
}
if (cJSON_GetObjectItem(response, "error") != NULL) {
LOG_WARNING("elastic.c", "Could not get Elasticsearch version")
LOG_WARNING("elastic.c", "Could not get Elasticsearch version");
print_error(r);
free_response(r);
return NULL;
@ -477,7 +483,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
IndexCtx.es_version = es_version;
if (es_version == NULL) {
LOG_FATAL("elastic.c", "Could not get ES version")
LOG_FATAL("elastic.c", "Could not get ES version");
}
LOG_INFOF("elastic.c",
@ -485,7 +491,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
format_es_version(es_version), IS_SUPPORTED_ES_VERSION(es_version), IS_LEGACY_VERSION(es_version));
if (!IS_SUPPORTED_ES_VERSION(es_version)) {
LOG_FATAL("elastic.c", "This elasticsearch version is not supported!")
LOG_FATAL("elastic.c", "This elasticsearch version is not supported!");
}
char *settings = NULL;
@ -512,7 +518,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
if (r->status_code != 200) {
print_error(r);
LOG_FATAL("elastic.c", "Could not create index")
LOG_FATAL("elastic.c", "Could not create index");
}
LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
@ -533,12 +539,13 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
LOG_INFOF("elastic.c", "Update ES settings <%d>", r->status_code);
if (r->status_code != 200) {
print_error(r);
LOG_FATAL("elastic.c", "Could not update user settings")
LOG_FATAL("elastic.c", "Could not update user settings");
}
free_response(r);
if (IS_LEGACY_VERSION(es_version)) {
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url,
IndexCtx.es_index);
} else {
snprintf(url, sizeof(url), "%s/%s/_mappings", IndexCtx.es_url, IndexCtx.es_index);
}
@ -547,7 +554,7 @@ void elastic_init(int force_reset, const char *user_mappings, const char *user_s
LOG_INFOF("elastic.c", "Update ES mappings <%d>", r->status_code);
if (r->status_code != 200) {
print_error(r);
LOG_FATAL("elastic.c", "Could not update user mappings")
LOG_FATAL("elastic.c", "Could not update user mappings");
}
free_response(r);

src/index/elastic.h
View File

@ -46,7 +46,7 @@ void print_json(cJSON *document, const char index_id_str[SIST_INDEX_ID_LEN]);
void index_json(cJSON *document, const char doc_id[SIST_INDEX_ID_LEN]);
void delete_document(const char *document_id_str, void* data);
void delete_document(const char *document_id);
es_indexer_t *create_indexer(const char *url, const char *index);

src/index/web.c
View File

@ -65,7 +65,7 @@ void web_post_async_poll(subreq_ctx_t *req) {
curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &req->response->status_code);
if (req->response->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", req->curl_err_buffer)
LOG_ERRORF("web.c", "CURL Error: %s", req->curl_err_buffer);
}
curl_multi_cleanup(req->multi);
@ -104,7 +104,7 @@ subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) {
curl_multi_add_handle(req->multi, curl);
curl_multi_perform(req->multi, &req->running_handles);
LOG_DEBUGF("web.c", "async request POST %s", url)
LOG_DEBUGF("web.c", "async request POST %s", url);
return req;
}
@ -136,7 +136,7 @@ response_t *web_get(const char *url, int timeout, int insecure) {
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
if (resp->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer);
}
curl_easy_cleanup(curl);
@ -180,7 +180,7 @@ response_t *web_post(const char *url, const char *data, int insecure) {
resp->size = buffer.cur;
if (resp->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer);
}
curl_easy_cleanup(curl);

src/io/serialize.c
View File

@ -1,9 +1,7 @@
#include "src/ctx.h"
#include "serialize.h"
#include "src/parsing/parse.h"
#include "src/parsing/mime.h"
#include <zstd.h>
char *get_meta_key_text(enum metakey meta_key) {
@ -79,7 +77,7 @@ char *get_meta_key_text(enum metakey meta_key) {
case MetaChecksum:
return "checksum";
default:
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key);
}
}
@ -175,7 +173,7 @@ char *build_json_string(document_t *doc) {
break;
}
default:
LOG_FATALF("serialize.c", "Invalid meta key: %x %s", meta->key, get_meta_key_text(meta->key))
LOG_FATALF("serialize.c", "Invalid meta key: %x %s", meta->key, get_meta_key_text(meta->key));
}
meta_line_t *tmp = meta;
@ -189,391 +187,10 @@ char *build_json_string(document_t *doc) {
return json_str;
}
static struct {
FILE *out_file;
size_t buf_out_size;
void *buf_out;
ZSTD_CCtx *cctx;
} WriterCtx = {
.out_file = NULL
};
#define ZSTD_COMPRESSION_LEVEL 10
void initialize_writer_ctx(const char *file_path) {
WriterCtx.out_file = fopen(file_path, "wb");
WriterCtx.buf_out_size = ZSTD_CStreamOutSize();
WriterCtx.buf_out = malloc(WriterCtx.buf_out_size);
WriterCtx.cctx = ZSTD_createCCtx();
ZSTD_CCtx_setParameter(WriterCtx.cctx, ZSTD_c_compressionLevel, ZSTD_COMPRESSION_LEVEL);
ZSTD_CCtx_setParameter(WriterCtx.cctx, ZSTD_c_checksumFlag, FALSE);
LOG_DEBUGF("serialize.c", "Open index file for writing %s", file_path)
}
void zstd_write_string(const char *string, const size_t len) {
ZSTD_inBuffer input = {string, len, 0};
do {
ZSTD_outBuffer output = {WriterCtx.buf_out, WriterCtx.buf_out_size, 0};
ZSTD_compressStream2(WriterCtx.cctx, &output, &input, ZSTD_e_continue);
if (output.pos > 0) {
ScanCtx.stat_index_size += fwrite(WriterCtx.buf_out, 1, output.pos, WriterCtx.out_file);
}
} while (input.pos != input.size);
}
void write_document_func(void *arg) {
if (WriterCtx.out_file == NULL) {
char dstfile[PATH_MAX];
snprintf(dstfile, PATH_MAX, "%s_index_main.ndjson.zst", ScanCtx.index.path);
initialize_writer_ctx(dstfile);
}
document_t *doc = arg;
char *json_str = build_json_string(doc);
const size_t json_str_len = strlen(json_str);
json_str = realloc(json_str, json_str_len + 1);
*(json_str + json_str_len) = '\n';
zstd_write_string(json_str, json_str_len + 1);
free(json_str);
free(doc->filepath);
}
void zstd_close() {
if (WriterCtx.out_file == NULL) {
LOG_DEBUG("serialize.c", "No zstd stream to close, skipping cleanup")
return;
}
size_t remaining;
do {
ZSTD_outBuffer output = {WriterCtx.buf_out, WriterCtx.buf_out_size, 0};
remaining = ZSTD_endStream(WriterCtx.cctx, &output);
if (output.pos > 0) {
ScanCtx.stat_index_size += fwrite(WriterCtx.buf_out, 1, output.pos, WriterCtx.out_file);
}
} while (remaining != 0);
ZSTD_freeCCtx(WriterCtx.cctx);
free(WriterCtx.buf_out);
fclose(WriterCtx.out_file);
LOG_DEBUG("serialize.c", "End zstd stream & close index file")
}
void writer_cleanup() {
zstd_close();
WriterCtx.out_file = NULL;
}
void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON *json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "id", desc->id);
cJSON_AddStringToObject(json, "version", desc->version);
cJSON_AddStringToObject(json, "root", desc->root);
cJSON_AddStringToObject(json, "name", desc->name);
cJSON_AddStringToObject(json, "type", desc->type);
cJSON_AddStringToObject(json, "rewrite_url", desc->rewrite_url);
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
}
char *str = cJSON_Print(json);
size_t ret = write(fd, str, strlen(str));
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
}
free(str);
close(fd);
cJSON_Delete(json);
}
index_descriptor_t read_index_descriptor(char *path) {
struct stat info;
stat(path, &info);
int fd = open(path, O_RDONLY);
if (fd == -1) {
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
}
char *buf = malloc(info.st_size + 1);
size_t ret = read(fd, buf, info.st_size);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
}
*(buf + info.st_size) = '\0';
close(fd);
cJSON *json = cJSON_Parse(buf);
index_descriptor_t descriptor;
descriptor.timestamp = (long) cJSON_GetObjectItem(json, "timestamp")->valuedouble;
strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring);
strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring);
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (short) strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
if (cJSON_GetObjectItem(json, "type") == NULL) {
strcpy(descriptor.type, INDEX_TYPE_NDJSON);
} else {
strcpy(descriptor.type, cJSON_GetObjectItem(json, "type")->valuestring);
}
cJSON_Delete(json);
free(buf);
return descriptor;
}
void write_document(document_t *doc) {
tpool_add_work(ScanCtx.writer_pool, write_document_func, doc);
}
char *json_str = build_json_string(doc);
void thread_cleanup() {
cleanup_parse();
cleanup_font();
}
void read_index_bin_handle_line(const char *line, const char *index_id, index_func func) {
cJSON *document = cJSON_Parse(line);
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
cJSON_AddStringToObject(document, "index", index_id);
// Load meta from sidecar files
cJSON *meta_obj = NULL;
if (IndexCtx.meta != NULL) {
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
if (meta_string != NULL) {
meta_obj = cJSON_Parse(meta_string);
cJSON *child;
for (child = meta_obj->child; child != NULL; child = child->next) {
char meta_key[4096];
strcpy(meta_key, child->string);
cJSON_DeleteItemFromObject(document, meta_key);
cJSON_AddItemReferenceToObject(document, meta_key, child);
}
}
}
// Load tags from tags DB
if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_DeleteItemFromObject(document, "tag");
cJSON_AddItemToObject(document, "tag", tags_arr);
}
}
func(document, path_md5_str);
cJSON_DeleteItemFromObject(document, "_id");
cJSON_Delete(document);
if (meta_obj) {
cJSON_Delete(meta_obj);
}
}
void read_lines(const char *path, const line_processor_t processor) {
dyn_buffer_t buf = dyn_buffer_create();
// Initialize zstd things
FILE *file = fopen(path, "rb");
size_t const buf_in_size = ZSTD_DStreamInSize();
void *const buf_in = malloc(buf_in_size);
size_t const buf_out_size = ZSTD_DStreamOutSize();
void *const buf_out = malloc(buf_out_size);
ZSTD_DCtx *const dctx = ZSTD_createDCtx();
size_t read;
size_t last_ret = 0;
while ((read = fread(buf_in, 1, buf_in_size, file))) {
ZSTD_inBuffer input = {buf_in, read, 0};
while (input.pos < input.size) {
ZSTD_outBuffer output = {buf_out, buf_out_size, 0};
size_t const ret = ZSTD_decompressStream(dctx, &output, &input);
for (int i = 0; i < output.pos; i++) {
char c = ((char *) output.dst)[i];
if (c == '\n') {
dyn_buffer_write_char(&buf, '\0');
processor.func(buf.buf, processor.data);
buf.cur = 0;
} else {
dyn_buffer_write_char(&buf, c);
}
}
last_ret = ret;
}
}
if (last_ret != 0) {
/* The last return value from ZSTD_decompressStream did not end on a
* frame, but we reached the end of the file! We assume this is an
* error, and the input was truncated.
*/
LOG_FATALF("serialize.c", "EOF before end of stream: %zu", last_ret)
}
ZSTD_freeDCtx(dctx);
free(buf_in);
free(buf_out);
dyn_buffer_destroy(&buf);
fclose(file);
}
void read_index_ndjson(const char *line, void *_data) {
void **data = _data;
const char *index_id = data[0];
index_func func = data[1];
read_index_bin_handle_line(line, index_id, func);
}
void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func func) {
if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
read_lines(path, (line_processor_t) {
.data = (void *[2]) {(void *) index_id, func},
.func = read_index_ndjson,
});
}
}
static __thread GHashTable *IncrementalReadTable = NULL;
void json_put_incremental(cJSON *document, UNUSED(const char doc_id[SIST_DOC_ID_LEN])) {
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
const int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
incremental_put(IncrementalReadTable, path_md5_str, mtime);
}
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc) {
IncrementalReadTable = table;
read_index(filepath, desc->id, desc->type, json_put_incremental);
}
static __thread GHashTable *IncrementalCopyTable = NULL;
static __thread GHashTable *IncrementalNewTable = NULL;
static __thread store_t *IncrementalCopySourceStore = NULL;
static __thread store_t *IncrementalCopyDestinationStore = NULL;
void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get(IncrementalCopyTable, doc_id)) {
// Copy index line
cJSON_DeleteItemFromObject(document, "index");
char *json_str = cJSON_PrintUnformatted(document);
const size_t json_str_len = strlen(json_str);
json_str = realloc(json_str, json_str_len + 1);
*(json_str + json_str_len) = '\n';
// Copy tn store contents
size_t buf_len;
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
if (buf_len != 0) {
store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
free(buf);
}
// Also copy additional thumbnails
if (cJSON_GetObjectItem(document, "thumbnail") != NULL) {
const int thumbnail_count = cJSON_GetObjectItem(document, "thumbnail")->valueint;
for (int i = 1; i < thumbnail_count; i++) {
char tn_key[SIST_DOC_ID_LEN + sizeof(char) * 4];
snprintf(tn_key, sizeof(tn_key), "%s%04d", doc_id, i);
buf = store_read(IncrementalCopySourceStore, tn_key, sizeof(tn_key), &buf_len);
if (buf_len != 0) {
store_write(IncrementalCopyDestinationStore, tn_key, sizeof(tn_key), buf, buf_len);
free(buf);
}
}
}
zstd_write_string(json_str, json_str_len + 1);
free(json_str);
}
}
/**
* Copy items from an index that are in the copy_table. Also copies from
* the store.
*/
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table) {
if (WriterCtx.out_file == NULL) {
initialize_writer_ctx(dst_filepath);
}
IncrementalCopyTable = copy_table;
IncrementalCopySourceStore = store;
IncrementalCopyDestinationStore = dst_store;
read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
}
void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
char doc_id_n[SIST_DOC_ID_LEN + 1];
doc_id_n[SIST_DOC_ID_LEN] = '\0';
doc_id_n[SIST_DOC_ID_LEN - 1] = '\n';
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
// do not delete archive virtual entries
if (cJSON_GetObjectItem(document, "parent") == NULL
&& !incremental_get(IncrementalCopyTable, doc_id)
&& !incremental_get(IncrementalNewTable, doc_id)
) {
memcpy(doc_id_n, doc_id, SIST_DOC_ID_LEN - 1);
zstd_write_string(doc_id_n, sizeof(doc_id_n));
}
}
void incremental_delete(const char *del_filepath, const char *index_filepath,
GHashTable *copy_table, GHashTable *new_table) {
if (WriterCtx.out_file == NULL) {
initialize_writer_ctx(del_filepath);
}
IncrementalCopyTable = copy_table;
IncrementalNewTable = new_table;
read_index(index_filepath, "", INDEX_TYPE_NDJSON, incremental_delete_handle_doc);
}
database_write_document(ProcData.index_db, doc, json_str);
free(doc);
free(json_str);
}

src/io/serialize.h
View File

@ -2,55 +2,7 @@
#define SIST2_SERIALIZE_H
#include "src/sist.h"
#include "store.h"
#include <sys/syscall.h>
#include <glib.h>
typedef struct line_processor {
void* data;
void (*func)(const char*, void*);
} line_processor_t;
typedef void(*index_func)(cJSON *, const char[SIST_DOC_ID_LEN]);
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table);
void incremental_delete(const char *del_filepath, const char* index_filepath,
GHashTable *copy_table, GHashTable *new_table);
void write_document(document_t *doc);
void read_lines(const char *path, const line_processor_t processor);
void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func);
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);
/**
* Must be called after write_document
*/
void thread_cleanup();
void writer_cleanup();
void write_index_descriptor(char *path, index_descriptor_t *desc);
index_descriptor_t read_index_descriptor(char *path);
// caller ensures char file_path[PATH_MAX]
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
if (access(file_path, R_OK) == 0) { \
action_ok; \
} else { \
action_main_fail; \
} \
snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
if ((cond_original) && access(file_path, R_OK) == 0) { \
action_ok; \
} \
#endif

195
src/io/store.c
View File

@ -1,195 +0,0 @@
#include "store.h"
#include "src/ctx.h"
store_t *store_create(const char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t));
mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR);
strcpy(store->path, path);
#if (SIST_FAKE_STORE != 1)
store->chunk_size = chunk_size;
pthread_rwlock_init(&store->lock, NULL);
mdb_env_create(&store->env);
int open_ret = mdb_env_open(store->env,
path,
MDB_WRITEMAP | MDB_MAPASYNC,
S_IRUSR | S_IWUSR
);
if (open_ret != 0) {
LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
}
store->size = (size_t) store->chunk_size;
mdb_env_set_mapsize(store->env, store->size);
// Open dbi
MDB_txn *txn;
mdb_txn_begin(store->env, NULL, 0, &txn);
mdb_dbi_open(txn, NULL, 0, &store->dbi);
mdb_txn_commit(txn);
#endif
return store;
}
void store_destroy(store_t *store) {
#if (SIST_FAKE_STORE != 1)
pthread_rwlock_destroy(&store->lock);
mdb_dbi_close(store->env, store->dbi);
mdb_env_close(store->env);
#endif
free(store);
}
void store_flush(store_t *store) {
mdb_env_sync(store->env, TRUE);
}
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
if (LogCtx.very_verbose) {
LOG_DEBUGF("store.c", "Store write %s@{%s} %lu bytes", store->path, key, buf_len)
}
#if (SIST_FAKE_STORE != 1)
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
MDB_val mdb_value;
mdb_value.mv_data = buf;
mdb_value.mv_size = buf_len;
MDB_txn *txn;
pthread_rwlock_rdlock(&store->lock);
mdb_txn_begin(store->env, NULL, 0, &txn);
int put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
ScanCtx.stat_tn_size += buf_len;
int db_full = FALSE;
int should_abort_transaction = FALSE;
if (put_ret == MDB_MAP_FULL) {
db_full = TRUE;
should_abort_transaction = TRUE;
} else {
int commit_ret = mdb_txn_commit(txn);
if (commit_ret == MDB_MAP_FULL) {
db_full = TRUE;
}
}
if (db_full) {
LOG_DEBUGF("store.c", "Updating mdb mapsize to %lu bytes", store->size)
if (should_abort_transaction) {
mdb_txn_abort(txn);
}
pthread_rwlock_unlock(&store->lock);
// Cannot resize while there is an open transaction.
// The resize takes effect on the next commit.
pthread_rwlock_wrlock(&store->lock);
store->size += store->chunk_size;
int resize_ret = mdb_env_set_mapsize(store->env, store->size);
if (resize_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
mdb_txn_begin(store->env, NULL, 0, &txn);
int put_ret_retry = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
if (put_ret_retry != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
int ret = mdb_txn_commit(txn);
if (ret != 0) {
LOG_FATALF("store.c", "FIXME: Could not commit to store %s: %s (%d), %d, %d %d",
store->path, mdb_strerror(ret), ret,
put_ret, put_ret_retry);
}
LOG_DEBUGF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
} else if (put_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
pthread_rwlock_unlock(&store->lock);
#endif
}
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) {
char *buf = NULL;
#if (SIST_FAKE_STORE != 1)
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
MDB_val mdb_value;
MDB_txn *txn;
mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
int get_ret = mdb_get(txn, store->dbi, &mdb_key, &mdb_value);
if (get_ret == MDB_NOTFOUND) {
*ret_vallen = 0;
} else {
*ret_vallen = mdb_value.mv_size;
buf = malloc(mdb_value.mv_size);
memcpy(buf, mdb_value.mv_data, mdb_value.mv_size);
}
mdb_txn_abort(txn);
#endif
return buf;
}
GHashTable *store_read_all(store_t *store) {
int count = 0;
GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
MDB_txn *txn = NULL;
mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
MDB_cursor *cur = NULL;
mdb_cursor_open(txn, store->dbi, &cur);
MDB_val key;
MDB_val value;
while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
char *key_str = malloc(key.mv_size);
memcpy(key_str, key.mv_data, key.mv_size);
char *val_str = malloc(value.mv_size);
memcpy(val_str, value.mv_data, value.mv_size);
g_hash_table_insert(table, key_str, val_str);
count += 1;
}
const char *path;
mdb_env_get_path(store->env, &path);
LOG_DEBUGF("store.c", "Read %d entries from %s", count, path);
mdb_cursor_close(cur);
mdb_txn_abort(txn);
return table;
}
void store_copy(store_t *store, const char *destination) {
mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR);
mdb_env_copy(store->env, destination);
}

37
src/io/store.h
View File

@ -1,37 +0,0 @@
#ifndef SIST2_STORE_H
#define SIST2_STORE_H
#include <pthread.h>
#include <lmdb.h>
#include <glib.h>
#define STORE_SIZE_TN (1024 * 1024 * 5)
#define STORE_SIZE_TAG (1024 * 1024)
#define STORE_SIZE_META STORE_SIZE_TAG
typedef struct store_t {
char path[PATH_MAX];
char *tmp_path;
MDB_dbi dbi;
MDB_env *env;
size_t size;
size_t chunk_size;
pthread_rwlock_t lock;
} store_t;
store_t *store_create(const char *path, size_t chunk_size);
void store_destroy(store_t *store);
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);
void store_flush(store_t *store);
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
GHashTable *store_read_all(store_t *store);
void store_copy(store_t *store, const char *destination);
#endif

src/io/walk.c
View File

@ -1,44 +1,12 @@
#include "walk.h"
#include "src/ctx.h"
#include "src/parsing/parse.h"
#include "src/parsing/fs_util.h"
#include <ftw.h>
#include <pthread.h>
#define STR_STARTS_WITH(x, y) (strncmp(y, x, strlen(y) - 1) == 0)
__always_inline
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
int len = (int) strlen(filepath);
parse_job_t *job = malloc(sizeof(parse_job_t) + len);
strcpy(job->filepath, filepath);
job->base = base;
char *p = strrchr(filepath + base, '.');
if (p != NULL) {
job->ext = (int) (p - filepath + 1);
} else {
job->ext = len;
}
job->vfile.info = *info;
job->parent[0] = '\0';
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;
// Filesystem reads are always rewindable
job->vfile.read_rewindable = fs_read;
job->vfile.reset = fs_reset;
job->vfile.close = fs_close;
job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE;
job->vfile.has_checksum = FALSE;
job->vfile.rewind_buffer_size = 0;
job->vfile.rewind_buffer = NULL;
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
return job;
}
int sub_strings[30];
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, str, strlen(str), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
@ -53,12 +21,9 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
}
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
LOG_DEBUGF("walk.c", "Excluded: %s", filepath);
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_excluded_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
} else if (typeflag == FTW_D) {
return FTW_SKIP_SUBTREE;
}
@ -67,8 +32,13 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
}
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
tpool_add_work(ScanCtx.pool, parse, job);
parse_job_t *job = create_parse_job(filepath, (int) info->st_mtim.tv_sec, info->st_size);
tpool_add_work(ScanCtx.pool, &(job_t) {
.type = JOB_PARSE_JOB,
.parse_job = job
});
free(job);
}
return FTW_CONTINUE;
@ -109,14 +79,7 @@ int iterate_file_list(void *input_file) {
}
if (ScanCtx.exclude != NULL && EXCLUDED(absolute_path)) {
LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path)
if (S_ISREG(info.st_mode)) {
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_excluded_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
}
LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path);
continue;
}
@ -124,11 +87,14 @@ int iterate_file_list(void *input_file) {
LOG_FATALF("walk.c", "File is not a children of root folder (%s): %s", ScanCtx.index.desc.root, buf);
}
int base = (int) (strrchr(buf, '/') - buf) + 1;
parse_job_t *job = create_fs_parse_job(absolute_path, &info, base);
parse_job_t *job = create_parse_job(absolute_path, (int) info.st_mtim.tv_sec, info.st_size);
free(absolute_path);
tpool_add_work(ScanCtx.pool, parse, job);
tpool_add_work(ScanCtx.pool, &(job_t) {
.type = JOB_PARSE_JOB,
.parse_job = job
});
free(job);
}
return 0;

src/log.c
View File

@ -21,8 +21,6 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
char log_str[LOG_MAX_LENGTH];
unsigned long long pid = (unsigned long long) pthread_self();
char datetime[32];
time_t t;
struct tm result;
@ -42,8 +40,8 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
log_len = snprintf(
log_str, sizeof(log_str),
"{\"thread\":\"%04llX\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
pid, datetime, log_levels[level], filepath_json_str, log_str_json_str
"{\"thread\":\"T%d\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
ProcData.thread_id, datetime, log_levels[level], filepath_json_str, log_str_json_str
);
cJSON_Delete(filepath_json);
@ -58,15 +56,15 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
if (is_tty) {
log_len = snprintf(
log_str, sizeof(log_str),
"\033[%dm[%04llX]%s [%s] [%s %s] ",
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
"\033[%dmT%d%s [%s] [%s %s] ",
31 + ProcData.thread_id % 7, ProcData.thread_id, log_colors[level],
datetime, log_levels[level], filepath
);
} else {
log_len = snprintf(
log_str, sizeof(log_str),
"[%04llX] [%s] [%s %s] ",
pid, datetime, log_levels[level], filepath
"T%d [%s] [%s %s] ",
ProcData.thread_id, datetime, log_levels[level], filepath
);
}
@ -112,8 +110,6 @@ void sist_log(const char *filepath, int level, char *str) {
char log_str[LOG_MAX_LENGTH];
unsigned long long pid = (unsigned long long) pthread_self();
char datetime[32];
time_t t;
struct tm result;
@ -132,8 +128,8 @@ void sist_log(const char *filepath, int level, char *str) {
log_len = snprintf(
log_str, sizeof(log_str),
"{\"thread\":\"%04llX\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
pid, datetime, log_levels[level], filepath_json_str, log_str_json_str
"{\"thread\":\"T%d\",\"datetime\":\"%s\",\"level\":\"%s\",\"filepath\":%s,\"message\":%s}\n",
ProcData.thread_id, datetime, log_levels[level], filepath_json_str, log_str_json_str
);
cJSON_Delete(log_str_json);
@ -147,16 +143,16 @@ void sist_log(const char *filepath, int level, char *str) {
if (is_tty) {
log_len = snprintf(
log_str, sizeof(log_str),
"\033[%dm[%04llX]%s [%s] [%s %s] %s \033[0m\n",
31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
"\033[%dmT%d%s [%s] [%s %s] %s \033[0m\n",
31 + ProcData.thread_id % 7, ProcData.thread_id, log_colors[level],
datetime, log_levels[level], filepath,
str
);
} else {
log_len = snprintf(
log_str, sizeof(log_str),
"[%04llX] [%s] [%s %s] %s \n",
pid, datetime, log_levels[level], filepath,
"T%d [%s] [%s %s] %s \n",
ProcData.thread_id, datetime, log_levels[level], filepath,
str
);
}

src/log.h
View File

@ -2,6 +2,7 @@
#define SIST2_LOG_H
#include <signal.h>
#define LOG_MAX_LENGTH 8192
#define LOG_SIST_DEBUG 0
@ -10,32 +11,37 @@
#define LOG_SIST_ERROR 3
#define LOG_SIST_FATAL 4
#define LOG_DEBUGF(filepath, fmt, ...) \
if (LogCtx.very_verbose) {sist_logf(filepath, LOG_SIST_DEBUG, fmt, __VA_ARGS__);}
#define LOG_DEBUG(filepath, str) \
if (LogCtx.very_verbose) {sist_log(filepath, LOG_SIST_DEBUG, str);}
#define LOG_DEBUGF(filepath, fmt, ...) do{\
if (LogCtx.very_verbose) {sist_logf(filepath, LOG_SIST_DEBUG, fmt, __VA_ARGS__);}}while(0)
#define LOG_DEBUG(filepath, str) do{\
if (LogCtx.very_verbose) {sist_log(filepath, LOG_SIST_DEBUG, str);}}while(0)
#define LOG_INFOF(filepath, fmt, ...) \
if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_INFO, fmt, __VA_ARGS__);}
#define LOG_INFO(filepath, str) \
if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_INFO, str);}
#define LOG_INFOF(filepath, fmt, ...) do {\
if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_INFO, fmt, __VA_ARGS__);}} while(0)
#define LOG_INFO(filepath, str) do {\
if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_INFO, str);}} while(0)
#define LOG_WARNINGF(filepath, fmt, ...) \
if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_WARNING, fmt, __VA_ARGS__);}
#define LOG_WARNING(filepath, str) \
if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_WARNING, str);}
#define LOG_WARNINGF(filepath, fmt, ...) do {\
if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_WARNING, fmt, __VA_ARGS__);}}while(0)
#define LOG_WARNING(filepath, str) do{\
if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_WARNING, str);}}while(0)
#define LOG_ERRORF(filepath, fmt, ...) \
if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_ERROR, fmt, __VA_ARGS__);}
#define LOG_ERROR(filepath, str) \
if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_ERROR, str);}
#define LOG_ERRORF(filepath, fmt, ...) do {\
if (LogCtx.verbose) {sist_logf(filepath, LOG_SIST_ERROR, fmt, __VA_ARGS__);}}while(0)
#define LOG_ERROR(filepath, str) do{\
if (LogCtx.verbose) {sist_log(filepath, LOG_SIST_ERROR, str);}}while(0)
#define LOG_FATALF(filepath, fmt, ...) \
#define LOG_FATALF(filepath, fmt, ...)\
sist_logf(filepath, LOG_SIST_FATAL, fmt, __VA_ARGS__);\
exit(-1);
raise(SIGUSR1)
#define LOG_FATAL(filepath, str) \
sist_log(filepath, LOG_SIST_FATAL, str);\
exit(-1);
raise(SIGUSR1)
#define LOG_FATALF_NO_EXIT(filepath, fmt, ...) \
sist_logf(filepath, LOG_SIST_FATAL, fmt, __VA_ARGS__)
#define LOG_FATAL_NO_EXIT(filepath, str) \
sist_log(filepath, LOG_SIST_FATAL, str)
#include "sist.h"
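// Why the do { ... } while (0) wrappers added above matter: with the old
// single-if expansion, using a logging macro as the body of an if/else
// breaks. Illustrative example (not from this commit):
//
// if (failed)
//     LOG_WARNING("example.c", "oops");
// else
//     proceed();
//
// The old macro expands to `if (LogCtx.verbose) { ... };` -- the trailing
// semicolon ends the outer if, orphaning the `else`, so this no longer
// compiles. A do { ... } while (0) body behaves like a single statement
// and keeps the else attached.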

src/main.c
View File

@ -5,8 +5,6 @@
#include <locale.h>
#include "cli.h"
#include "io/serialize.h"
#include "io/store.h"
#include "tpool.h"
#include "io/walk.h"
#include "index/elastic.h"
@ -16,13 +14,9 @@
#include "auth0/auth0_c_api.h"
#include <signal.h>
#include <unistd.h>
#include <pthread.h>
#include "stats.h"
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
#include "src/database/database.h"
static const char *const usage[] = {
@ -34,109 +28,62 @@ static const char *const usage[] = {
};
static __sighandler_t sigsegv_handler = NULL;
static __sighandler_t sigabrt_handler = NULL;
void database_scan_begin(scan_args_t *args) {
index_descriptor_t *desc = &ScanCtx.index.desc;
void sig_handler(int signum) {
database_t *db = database_create(args->output, INDEX_DATABASE);
LogCtx.verbose = TRUE;
LogCtx.very_verbose = TRUE;
if (args->incremental) {
// Update existing descriptor
database_open(db);
index_descriptor_t *original_desc = database_read_index_descriptor(db);
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
// copy original index id
strcpy(desc->id, original_desc->id);
if (ScanCtx.dbg_current_files != NULL) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
void *key;
void *value;
while (g_hash_table_iter_next(&iter, &key, &value)) {
parse_job_t *job = value;
if (isatty(STDERR_FILENO)) {
LOG_DEBUGF(
"*SIGNAL HANDLER*",
"Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
31 + ((unsigned int) key) % 7, key, job->filepath
);
} else {
LOG_DEBUGF(
"*SIGNAL HANDLER*",
"THREAD [%04llX] was working on job %s",
key, job->filepath
);
}
if (original_desc->version_major != VersionMajor) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc->version, Version);
}
}
if (ScanCtx.pool != NULL) {
tpool_dump_debug_info(ScanCtx.pool);
}
strcpy(original_desc->root, desc->root);
original_desc->root_len = desc->root_len;
strcpy(original_desc->rewrite_url, desc->rewrite_url);
strcpy(original_desc->name, desc->name);
if (IndexCtx.pool != NULL) {
tpool_dump_debug_info(IndexCtx.pool);
}
time(&original_desc->timestamp);
LOG_INFO(
"*SIGNAL HANDLER*",
"Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
)
LOG_INFO(
"*SIGNAL HANDLER*",
"sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
)
database_write_index_descriptor(db, original_desc);
free(original_desc);
#ifndef SIST_DEBUG
LOG_WARNING(
"*SIGNAL HANDLER*",
"You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
"releases page to provide additional information when submitting a bug report."
)
#endif
database_incremental_scan_begin(db);
if (signum == SIGSEGV && sigsegv_handler != NULL) {
sigsegv_handler(signum);
} else if (signum == SIGABRT && sigabrt_handler != NULL) {
sigabrt_handler(signum);
}
exit(-1);
}
void init_dir(const char *dirpath, scan_args_t *args) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
time(&ScanCtx.index.desc.timestamp);
strcpy(ScanCtx.index.desc.version, Version);
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
if (args->incremental != NULL) {
// copy old index id
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
} else {
// Create new descriptor
time(&desc->timestamp);
strcpy(desc->version, Version);
desc->version_major = VersionMajor;
desc->version_minor = VersionMinor;
desc->version_patch = VersionPatch;
// generate new index id based on timestamp
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
database_initialize(db);
database_open(db);
database_write_index_descriptor(db, desc);
}
write_index_descriptor(path, &ScanCtx.index.desc);
database_close(db, FALSE);
}
void scan_print_header() {
LOG_INFOF("main.c", "sist2 v%s", Version)
void write_thumbnail_callback(char *key, int num, void *buf, size_t buf_len) {
database_write_thumbnail(ProcData.index_db, key, num, buf, buf_len);
}
void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
store_write(ScanCtx.index.store, key, key_len, buf, buf_len);
}
void _log(const char *filepath, int level, char *str) {
void log_callback(const char *filepath, int level, char *str) {
if (level == LEVEL_FATAL) {
sist_log(filepath, level, str);
exit(-1);
@ -153,7 +100,7 @@ void _log(const char *filepath, int level, char *str) {
}
}
void _logf(const char *filepath, int level, char *format, ...) {
void logf_callback(const char *filepath, int level, char *format, ...) {
va_list args;
@ -177,17 +124,12 @@ void _logf(const char *filepath, int level, char *format, ...) {
void initialize_scan_context(scan_args_t *args) {
ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL);
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
pthread_mutex_init(&ScanCtx.copy_table_mu, NULL);
ScanCtx.calculate_checksums = args->calculate_checksums;
// Archive
ScanCtx.arc_ctx.mode = args->archive_mode;
ScanCtx.arc_ctx.log = _log;
ScanCtx.arc_ctx.logf = _logf;
ScanCtx.arc_ctx.log = log_callback;
ScanCtx.arc_ctx.logf = logf_callback;
ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
if (args->archive_passphrase != NULL) {
strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
@ -196,17 +138,16 @@ void initialize_scan_context(scan_args_t *args) {
}
// Comic
ScanCtx.comic_ctx.log = _log;
ScanCtx.comic_ctx.logf = _logf;
ScanCtx.comic_ctx.store = _store;
ScanCtx.comic_ctx.log = log_callback;
ScanCtx.comic_ctx.logf = logf_callback;
ScanCtx.comic_ctx.store = write_thumbnail_callback;
ScanCtx.comic_ctx.enable_tn = args->tn_count > 0;
ScanCtx.comic_ctx.tn_size = args->tn_size;
ScanCtx.comic_ctx.tn_qscale = args->tn_quality;
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string("application/x-cbr");
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string("application/x-cbz");
// Ebook
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
ScanCtx.ebook_ctx.content_size = args->content_size;
ScanCtx.ebook_ctx.enable_tn = args->tn_count > 0;
ScanCtx.ebook_ctx.tn_size = args->tn_size;
@ -214,25 +155,25 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
}
ScanCtx.ebook_ctx.log = _log;
ScanCtx.ebook_ctx.logf = _logf;
ScanCtx.ebook_ctx.store = _store;
ScanCtx.ebook_ctx.log = log_callback;
ScanCtx.ebook_ctx.logf = logf_callback;
ScanCtx.ebook_ctx.store = write_thumbnail_callback;
ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
ScanCtx.ebook_ctx.tn_qscale = args->tn_quality;
// Font
ScanCtx.font_ctx.enable_tn = args->tn_count > 0;
ScanCtx.font_ctx.log = _log;
ScanCtx.font_ctx.logf = _logf;
ScanCtx.font_ctx.store = _store;
ScanCtx.font_ctx.log = log_callback;
ScanCtx.font_ctx.logf = logf_callback;
ScanCtx.font_ctx.store = write_thumbnail_callback;
// Media
ScanCtx.media_ctx.tn_qscale = args->tn_quality;
ScanCtx.media_ctx.tn_size = args->tn_size;
ScanCtx.media_ctx.tn_count = args->tn_count;
ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.log = log_callback;
ScanCtx.media_ctx.logf = logf_callback;
ScanCtx.media_ctx.store = write_thumbnail_callback;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024;
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
ScanCtx.media_ctx.read_subtitles = args->tn_count;
@ -246,30 +187,29 @@ void initialize_scan_context(scan_args_t *args) {
// OOXML
ScanCtx.ooxml_ctx.enable_tn = args->tn_count > 0;
ScanCtx.ooxml_ctx.content_size = args->content_size;
ScanCtx.ooxml_ctx.log = _log;
ScanCtx.ooxml_ctx.logf = _logf;
ScanCtx.ooxml_ctx.store = _store;
ScanCtx.ooxml_ctx.log = log_callback;
ScanCtx.ooxml_ctx.logf = logf_callback;
ScanCtx.ooxml_ctx.store = write_thumbnail_callback;
// MOBI
ScanCtx.mobi_ctx.content_size = args->content_size;
ScanCtx.mobi_ctx.log = _log;
ScanCtx.mobi_ctx.logf = _logf;
ScanCtx.mobi_ctx.log = log_callback;
ScanCtx.mobi_ctx.logf = logf_callback;
// TEXT
ScanCtx.text_ctx.content_size = args->content_size;
ScanCtx.text_ctx.log = _log;
ScanCtx.text_ctx.logf = _logf;
ScanCtx.text_ctx.log = log_callback;
ScanCtx.text_ctx.logf = logf_callback;
// MSDOC
ScanCtx.msdoc_ctx.content_size = args->content_size;
ScanCtx.msdoc_ctx.log = _log;
ScanCtx.msdoc_ctx.logf = _logf;
ScanCtx.msdoc_ctx.store = _store;
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/msword");
ScanCtx.msdoc_ctx.log = log_callback;
ScanCtx.msdoc_ctx.logf = logf_callback;
ScanCtx.msdoc_ctx.store = write_thumbnail_callback;
ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string("application/msword");
ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
ScanCtx.mem_limit = (size_t) args->scan_mem_limit_mib * 1024 * 1024;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
@ -282,176 +222,66 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.raw_ctx.tn_qscale = args->tn_quality;
ScanCtx.raw_ctx.enable_tn = args->tn_count > 0;
ScanCtx.raw_ctx.tn_size = args->tn_size;
ScanCtx.raw_ctx.log = _log;
ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.store = _store;
ScanCtx.raw_ctx.log = log_callback;
ScanCtx.raw_ctx.logf = logf_callback;
ScanCtx.raw_ctx.store = write_thumbnail_callback;
// Wpd
ScanCtx.wpd_ctx.content_size = args->content_size;
ScanCtx.wpd_ctx.log = _log;
ScanCtx.wpd_ctx.logf = _logf;
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/wordperfect");
ScanCtx.wpd_ctx.log = log_callback;
ScanCtx.wpd_ctx.logf = logf_callback;
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string("application/wordperfect");
// Json
ScanCtx.json_ctx.content_size = args->content_size;
ScanCtx.json_ctx.log = _log;
ScanCtx.json_ctx.logf = _logf;
ScanCtx.json_ctx.json_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/json");
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson");
ScanCtx.json_ctx.log = log_callback;
ScanCtx.json_ctx.logf = logf_callback;
ScanCtx.json_ctx.json_mime = mime_get_mime_by_string("application/json");
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string("application/ndjson");
}
/**
 * Loads an existing index as the baseline for an incremental scan.
 * 1. Load the old index files (original+main) => original_table
 * 2. Allocate an empty table => copy_table
 * 3. Allocate an empty table => new_table
 * original_table/copy_table/new_table are populated in parsing/parse.c:parse
 * and consumed in main.c:save_incremental_index.
 *
 * Note: the existing index may or may not itself be an incremental index.
*/
void load_incremental_index(const scan_args_t *args) {
char file_path[PATH_MAX];
ScanCtx.original_table = incremental_get_table();
ScanCtx.copy_table = incremental_get_table();
ScanCtx.new_table = incremental_get_table();
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
if (strcmp(original_desc.version, Version) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
}
READ_INDICES(
file_path,
args->incremental,
incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_DEBUG("main.c", "The base index for incremental scan does not have a main index"),
TRUE
);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
/**
* Saves an incremental index.
 * Before calling this function, the scanner must have finished writing the main index.
 * 1. Compute original_table - new_table => delete_table
 * 2. Incrementally copy the old index files [(original+main) ∩ copy_table] => index_original.ndjson.zst & store
*/
void save_incremental_index(scan_args_t *args) {
char dst_path[PATH_MAX];
char store_path[PATH_MAX];
char file_path[PATH_MAX];
char del_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path);
store_t *source = store_create(store_path, STORE_SIZE_TN);
LOG_INFOF("main.c", "incremental_delete: original size = %u, copy size = %u, new size = %u",
g_hash_table_size(ScanCtx.original_table),
g_hash_table_size(ScanCtx.copy_table),
g_hash_table_size(ScanCtx.new_table));
snprintf(del_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path);
READ_INDICES(file_path, args->incremental,
incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table),
perror("incremental_delete"), 1);
writer_cleanup();
READ_INDICES(file_path, args->incremental,
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table),
perror("incremental_copy"), 1);
writer_cleanup();
store_destroy(source);
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
store_copy(source_tags, dst_path);
store_destroy(source_tags);
}
/**
* An index can be either incremental or non-incremental (initial index).
* For an initial index, there is only the "main" index.
* For an incremental index, there are, additionally:
* - An "original" index, referencing all files unchanged since the previous index.
* - A "delete" index, referencing all files that exist in the previous index, but deleted since then.
* Therefore, for an incremental index, "main"+"original" covers all the current files in the live filesystem,
* and is orthognal with the "delete" index. When building an incremental index upon an old incremental index,
* the old "delete" index can be safely ignored.
*/
void sist2_scan(scan_args_t *args) {
ScanCtx.mime_table = mime_get_mime_table();
ScanCtx.ext_table = mime_get_ext_table();
initialize_scan_context(args);
init_dir(ScanCtx.index.path, args);
database_scan_begin(args);
char store_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
LOG_INFOF("main.c", "sist2 v%s", Version);
snprintf(store_path, PATH_MAX, "%smeta", ScanCtx.index.path);
ScanCtx.index.meta_store = store_create(store_path, STORE_SIZE_META);
scan_print_header();
if (args->incremental != NULL) {
load_incremental_index(args);
}
ScanCtx.pool = tpool_create(ScanCtx.threads, thread_cleanup, TRUE, TRUE, ScanCtx.mem_limit);
ScanCtx.pool = tpool_create(ScanCtx.threads, TRUE);
tpool_start(ScanCtx.pool);
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE, 0);
tpool_start(ScanCtx.writer_pool);
if (args->list_path) {
// Scan using file list
int list_ret = iterate_file_list(args->list_file);
if (list_ret != 0) {
LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret)
LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret);
}
} else {
// Scan directory recursively
int walk_ret = walk_directory_tree(ScanCtx.index.desc.root);
if (walk_ret == -1) {
LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno)
LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno);
}
}
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);
tpool_wait(ScanCtx.writer_pool);
tpool_destroy(ScanCtx.writer_pool);
LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size);
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size);
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
LOG_DEBUGF("main.c", "Thumbnail store size: %lu", ScanCtx.stat_tn_size)
LOG_DEBUGF("main.c", "Index size: %lu", ScanCtx.stat_index_size)
database_t *db = database_create(args->output, INDEX_DATABASE);
database_open(db);
if (args->incremental != NULL) {
save_incremental_index(args);
if (args->incremental != FALSE) {
database_incremental_scan_end(db);
}
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
store_destroy(ScanCtx.index.store);
store_destroy(ScanCtx.index.meta_store);
database_generate_stats(db, args->treemap_threshold);
database_close(db, args->optimize_database);
}
void sist2_index(index_args_t *args) {
char file_path[PATH_MAX];
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
@ -462,91 +292,72 @@ void sist2_index(index_args_t *args) {
elastic_init(args->force_reset, args->es_mappings, args->es_settings);
}
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
database_t *db = database_create(args->index_path, INDEX_DATABASE);
database_open(db);
index_descriptor_t *desc = database_read_index_descriptor(db);
database_close(db, FALSE);
index_descriptor_t desc = read_index_descriptor(descriptor_path);
LOG_DEBUGF("main.c", "Index version %s", desc->version);
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
if (strcmp(desc.version, Version) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", desc.version, Version)
if (desc->version_major != VersionMajor) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", desc->version, Version);
}
DIR *dir = opendir(args->index_path);
if (dir == NULL) {
LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
}
char path_tmp[PATH_MAX];
snprintf(path_tmp, sizeof(path_tmp), "%stags", args->index_path);
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
snprintf(path_tmp, sizeof(path_tmp), "%smeta", args->index_path);
IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
IndexCtx.meta = store_read_all(IndexCtx.meta_store);
index_func f;
if (args->print) {
f = print_json;
} else {
f = index_json;
}
IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0, 0);
IndexCtx.pool = tpool_create(args->threads, args->print == FALSE);
tpool_start(IndexCtx.pool);
READ_INDICES(file_path, args->index_path, {
read_index(file_path, desc.id, desc.type, f);
LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type);
}, {}, !args->incremental);
int cnt = 0;
// Only read the _delete index if we're sending data to ES
if (!args->print) {
snprintf(file_path, PATH_MAX, "%s_index_delete.list.zst", args->index_path);
if (0 == access(file_path, R_OK)) {
read_lines(file_path, (line_processor_t) {
.data = NULL,
.func = delete_document
});
LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type)
db = database_create(args->index_path, INDEX_DATABASE);
database_open(db);
database_iterator_t *iterator = database_create_document_iterator(db);
database_document_iter_foreach(json, iterator) {
const char *doc_id = cJSON_GetObjectItem(json, "_id")->valuestring;
if (args->print) {
print_json(json, doc_id);
} else {
index_json(json, doc_id);
cnt += 1;
}
}
closedir(dir);
free(iterator);
database_close(db, FALSE);
if (!args->print) {
database_iterator_t *del_iter = database_create_delete_list_iterator(db);
database_delete_list_iter_foreach(id, del_iter) {
delete_document(id);
free(id);
}
}
tpool_wait(IndexCtx.pool);
tpool_destroy(IndexCtx.pool);
if (IndexCtx.needs_es_connection) {
finish_indexer(args->script, args->async_script, desc.id);
finish_indexer(args->script, args->async_script, desc->id);
}
store_destroy(IndexCtx.tag_store);
store_destroy(IndexCtx.meta_store);
g_hash_table_remove_all(IndexCtx.tags);
g_hash_table_destroy(IndexCtx.tags);
free(desc);
}
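The database_document_iter_foreach macro above follows a common C idiom: wrap an iterator's next() function in a for-loop macro so the call site reads like a native loop. A minimal self-contained sketch of the pattern (names invented for illustration; this is not the commit's actual implementation):
/* Sketch: iterator + foreach macro, as used by database_document_iter_foreach(). */
#include <stdio.h>
typedef struct {
    int current;
    int end;
} iterator_t;
static const int *iterator_next(iterator_t *it) {
    if (it->current >= it->end) {
        return NULL; // exhausted: the foreach loop terminates
    }
    it->current += 1;
    return &it->current;
}
#define iter_foreach(item, it) \
    for (const int *item = iterator_next(it); item != NULL; item = iterator_next(it))
int main(void) {
    iterator_t it = {0, 3};
    iter_foreach(value, &it) {
        printf("%d\n", *value); // prints 1, 2, 3
    }
    return 0;
}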
void sist2_exec_script(exec_args_t *args) {
LogCtx.verbose = TRUE;
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
index_descriptor_t desc = read_index_descriptor(descriptor_path);
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.es_insecure_ssl = args->es_insecure_ssl;
IndexCtx.needs_es_connection = TRUE;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
database_t *db = database_create(args->index_path, INDEX_DATABASE);
database_open(db);
execute_update_script(args->script, args->async_script, desc.id);
index_descriptor_t *desc = database_read_index_descriptor(db);
LOG_DEBUGF("main.c", "Index version %s", desc->version);
execute_update_script(args->script, args->async_script, desc->id);
free(args->script);
database_close(db, FALSE);
}
void sist2_web(web_args_t *args) {
@ -570,23 +381,17 @@ void sist2_web(web_args_t *args) {
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
if (abs_path == NULL) {
return;
}
char path_tmp[PATH_MAX];
snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);
snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
strcpy(WebCtx.indices[i].path, abs_path);
LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name)
WebCtx.indices[i].db = database_create(abs_path, INDEX_DATABASE);
database_open(WebCtx.indices[i].db);
index_descriptor_t *desc = database_read_index_descriptor(WebCtx.indices[i].db);
WebCtx.indices[i].desc = *desc;
free(desc);
LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name);
free(abs_path);
}
@ -601,7 +406,7 @@ void sist2_web(web_args_t *args) {
* Negative number -> Raise error
* Specified a valid number -> Continue as normal
*/
int set_to_negative_if_value_is_zero(struct argparse *self, const struct argparse_option *option) {
int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct argparse_option *option) {
int specified_value = *(int *) option->value;
if (specified_value == 0) {
@ -614,11 +419,7 @@ int set_to_negative_if_value_is_zero(struct argparse *self, const struct argpars
}
}
int main(int argc, const char *argv[]) {
sigsegv_handler = signal(SIGSEGV, sig_handler);
sigabrt_handler = signal(SIGABRT, sig_handler);
setlocale(LC_ALL, "");
scan_args_t *scan_args = scan_args_create();
@ -638,38 +439,37 @@ int main(int argc, const char *argv[]) {
struct argparse_option options[] = {
OPT_HELP(),
OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging"),
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
OPT_BOOLEAN('v', "version", &arg_version, "Print version and exit."),
OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging."),
OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages."),
OPT_BOOLEAN(0, "json-logs", &LogCtx.json_logs, "Output logs in JSON format."),
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_INTEGER(0, "mem-throttle", &scan_args->scan_mem_limit_mib,
"Total memory threshold in MiB for scan throttling. DEFAULT=0",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->scan_mem_limit_mib),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"),
OPT_INTEGER('q', "thumbnail-quality", &scan_args->tn_quality,
"Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
"Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT: 2",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size,
"Thumbnail size, in pixels. DEFAULT=500",
"Thumbnail size, in pixels. DEFAULT: 552",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size),
OPT_INTEGER(0, "thumbnail-count", &scan_args->tn_count,
"Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1",
"Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT: 1",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_count),
OPT_INTEGER(0, "content-size", &scan_args->content_size,
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768",
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT: 32768",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size),
OPT_STRING(0, "incremental", &scan_args->incremental,
"Reuse an existing index and only scan modified files."),
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
OPT_STRING('o', "output", &scan_args->output, "Output index file path. DEFAULT: index.sist2"),
OPT_BOOLEAN(0, "incremental", &scan_args->incremental,
"If the output file path exists, only scan new or modified files."),
OPT_BOOLEAN(0, "optimize-index", &scan_args->optimize_database,
"Defragment index file after scan to reduce its file size."),
OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: index"),
OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
"Use 0 to only scan files in PATH. DEFAULT: -1"),
OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
"skip: Don't parse, list: only get file names as text, "
"shallow: Don't parse archives inside archives. DEFAULT: recurse"),
"skip: don't scan, list: only save file names as text, "
"shallow: don't scan archives inside archives. DEFAULT: recurse"),
OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
"Passphrase for encrypted archive files"),
@ -678,8 +478,8 @@ int main(int argc, const char *argv[]) {
"which are installed on your machine)"),
OPT_BOOLEAN(0, "ocr-images", &scan_args->ocr_images, "Enable OCR'ing of image files."),
OPT_BOOLEAN(0, "ocr-ebooks", &scan_args->ocr_ebooks, "Enable OCR'ing of ebook files."),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned."),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type."),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
"(see USAGE.md). DEFAULT: 0.0005"),
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer_mib,
@ -687,47 +487,52 @@ int main(int argc, const char *argv[]) {
"(see USAGE.md). DEFAULT: 2000"),
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
"Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
"Faster but less accurate EPUB parsing (no thumbnails, metadata)."),
OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."),
OPT_STRING(0, "list-file", &scan_args->list_path, "Specify a list of newline-delimited paths to be scanned"
" instead of normal directory traversal. Use '-' to read"
" from stdin."),
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT: http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
"Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
OPT_BOOLEAN('p', "print", &index_args->print,
"Print JSON documents to stdout instead of indexing to elasticsearch."),
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
"Conduct incremental indexing. Assumes that the old index is already ingested in Elasticsearch."),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 70"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings."),
OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
"Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
OPT_STRING(0, "bind", &web_args->listen_address,
"Listen for connections on this address. DEFAULT: localhost:4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
OPT_STRING(0, "auth0-audience", &web_args->auth0_audience, "API audience/identifier"),
OPT_STRING(0, "auth0-domain", &web_args->auth0_domain, "Application domain"),
OPT_STRING(0, "auth0-client-id", &web_args->auth0_client_id, "Application client ID"),
OPT_STRING(0, "auth0-public-key-file", &web_args->auth0_public_key_path, "Path to Auth0 public key file extracted from <domain>/pem"),
OPT_STRING(0, "auth0-public-key-file", &web_args->auth0_public_key_path,
"Path to Auth0 public key file extracted from <domain>/pem"),
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
OPT_STRING(0, "tagline", &web_args->tagline, "Tagline in navbar"),
OPT_BOOLEAN(0, "dev", &web_args->dev, "Serve html & js files from disk (for development)"),
OPT_STRING(0, "lang", &web_args->lang, "Default UI language. Can be changed by the user"),
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl, "Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT: http://localhost:9200"),
OPT_BOOLEAN(0, "es-insecure-ssl", &common_es_insecure_ssl,
"Do not verify SSL connections to Elasticsearch."),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT: sist2"),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
@ -736,7 +541,11 @@ int main(int argc, const char *argv[]) {
struct argparse argparse;
argparse_init(&argparse, options, usage, 0);
argparse_describe(&argparse, DESCRIPTION, EPILOG);
argparse_describe(
&argparse,
"\nLightning-fast file system indexer and search tool.",
"\nMade by simon987 <me@simon987.net>. Released under GPL-3.0"
);
argc = argparse_parse(&argparse, argc, argv);
if (arg_version) {
@ -804,7 +613,7 @@ int main(int argc, const char *argv[]) {
} else {
argparse_usage(&argparse);
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0]);
}
printf("\n");

41
src/parsing/fs_util.h Normal file
View File

@ -0,0 +1,41 @@
#ifndef SIST2_FS_UTIL_H
#define SIST2_FS_UTIL_H
#include "src/sist.h"
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
static int fs_read(struct vfile *f, void *buf, size_t size) {
if (f->fd == -1) {
SHA1_Init(&f->sha1_ctx);
f->fd = open(f->filepath, O_RDONLY);
if (f->fd == -1) {
return -1;
}
}
int ret = (int) read(f->fd, buf, size);
if (ret != 0 && f->calculate_checksum) {
f->has_checksum = TRUE;
safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, ret);
}
return ret;
}
static void fs_close(struct vfile *f) {
if (f->fd != -1) {
SHA1_Final(f->sha1_digest, &f->sha1_ctx);
close(f->fd);
}
}
static void fs_reset(struct vfile *f) {
if (f->fd != -1) {
lseek(f->fd, 0, SEEK_SET);
}
}
#endif
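The checksum flow above is the standard incremental-hash pattern: initialize once on the first read, update on every chunk, finalize on close. A minimal self-contained sketch using the same OpenSSL SHA1_* calls (illustration only, not code from this commit):
/* Sketch: hash a file incrementally while reading it, mirroring fs_read()/fs_close(). */
#include <fcntl.h>
#include <unistd.h>
#include <openssl/sha.h>
int sha1_file(const char *path, unsigned char digest[SHA_DIGEST_LENGTH]) {
    int fd = open(path, O_RDONLY);
    if (fd == -1) {
        return -1;
    }
    SHA_CTX ctx;
    SHA1_Init(&ctx);
    char buf[4096];
    ssize_t n;
    while ((n = read(fd, buf, sizeof(buf))) > 0) {
        SHA1_Update(&ctx, buf, (size_t) n); // hash each chunk as it is read
    }
    SHA1_Final(digest, &ctx); // finalize exactly once, as fs_close() does
    close(fd);
    return n == 0 ? 0 : -1;
}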

32
src/parsing/magic_util.c Normal file
View File

@ -0,0 +1,32 @@
#include "magic_util.h"
#include "src/log.h"
#include "mime.h"
#include <magic.h>
#include "src/magic_generated.c"
char *magic_buffer_embedded(void *buffer, size_t buffer_size) {
magic_t magic = magic_open(MAGIC_MIME_TYPE);
const char *magic_buffers[1] = {magic_database_buffer,};
size_t sizes[1] = {sizeof(magic_database_buffer),};
// TODO optimisation: check if we can reuse the magic instance
int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1);
if (load_ret != 0) {
LOG_FATALF("parse.c", "Could not load libmagic database: (%d)", load_ret);
}
const char *magic_mime_str = magic_buffer(magic, buffer, buffer_size);
char *return_value = NULL;
if (magic_mime_str != NULL) {
return_value = malloc(strlen(magic_mime_str) + 1);
strcpy(return_value, magic_mime_str);
}
magic_close(magic);
return return_value;
}
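For comparison, the same classification can be done against the system-installed magic database with magic_load(magic, NULL); magic_buffer_embedded() differs only in loading the database from an embedded buffer via magic_load_buffers(). A standalone sketch (illustration only):
/* Sketch: classify an in-memory buffer with the system magic database. */
#include <stdio.h>
#include <magic.h>
int print_mime(const void *buf, size_t size) {
    magic_t magic = magic_open(MAGIC_MIME_TYPE);
    if (magic == NULL) {
        return -1;
    }
    if (magic_load(magic, NULL) != 0) { // NULL = default database path
        magic_close(magic);
        return -1;
    }
    const char *mime = magic_buffer(magic, buf, size);
    if (mime != NULL) {
        printf("%s\n", mime); // e.g. "application/pdf"
    }
    magic_close(magic); // also invalidates the string returned by magic_buffer()
    return 0;
}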

8
src/parsing/magic_util.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef SIST2_MAGIC_UTIL_H
#define SIST2_MAGIC_UTIL_H
#include <stdio.h>
char *magic_buffer_embedded(void *buffer, size_t buffer_size);
#endif //SIST2_MAGIC_UTIL_H

src/parsing/mime.c
View File

@ -1,22 +1,30 @@
#include "mime.h"
#include <zlib.h>
unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) {
char lower[8];
char *p = lower;
unsigned int mime_get_mime_by_ext(const char *ext) {
unsigned char lower[16];
unsigned char *p = lower;
int cnt = 0;
while ((*ext) != '\0' && cnt + 1 < sizeof(lower)) {
*p++ = (char)tolower(*ext++);
*p++ = tolower(*ext++);
cnt++;
}
*p = '\0';
return (size_t) g_hash_table_lookup(ext_table, lower);
unsigned long crc = crc32(0, lower, cnt);
unsigned int mime = mime_extension_lookup(crc);
return mime;
}
unsigned int mime_get_mime_by_string(GHashTable *mime_table, const char * str) {
unsigned int mime_get_mime_by_string(const char *str) {
const char * ptr = str;
const char *ptr = str;
while (*ptr == ' ' || *ptr == '[') {
ptr++;
}
return (size_t) g_hash_table_lookup(mime_table, ptr);
unsigned long crc = crc32(0, (unsigned char *) ptr, strlen(ptr));
return mime_name_lookup(crc);
}
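The GHashTable lookups are replaced by zlib crc32() keys into generated lookup tables (mime_extension_lookup / mime_name_lookup, declared in mime.h). A self-contained sketch of the key computation, with the generated table stubbed out for illustration:
/* Sketch: crc32-keyed extension lookup, as in mime_get_mime_by_ext() above. */
#include <ctype.h>
#include <stdio.h>
#include <zlib.h>
static unsigned int stub_extension_lookup(unsigned long crc) {
    // In sist2 this is a generated function; this stand-in just truncates the key.
    return (unsigned int) (crc & 0xFFFF);
}
int main(void) {
    const char *ext = "PDF";
    unsigned char lower[16];
    unsigned int cnt = 0;
    while (ext[cnt] != '\0' && cnt + 1 < sizeof(lower)) {
        lower[cnt] = (unsigned char) tolower((unsigned char) ext[cnt]);
        cnt++;
    }
    unsigned long crc = crc32(0, lower, cnt);
    printf("crc32(\"pdf\") = 0x%08lx -> mime %u\n", crc, stub_extension_lookup(crc));
    return 0;
}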

src/parsing/mime.h
View File

@ -51,14 +51,14 @@ enum major_mime {
enum mime;
GHashTable *mime_get_mime_table();
unsigned int mime_name_lookup(unsigned long mime_crc32);
GHashTable *mime_get_ext_table();
unsigned int mime_extension_lookup(unsigned long extension_crc32);
char *mime_get_mime_text(unsigned int);
const char *mime_get_mime_text(unsigned int);
unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext);
unsigned int mime_get_mime_by_ext(const char *ext);
unsigned int mime_get_mime_by_string(GHashTable *mime_table, const char * str);
unsigned int mime_get_mime_by_string(const char *str);
#endif

src/parsing/mime_generated.c
File diff suppressed because it is too large

src/parsing/parse.c
View File

@ -5,235 +5,234 @@
#include "mime.h"
#include "src/io/serialize.h"
#include "src/parsing/sidecar.h"
#include "src/magic_generated.c"
#include <magic.h>
#include "src/parsing/fs_util.h"
#include "src/parsing/magic_util.h"
#include <pthread.h>
#define MIN_VIDEO_SIZE (1024 * 64)
#define MIN_IMAGE_SIZE (512)
int fs_read(struct vfile *f, void *buf, size_t size) {
#define MAGIC_BUF_SIZE (4096 * 6)
if (f->fd == -1) {
SHA1_Init(&f->sha1_ctx);
typedef enum {
FILETYPE_DONT_PARSE,
FILETYPE_RAW,
FILETYPE_MEDIA,
FILETYPE_EBOOK,
FILETYPE_MARKUP,
FILETYPE_TEXT,
FILETYPE_FONT,
FILETYPE_ARCHIVE,
FILETYPE_OOXML,
FILETYPE_COMIC,
FILETYPE_MOBI,
FILETYPE_SIST2_SIDECAR,
FILETYPE_MSDOC,
FILETYPE_JSON,
FILETYPE_NDJSON,
} file_type_t;
f->fd = open(f->filepath, O_RDONLY);
if (f->fd == -1) {
return -1;
file_type_t get_file_type(unsigned int mime, size_t size, const char *filepath) {
int major_mime = MAJOR_MIME(mime);
if (!(SHOULD_PARSE(mime))) {
return FILETYPE_DONT_PARSE;
} else if (IS_RAW(mime)) {
return FILETYPE_RAW;
} else if ((major_mime == MimeVideo && size >= MIN_VIDEO_SIZE) ||
(major_mime == MimeImage && size >= MIN_IMAGE_SIZE) || major_mime == MimeAudio) {
return FILETYPE_MEDIA;
} else if (IS_PDF(mime)) {
return FILETYPE_EBOOK;
} else if (IS_MARKUP(mime)) {
return FILETYPE_MARKUP;
} else if (major_mime == MimeText) {
return FILETYPE_TEXT;
} else if (IS_FONT(mime)) {
return FILETYPE_FONT;
} else if (ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
IS_ARC(mime) ||
(IS_ARC_FILTER(mime) && should_parse_filtered_file(filepath))
)) {
return FILETYPE_ARCHIVE;
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(mime)) {
return FILETYPE_OOXML;
} else if (is_cbr(&ScanCtx.comic_ctx, mime) || is_cbz(&ScanCtx.comic_ctx, mime)) {
return FILETYPE_COMIC;
} else if (IS_MOBI(mime)) {
return FILETYPE_MOBI;
} else if (mime == MIME_SIST2_SIDECAR) {
return FILETYPE_SIST2_SIDECAR;
} else if (is_msdoc(&ScanCtx.msdoc_ctx, mime)) {
return FILETYPE_MSDOC;
} else if (is_json(&ScanCtx.json_ctx, mime)) {
return FILETYPE_JSON;
} else if (is_ndjson(&ScanCtx.json_ctx, mime)) {
return FILETYPE_NDJSON;
}
return FILETYPE_DONT_PARSE;
}
#define GET_MIME_ERROR_FATAL (-1)
int get_mime(parse_job_t *job) {
char *extension = job->filepath + job->ext;
int mime = 0;
if (job->vfile.st_size == 0) {
return MIME_EMPTY;
}
if (*extension != '\0' && (job->ext - job->base != 1)) {
mime = (int) mime_get_mime_by_ext(extension);
if (mime != 0) {
return mime;
}
}
int ret = (int) read(f->fd, buf, size);
if (ret != 0 && f->calculate_checksum) {
f->has_checksum = TRUE;
safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, ret);
if (ScanCtx.fast) {
return 0;
}
return ret;
}
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
void fs_close(struct vfile *f) {
if (f->fd != -1) {
SHA1_Final(f->sha1_digest, &f->sha1_ctx);
close(f->fd);
}
}
void fs_reset(struct vfile *f) {
if (f->fd != -1) {
lseek(f->fd, 0, SEEK_SET);
}
}
void set_dbg_current_file(parse_job_t *job) {
unsigned long long pid = (unsigned long long) pthread_self();
pthread_mutex_lock(&ScanCtx.dbg_current_files_mu);
g_hash_table_replace(ScanCtx.dbg_current_files, GINT_TO_POINTER(pid), job);
pthread_mutex_unlock(&ScanCtx.dbg_current_files_mu);
}
void parse(void *arg) {
parse_job_t *job = arg;
document_t *doc = malloc(sizeof(document_t));
doc->filepath = malloc(strlen(job->filepath) + 1);
set_dbg_current_file(job);
strcpy(doc->filepath, job->filepath);
doc->ext = (short) job->ext;
doc->base = (short) job->base;
char *rel_path = doc->filepath + ScanCtx.index.desc.root_len;
generate_doc_id(rel_path, doc->doc_id);
doc->meta_head = NULL;
doc->meta_tail = NULL;
doc->mime = 0;
doc->size = job->vfile.info.st_size;
doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;
int inc_ts = incremental_get(ScanCtx.original_table, doc->doc_id);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
pthread_mutex_lock(&ScanCtx.copy_table_mu);
incremental_mark_file(ScanCtx.copy_table, doc->doc_id);
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_skipped_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
CLOSE_FILE(job->vfile)
free(doc->filepath);
free(doc);
return;
}
if (ScanCtx.new_table != NULL) {
pthread_mutex_lock(&ScanCtx.copy_table_mu);
incremental_mark_file(ScanCtx.new_table, doc->doc_id);
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
// Get mime type with libmagic
if (job->vfile.read_rewindable == NULL) {
LOG_WARNING(job->filepath,
"File does not support rewindable reads, cannot guess Media type");
return 0;
}
char buf[MAGIC_BUF_SIZE];
if (LogCtx.very_verbose) {
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id)
}
if (job->vfile.info.st_size == 0) {
doc->mime = MIME_EMPTY;
} else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
doc->mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
}
if (doc->mime == 0 && !ScanCtx.fast) {
// Get mime type with libmagic
if (job->vfile.read_rewindable == NULL) {
LOG_WARNING(job->filepath,
"File does not support rewindable reads, cannot guess Media type");
goto abort;
}
int bytes_read = job->vfile.read_rewindable(&job->vfile, buf, MAGIC_BUF_SIZE);
if (bytes_read < 0) {
if (job->vfile.is_fs_file) {
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
} else {
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
}
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_failed_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
CLOSE_FILE(job->vfile)
free(doc->filepath);
free(doc);
return;
}
magic_t magic = magic_open(MAGIC_MIME_TYPE);
const char *magic_buffers[1] = {magic_database_buffer,};
size_t sizes[1] = {sizeof(magic_database_buffer),};
int load_ret = magic_load_buffers(magic, (void **) &magic_buffers, sizes, 1);
if (load_ret != 0) {
LOG_FATALF("parse.c", "Could not load libmagic database: (%d)", load_ret)
}
const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
if (magic_mime_str != NULL) {
doc->mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
LOG_DEBUGF(job->filepath, "libmagic: %s", magic_mime_str);
if (doc->mime == 0) {
LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
}
}
if (job->vfile.reset != NULL) {
job->vfile.reset(&job->vfile);
}
magic_close(magic);
}
int mmime = MAJOR_MIME(doc->mime);
if (!(SHOULD_PARSE(doc->mime))) {
} else if (IS_RAW(doc->mime)) {
parse_raw(&ScanCtx.raw_ctx, &job->vfile, doc);
} else if ((mmime == MimeVideo && doc->size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc->size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
parse_media(&ScanCtx.media_ctx, &job->vfile, doc, mime_get_mime_text(doc->mime));
} else if (IS_PDF(doc->mime)) {
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc);
} else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
if (IS_MARKUP(doc->mime)) {
parse_markup(&ScanCtx.text_ctx, &job->vfile, doc);
int bytes_read = job->vfile.read_rewindable(&job->vfile, buf, MAGIC_BUF_SIZE);
if (bytes_read < 0) {
if (job->vfile.is_fs_file) {
LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno));
} else {
parse_text(&ScanCtx.text_ctx, &job->vfile, doc);
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc));
}
} else if (IS_FONT(doc->mime)) {
parse_font(&ScanCtx.font_ctx, &job->vfile, doc);
return GET_MIME_ERROR_FATAL;
}
char *magic_mime_str = magic_buffer_embedded(buf, bytes_read);
if (magic_mime_str != NULL) {
mime = (int) mime_get_mime_by_string(magic_mime_str);
if (mime == 0) {
LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
free(magic_mime_str);
return 0;
}
free(magic_mime_str);
}
if (job->vfile.reset != NULL) {
job->vfile.reset(&job->vfile);
}
return mime;
}
void parse(parse_job_t *job) {
if (job->vfile.is_fs_file) {
job->vfile.read = fs_read;
job->vfile.read_rewindable = fs_read;
job->vfile.reset = fs_reset;
job->vfile.close = fs_close;
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
}
document_t *doc = malloc(sizeof(document_t));
strcpy(doc->filepath, job->filepath);
doc->ext = job->ext;
doc->base = job->base;
doc->meta_head = NULL;
doc->meta_tail = NULL;
doc->size = job->vfile.st_size;
doc->mtime = job->vfile.mtime;
doc->mime = get_mime(job);
generate_doc_id(doc->filepath + ScanCtx.index.desc.root_len, doc->doc_id);
if (doc->mime == GET_MIME_ERROR_FATAL) {
} else if (
ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
IS_ARC(doc->mime) ||
(IS_ARC_FILTER(doc->mime) && should_parse_filtered_file(doc->filepath, doc->ext))
)) {
parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc, ScanCtx.exclude, ScanCtx.exclude_extra);
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc->mime)) {
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, doc);
} else if (is_cbr(&ScanCtx.comic_ctx, doc->mime) || is_cbz(&ScanCtx.comic_ctx, doc->mime)) {
parse_comic(&ScanCtx.comic_ctx, &job->vfile, doc);
} else if (IS_MOBI(doc->mime)) {
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, doc);
} else if (doc->mime == MIME_SIST2_SIDECAR) {
parse_sidecar(&job->vfile, doc);
CLOSE_FILE(job->vfile)
free(doc->filepath);
free(doc);
return;
} else if (is_msdoc(&ScanCtx.msdoc_ctx, doc->mime)) {
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc);
} else if (is_json(&ScanCtx.json_ctx, doc->mime)) {
parse_json(&ScanCtx.json_ctx, &job->vfile, doc);
} else if (is_ndjson(&ScanCtx.json_ctx, doc->mime)) {
parse_ndjson(&ScanCtx.json_ctx, &job->vfile, doc);
}
abort:
if (database_mark_document(ProcData.index_db, doc->doc_id, doc->mtime)) {
CLOSE_FILE(job->vfile)
free(doc);
return;
}
if (LogCtx.very_verbose) {
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id);
}
switch (get_file_type(doc->mime, doc->size, doc->filepath)) {
case FILETYPE_RAW:
parse_raw(&ScanCtx.raw_ctx, &job->vfile, doc);
break;
case FILETYPE_MEDIA:
parse_media(&ScanCtx.media_ctx, &job->vfile, doc, mime_get_mime_text(doc->mime));
break;
case FILETYPE_EBOOK:
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc);
break;
case FILETYPE_MARKUP:
parse_markup(&ScanCtx.text_ctx, &job->vfile, doc);
break;
case FILETYPE_TEXT:
parse_text(&ScanCtx.text_ctx, &job->vfile, doc);
break;
case FILETYPE_FONT:
parse_font(&ScanCtx.font_ctx, &job->vfile, doc);
break;
case FILETYPE_ARCHIVE:
parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc, ScanCtx.exclude, ScanCtx.exclude_extra);
break;
case FILETYPE_OOXML:
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, doc);
break;
case FILETYPE_COMIC:
parse_comic(&ScanCtx.comic_ctx, &job->vfile, doc);
break;
case FILETYPE_MOBI:
parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, doc);
break;
case FILETYPE_SIST2_SIDECAR:
parse_sidecar(&job->vfile, doc);
CLOSE_FILE(job->vfile)
free(doc);
return;
case FILETYPE_MSDOC:
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc);
break;
case FILETYPE_JSON:
parse_json(&ScanCtx.json_ctx, &job->vfile, doc);
break;
case FILETYPE_NDJSON:
parse_ndjson(&ScanCtx.json_ctx, &job->vfile, doc);
break;
case FILETYPE_DONT_PARSE:
default:
break;
}
//Parent meta
if (job->parent[0] != '\0') {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
meta_parent->key = MetaParent;
strcpy(meta_parent->str_val, job->parent);
APPEND_META((doc), meta_parent)
doc->has_parent = TRUE;
} else {
doc->has_parent = FALSE;
APPEND_META((doc), meta_parent);
}
CLOSE_FILE(job->vfile)
@ -246,7 +245,3 @@ void parse(void *arg) {
write_document(doc);
}
void cleanup_parse() {
// noop
}

src/parsing/parse.h
View File

@ -2,15 +2,9 @@
#define SIST2_PARSE_H
#include "../sist.h"
#include "src/tpool.h"
#define MAGIC_BUF_SIZE (4096 * 6)
int fs_read(struct vfile *f, void *buf, size_t size);
void fs_close(struct vfile *f);
void fs_reset(struct vfile *f);
void parse(void *arg);
void cleanup_parse();
void parse(parse_job_t *arg);
#endif

src/parsing/sidecar.c
View File

@ -4,12 +4,12 @@
void parse_sidecar(vfile_t *vfile, document_t *doc) {
LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath)
LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath);
size_t size;
char *buf = read_all(vfile, &size);
if (buf == NULL) {
LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath)
LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath);
return;
}
@ -18,7 +18,7 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
cJSON *json = cJSON_Parse(buf);
if (json == NULL) {
LOG_ERRORF("sidecar.c", "Could not parse JSON sidecar %s", vfile->filepath)
LOG_ERRORF("sidecar.c", "Could not parse JSON sidecar %s", vfile->filepath);
return;
}
char *json_str = cJSON_PrintUnformatted(json);
@ -32,8 +32,7 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
generate_doc_id(rel_path, assoc_doc_id);
store_write(ScanCtx.index.meta_store, assoc_doc_id, sizeof(assoc_doc_id), json_str,
strlen(json_str) + 1);
database_write_document_sidecar(ProcData.index_db, assoc_doc_id, json_str);
cJSON_Delete(json);
free(json_str);

src/sist.h
View File

@ -27,6 +27,8 @@
#define UNUSED(x) __attribute__((__unused__)) x
#define MAX_THREADS (256)
#include "util.h"
#include "log.h"
#include "types.h"
@ -49,8 +51,11 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "2.14.2"
#define VERSION "3.0.0"
static const char *const Version = VERSION;
static const int VersionMajor = 3;
static const int VersionMinor = 0;
static const int VersionPatch = 0;
#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown

src/stats.c
View File

@ -1,343 +0,0 @@
#include "sist.h"
#include "io/serialize.h"
#include "ctx.h"
static GHashTable *FlatTree;
static GHashTable *BufferTable;
static GHashTable *AggMime;
static GHashTable *AggSize;
static GHashTable *AggDate;
#define SIZE_BUCKET (long)(5 * 1024 * 1024)
#define DATE_BUCKET (long)(2629800)
static long TotalSize = 0;
static long DocumentCount = 0;
typedef struct {
long size;
long count;
} agg_t;
void fill_tables(cJSON *document, UNUSED(const char index_id[SIST_INDEX_ID_LEN])) {
if (cJSON_GetObjectItem(document, "parent") != NULL) {
return;
}
const char *json_path = cJSON_GetObjectItem(document, "path")->valuestring;
char *path = malloc(strlen(json_path) + 1);
strcpy(path, json_path);
const char *json_mime = cJSON_GetObjectItem(document, "mime")->valuestring;
char *mime;
if (json_mime == NULL) {
mime = NULL;
} else {
mime = malloc(strlen(json_mime) + 1);
strcpy(mime, json_mime);
}
long size = (long) cJSON_GetObjectItem(document, "size")->valuedouble;
int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
// treemap
void *existing_path = g_hash_table_lookup(FlatTree, path);
if (existing_path == NULL) {
g_hash_table_insert(FlatTree, path, (gpointer) size);
} else {
g_hash_table_replace(FlatTree, path, (gpointer) ((long) existing_path + size));
}
// mime agg
if (mime != NULL) {
agg_t *orig_agg = g_hash_table_lookup(AggMime, mime);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggMime, mime, agg);
} else {
orig_agg->size += size;
orig_agg->count += 1;
free(mime);
}
}
// size agg
long size_bucket = size - (size % SIZE_BUCKET);
agg_t *orig_agg = g_hash_table_lookup(AggSize, (gpointer) size_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggSize, (gpointer) size_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
// date agg
long date_bucket = mtime - (mtime % DATE_BUCKET);
orig_agg = g_hash_table_lookup(AggDate, (gpointer) date_bucket);
if (orig_agg == NULL) {
agg_t *agg = malloc(sizeof(agg_t));
agg->size = size;
agg->count = 1;
g_hash_table_insert(AggDate, (gpointer) date_bucket, agg);
} else {
orig_agg->count += 1;
orig_agg->size += size;
}
TotalSize += size;
DocumentCount += 1;
}
void read_index_into_tables(index_t *index) {
char file_path[PATH_MAX];
READ_INDICES(file_path, index->path, read_index(file_path, index->desc.id, index->desc.type, fill_tables), {}, 1);
}
static size_t rfind(const char *str, int c) {
for (int i = (int)strlen(str); i >= 0; i--) {
if (str[i] == c) {
return i;
}
}
return -1;
}
int merge_up(double thresh) {
long min_size = (long) (thresh * (double) TotalSize);
int count = 0;
GHashTableIter iter;
g_hash_table_iter_init(&iter, FlatTree);
void *key;
void *value;
while (g_hash_table_iter_next(&iter, &key, &value)) {
long size = (long) value;
if (size < min_size) {
int stop = rfind(key, '/');
if (stop == -1) {
stop = 0;
}
char *parent = malloc(stop + 1);
strncpy(parent, key, stop);
*(parent + stop) = '\0';
void *existing_parent = g_hash_table_lookup(FlatTree, parent);
if (existing_parent == NULL) {
void *existing_parent2_key;
void *existing_parent2_val;
int found = g_hash_table_lookup_extended(BufferTable, parent, &existing_parent2_key,
&existing_parent2_val);
if (!found) {
g_hash_table_insert(BufferTable, parent, value);
} else {
g_hash_table_replace(BufferTable, parent, (gpointer) ((long) existing_parent2_val + size));
free(existing_parent2_key);
}
} else {
g_hash_table_replace(FlatTree, parent, (gpointer) ((long) existing_parent + size));
}
g_hash_table_iter_remove(&iter);
count += 1;
}
}
g_hash_table_iter_init(&iter, BufferTable);
while (g_hash_table_iter_next(&iter, &key, &value)) {
g_hash_table_insert(FlatTree, key, value);
g_hash_table_iter_remove(&iter);
}
int size = g_hash_table_size(FlatTree);
LOG_DEBUGF("stats.c", "Merge up iteration (%d merged, %d in tree)", count, size)
return count;
}
/**
 * Assumes dst is at least PATH_MAX * 4 bytes long
*/
void csv_escape(char *dst, const char *str) {
const char *ptr = str;
char *out = dst;
if (rfind(str, ',') == -1 && rfind(str, '"') == -1) {
strcpy(dst, str);
return;
}
*out++ = '"';
char c;
while ((c = *ptr++) != 0) {
if (c == '"') {
*out++ = '"';
*out++ = '"';
} else {
*out++ = c;
}
}
*out++ = '"';
*out = '\0';
}
int open_or_exit(const char *path) {
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
LOG_FATALF("stats.c", "Error while creating file: %s [%d]\n", strerror(errno), errno)
}
return fd;
}
#define TREEMAP_CSV_HEADER "path,size"
#define MIME_AGG_CSV_HEADER "mime,size,count"
#define SIZE_AGG_CSV_HEADER "bucket,size,count"
#define DATE_AGG_CSV_HEADER "bucket,size,count"
void write_treemap_csv(double thresh, const char *out_path) {
void *key;
void *value;
long min_size = (long) (thresh * (double) TotalSize);
int fd = open_or_exit(out_path);
int ret = write(fd, TREEMAP_CSV_HEADER, sizeof(TREEMAP_CSV_HEADER) - 1);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, FlatTree);
while (g_hash_table_iter_next(&iter, &key, &value)) {
long size = (long) value;
if (size >= min_size) {
char path_buf[PATH_MAX * 4];
char buf[PATH_MAX * 4 + 16];
csv_escape(path_buf, key);
size_t written = sprintf(buf, "\n%s,%ld", path_buf, (long) value);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
}
close(fd);
}
void write_agg_csv_str(const char *out_path, const char *header, GHashTable *table) {
void *key;
void *value;
char buf[4096];
int fd = open_or_exit(out_path);
int ret = write(fd, header, strlen(header));
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
agg_t *agg = value;
size_t written = sprintf(buf, "\n%s,%ld,%ld", (const char*)key, agg->size, agg->count);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
close(fd);
}
void write_agg_csv_long(const char *out_path, const char *header, GHashTable *table) {
void *key;
void *value;
char buf[4096];
int fd = open_or_exit(out_path);
int ret = write(fd, header, strlen(header));
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
GHashTableIter iter;
g_hash_table_iter_init(&iter, table);
while (g_hash_table_iter_next(&iter, &key, &value)) {
agg_t *agg = value;
size_t written = sprintf(buf, "\n%ld,%ld,%ld", (long)key, agg->size, agg->count);
ret = write(fd, buf, written);
if (ret == -1) {
LOG_FATALF("stats.c", "Write error: %s", strerror(errno))
}
}
close(fd);
}
int generate_stats(index_t *index, const double threshold, const char *out_prefix) {
FlatTree = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
BufferTable = g_hash_table_new(g_str_hash, g_str_equal);
AggMime = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
AggSize = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
AggDate = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free);
LOG_INFO("stats.c", "Generating stats...")
read_index_into_tables(index);
LOG_DEBUG("stats.c", "Read index into tables")
LOG_DEBUGF("stats.c", "Total size is %ld", TotalSize)
LOG_DEBUGF("stats.c", "Document count is %ld", DocumentCount)
LOG_DEBUGF("stats.c", "Merging small directories upwards with a threshold of %f%%", threshold * 100)
while (merge_up(threshold) > 100) {}
char tmp[PATH_MAX];
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "treemap.csv");
write_treemap_csv(threshold, tmp);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "mime_agg.csv");
write_agg_csv_str(tmp, MIME_AGG_CSV_HEADER, AggMime);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "size_agg.csv");
write_agg_csv_long(tmp, SIZE_AGG_CSV_HEADER, AggSize);
strncpy(tmp, out_prefix, sizeof(tmp));
strcat(tmp, "date_agg.csv");
write_agg_csv_long(tmp, DATE_AGG_CSV_HEADER, AggDate);
g_hash_table_remove_all(FlatTree);
g_hash_table_destroy(FlatTree);
g_hash_table_destroy(BufferTable);
g_hash_table_remove_all(AggMime);
g_hash_table_destroy(AggMime);
g_hash_table_remove_all(AggSize);
g_hash_table_destroy(AggSize);
g_hash_table_remove_all(AggDate);
g_hash_table_destroy(AggDate);
return 0;
}

src/stats.h
View File

@ -1,6 +0,0 @@
#ifndef SIST2_STATS_H
#define SIST2_STATS_H
int generate_stats(index_t *index, double threshold, const char* out_prefix);
#endif

src/tpool.c
View File

@ -2,260 +2,264 @@
#include "ctx.h"
#include "sist.h"
#include <pthread.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include "parsing/parse.h"
#define MAX_QUEUE_SIZE 1000000
#define BLANK_STR " "
typedef void (*thread_func_t)(void *arg);
typedef struct {
int thread_id;
tpool_t *pool;
} start_thread_arg_t;
typedef struct tpool_work {
void *arg;
thread_func_t func;
struct tpool_work *next;
} tpool_work_t;
typedef struct tpool {
tpool_work_t *work_head;
tpool_work_t *work_tail;
pthread_mutex_t work_mutex;
pthread_cond_t has_work_cond;
pthread_cond_t working_cond;
pthread_t *threads;
int thread_cnt;
int work_cnt;
int done_cnt;
int busy_cnt;
int throttle_stuck_cnt;
size_t mem_limit;
size_t page_size;
int free_arg;
int stop;
int waiting;
pthread_t threads[256];
int num_threads;
int print_progress;
void (*cleanup_func)();
struct {
job_type_t job_type;
int stop;
int waiting;
database_ipc_ctx_t ipc_ctx;
pthread_mutex_t mutex;
pthread_mutex_t data_mutex;
pthread_cond_t done_working_cond;
pthread_cond_t workers_initialized_cond;
int busy_count;
int initialized_count;
int thread_id_to_pid_mapping[MAX_THREADS];
char ipc_database_filepath[128];
} *shm;
} tpool_t;
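The shm member holds the pool state that must remain visible across fork()ed workers, which implies an anonymous shared mapping and PTHREAD_PROCESS_SHARED synchronization primitives (an assumption from the struct layout; the allocation itself is not shown in this hunk). A minimal self-contained sketch of that mechanism (compile with -pthread):
/* Sketch: a counter shared between a parent and a fork()ed child through an
 * anonymous shared mapping guarded by a process-shared mutex. */
#include <pthread.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
struct shared_state {
    pthread_mutex_t mu;
    int counter;
};
int main(void) {
    struct shared_state *shm = mmap(NULL, sizeof(*shm), PROT_READ | PROT_WRITE,
                                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    if (shm == MAP_FAILED) {
        return 1;
    }
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
    pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); // usable across processes
    pthread_mutex_init(&shm->mu, &attr);
    if (fork() == 0) { // child: increment under the lock
        pthread_mutex_lock(&shm->mu);
        shm->counter += 1;
        pthread_mutex_unlock(&shm->mu);
        _exit(0);
    }
    wait(NULL); // parent: the child's write is visible through the shared mapping
    printf("counter = %d\n", shm->counter); // prints 1
    munmap(shm, sizeof(*shm));
    return 0;
}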
/**
* Create a work object
*/
static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) {
if (func == NULL) {
return NULL;
void job_destroy(job_t *job) {
if (job->type == JOB_PARSE_JOB) {
free(job->parse_job);
}
tpool_work_t *work = malloc(sizeof(tpool_work_t));
work->func = func;
work->arg = arg;
work->next = NULL;
return work;
}
void tpool_dump_debug_info(tpool_t *pool) {
LOG_DEBUGF("tpool.c", "pool->thread_cnt = %d", pool->thread_cnt)
LOG_DEBUGF("tpool.c", "pool->work_cnt = %d", pool->work_cnt)
LOG_DEBUGF("tpool.c", "pool->done_cnt = %d", pool->done_cnt)
LOG_DEBUGF("tpool.c", "pool->busy_cnt = %d", pool->busy_cnt)
LOG_DEBUGF("tpool.c", "pool->stop = %d", pool->stop)
}
/**
* Pop work object from thread pool
*/
static tpool_work_t *tpool_work_get(tpool_t *pool) {
tpool_work_t *work = pool->work_head;
if (work == NULL) {
return NULL;
}
if (work->next == NULL) {
pool->work_head = NULL;
pool->work_tail = NULL;
} else {
pool->work_head = work->next;
}
return work;
free(job);
}
/**
* Push work object to thread pool
*/
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
int tpool_add_work(tpool_t *pool, job_t *job) {
tpool_work_t *work = tpool_work_create(func, arg);
if (work == NULL) {
return 0;
if (pool->shm->job_type == JOB_UNDEFINED) {
pool->shm->job_type = job->type;
} else if (pool->shm->job_type != job->type) {
LOG_FATAL("tpool.c", "FIXME: tpool cannot queue jobs with different types!");
}
while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) {
usleep(10000);
}
database_add_work(ProcData.ipc_db, job);
pthread_mutex_lock(&(pool->work_mutex));
if (pool->work_head == NULL) {
pool->work_head = work;
pool->work_tail = pool->work_head;
} else {
pool->work_tail->next = work;
pool->work_tail = work;
}
pool->work_cnt++;
pthread_cond_broadcast(&(pool->has_work_cond));
pthread_mutex_unlock(&(pool->work_mutex));
return 1;
return TRUE;
}
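database_add_work()/database_get_work() indicate that the job queue now lives in the shared IPC SQLite database rather than in an in-process linked list. A rough self-contained sketch of a SQLite-backed queue (schema and names invented here; this is not the commit's actual implementation):
/* Sketch: push a job row, pop the oldest row atomically.
 * DELETE ... RETURNING requires SQLite >= 3.35. */
#include <sqlite3.h>
#include <stdio.h>
static void push_job(sqlite3 *db, const char *filepath) {
    sqlite3_stmt *stmt;
    sqlite3_prepare_v2(db, "INSERT INTO jobs (filepath) VALUES (?)", -1, &stmt, NULL);
    sqlite3_bind_text(stmt, 1, filepath, -1, SQLITE_TRANSIENT);
    sqlite3_step(stmt);
    sqlite3_finalize(stmt);
}
static int pop_job(sqlite3 *db, char *out, size_t out_size) {
    sqlite3_stmt *stmt;
    int found = 0;
    sqlite3_prepare_v2(db,
                       "DELETE FROM jobs WHERE id = (SELECT MIN(id) FROM jobs) RETURNING filepath",
                       -1, &stmt, NULL);
    if (sqlite3_step(stmt) == SQLITE_ROW) {
        snprintf(out, out_size, "%s", (const char *) sqlite3_column_text(stmt, 0));
        found = 1;
    }
    sqlite3_finalize(stmt);
    return found;
}
int main(void) {
    sqlite3 *db;
    sqlite3_open(":memory:", &db);
    sqlite3_exec(db, "CREATE TABLE jobs (id INTEGER PRIMARY KEY, filepath TEXT)",
                 NULL, NULL, NULL);
    push_job(db, "/tmp/a.txt");
    char path[256];
    while (pop_job(db, path, sizeof(path))) {
        printf("got job: %s\n", path); // prints: got job: /tmp/a.txt
    }
    sqlite3_close(db);
    return 0;
}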
/**
* see: https://github.com/htop-dev/htop/blob/f782f821f7f8081cb43bbad1c37f32830a260a81/linux/LinuxProcessList.c
*/
__always_inline
static size_t _get_total_mem(tpool_t *pool) {
FILE *statmfile = fopen("/proc/self/statm", "r");
if (!statmfile)
return 0;
static void worker_thread_loop(tpool_t *pool) {
while (TRUE) {
if (pool->shm->stop) {
break;
}
long int dummy, dummy2, dummy3, dummy4, dummy5, dummy6;
long int m_resident;
if (pool->shm->job_type == JOB_UNDEFINED) {
// Wait before first job is queued
pthread_mutex_lock(&pool->shm->mutex);
pthread_cond_timedwait_ms(&pool->shm->ipc_ctx.has_work_cond, &pool->shm->mutex, 1000);
pthread_mutex_unlock(&pool->shm->mutex);
}
int r = fscanf(statmfile, "%ld %ld %ld %ld %ld %ld %ld",
&dummy, /* m_virt */
&m_resident,
&dummy2, /* m_share */
&dummy3, /* m_trs */
&dummy4, /* unused since Linux 2.6; always 0 */
&dummy5, /* m_drs */
&dummy6); /* unused since Linux 2.6; always 0 */
fclose(statmfile);
job_t *job = database_get_work(ProcData.ipc_db, pool->shm->job_type);
if (r == 7) {
return m_resident * pool->page_size;
} else {
return 0;
if (job != NULL) {
pthread_mutex_lock(&(pool->shm->data_mutex));
pool->shm->busy_count += 1;
pthread_mutex_unlock(&(pool->shm->data_mutex));
if (pool->shm->stop) {
break;
}
if (job->type == JOB_PARSE_JOB) {
parse(job->parse_job);
} else if (job->type == JOB_BULK_LINE) {
elastic_index_line(job->bulk_line);
}
job_destroy(job);
pthread_mutex_lock(&(pool->shm->data_mutex));
pool->shm->busy_count -= 1;
pthread_mutex_unlock(&(pool->shm->data_mutex));
pthread_mutex_lock(&(pool->shm->ipc_ctx.mutex));
pool->shm->ipc_ctx.completed_job_count += 1;
pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex));
}
if (pool->print_progress) {
int done = pool->shm->ipc_ctx.completed_job_count;
int count = pool->shm->ipc_ctx.completed_job_count + pool->shm->ipc_ctx.job_count;
if (LogCtx.json_logs) {
progress_bar_print_json(done,
count,
ScanCtx.stat_tn_size,
ScanCtx.stat_index_size, pool->shm->waiting);
} else {
progress_bar_print((double) done / count,
ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
}
}
if (job == NULL) {
pthread_mutex_lock(&pool->shm->mutex);
pthread_cond_signal(&pool->shm->done_working_cond);
pthread_mutex_unlock(&pool->shm->mutex);
}
}
}
static void worker_proc_init(tpool_t *pool, int thread_id) {
pthread_mutex_lock(&pool->shm->data_mutex);
pool->shm->thread_id_to_pid_mapping[thread_id] = getpid();
pthread_mutex_unlock(&pool->shm->data_mutex);
ProcData.thread_id = thread_id;
if (ScanCtx.index.path[0] != '\0') {
ProcData.index_db = database_create(ScanCtx.index.path, INDEX_DATABASE);
ProcData.index_db->ipc_ctx = &pool->shm->ipc_ctx;
database_open(ProcData.index_db);
}
pthread_mutex_lock(&pool->shm->mutex);
ProcData.ipc_db = database_create(pool->shm->ipc_database_filepath, IPC_CONSUMER_DATABASE);
ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx;
database_open(ProcData.ipc_db);
pthread_mutex_unlock(&pool->shm->mutex);
}
void worker_proc_cleanup(tpool_t *pool) {
if (ProcData.index_db != NULL) {
database_close(ProcData.index_db, FALSE);
}
database_close(ProcData.ipc_db, FALSE);
}
#ifndef SIST_DEBUG
#define TPOOL_FORK
#endif
/**
* Thread worker function
*/
static void *tpool_worker(void *arg) {
tpool_t *pool = arg;
int stuck_notified = 0;
int throttle_ms = 0;
tpool_t *pool = ((start_thread_arg_t *) arg)->pool;
#ifdef TPOOL_FORK
while (TRUE) {
pthread_mutex_lock(&pool->work_mutex);
if (pool->stop) {
int pid = fork();
if (pid == 0) {
worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id);
pthread_mutex_lock(&pool->shm->mutex);
pthread_cond_signal(&pool->shm->workers_initialized_cond);
pool->shm->initialized_count += 1;
pthread_mutex_unlock(&pool->shm->mutex);
worker_thread_loop(pool);
pthread_mutex_lock(&pool->shm->mutex);
pthread_cond_signal(&pool->shm->done_working_cond);
pthread_mutex_unlock(&pool->shm->mutex);
worker_proc_cleanup(pool);
exit(0);
} else {
int status;
waitpid(pid, &status, 0);
LOG_DEBUGF("tpool.c", "Child process terminated with status code %d", WEXITSTATUS(status));
pthread_mutex_lock(&(pool->shm->ipc_ctx.mutex));
pool->shm->ipc_ctx.completed_job_count += 1;
pthread_mutex_unlock(&(pool->shm->ipc_ctx.mutex));
pthread_mutex_lock(&(pool->shm->data_mutex));
pool->shm->busy_count -= 1;
pthread_mutex_unlock(&(pool->shm->data_mutex));
if (WIFSIGNALED(status)) {
int crashed_thread_id = -1;
for (int i = 0; i < MAX_THREADS; i++) {
if (pool->shm->thread_id_to_pid_mapping[i] == pid) {
crashed_thread_id = i;
break;
}
}
const char *job_filepath;
if (crashed_thread_id != -1) {
job_filepath = pool->shm->ipc_ctx.current_job[crashed_thread_id];
} else {
job_filepath = "unknown";
}
LOG_FATALF_NO_EXIT(
"tpool.c",
"Child process crashed (%s).\n"
BLANK_STR "The process was working on %s\n"
BLANK_STR "Please consider creating a bug report at https://github.com/simon987/sist2/issues !\n"
BLANK_STR "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs.\n",
strsignal(WTERMSIG(status)),
job_filepath
);
continue;
}
break;
}
if (pool->work_head == NULL) {
pthread_cond_wait(&(pool->has_work_cond), &(pool->work_mutex));
}
tpool_work_t *work = tpool_work_get(pool);
if (work != NULL) {
pool->busy_cnt += 1;
}
pthread_mutex_unlock(&(pool->work_mutex));
if (work != NULL) {
stuck_notified = 0;
throttle_ms = 0;
while (!pool->stop && pool->mem_limit > 0 && _get_total_mem(pool) >= pool->mem_limit) {
if (!stuck_notified && throttle_ms >= 90000) {
// notify the pool that this thread is stuck.
pthread_mutex_lock(&(pool->work_mutex));
pool->throttle_stuck_cnt += 1;
if (pool->throttle_stuck_cnt == pool->thread_cnt) {
LOG_ERROR("tpool.c", "Throttle memory limit too low, cannot proceed!");
pool->stop = TRUE;
}
pthread_mutex_unlock(&(pool->work_mutex));
stuck_notified = 1;
}
usleep(10000);
throttle_ms += 10;
}
if (pool->stop) {
break;
}
// we are not stuck anymore. cancel our notification.
if (stuck_notified) {
pthread_mutex_lock(&(pool->work_mutex));
pool->throttle_stuck_cnt -= 1;
pthread_mutex_unlock(&(pool->work_mutex));
}
work->func(work->arg);
if (pool->free_arg) {
free(work->arg);
}
free(work);
}
pthread_mutex_lock(&(pool->work_mutex));
if (work != NULL) {
pool->busy_cnt -= 1;
pool->done_cnt++;
}
if (pool->print_progress) {
if (LogCtx.json_logs) {
progress_bar_print_json(pool->done_cnt, pool->work_cnt, ScanCtx.stat_tn_size,
ScanCtx.stat_index_size, pool->waiting);
} else {
progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size,
ScanCtx.stat_index_size);
}
}
if (pool->work_head == NULL) {
pthread_cond_signal(&(pool->working_cond));
}
pthread_mutex_unlock(&(pool->work_mutex));
}
if (pool->cleanup_func != NULL) {
LOG_INFO("tpool.c", "Executing cleanup function")
pool->cleanup_func();
LOG_DEBUG("tpool.c", "Done executing cleanup function")
}
#else
worker_proc_init(pool, ((start_thread_arg_t *) arg)->thread_id);
pthread_mutex_lock(&pool->shm->mutex);
pthread_cond_signal(&pool->shm->workers_initialized_cond);
pool->shm->initialized_count += 1;
pthread_mutex_unlock(&pool->shm->mutex);
worker_thread_loop(pool);
pthread_mutex_lock(&pool->shm->mutex);
pthread_cond_signal(&pool->shm->done_working_cond);
pthread_mutex_unlock(&pool->shm->mutex);
#endif
pthread_cond_signal(&(pool->working_cond));
pthread_mutex_unlock(&(pool->work_mutex));
return NULL;
}
void tpool_wait(tpool_t *pool) {
LOG_DEBUG("tpool.c", "Waiting for worker threads to finish")
pthread_mutex_lock(&(pool->work_mutex));
LOG_DEBUG("tpool.c", "Waiting for worker threads to finish");
pthread_mutex_lock(&pool->shm->mutex);
pool->waiting = TRUE;
pool->shm->waiting = TRUE;
pool->shm->ipc_ctx.no_more_jobs = TRUE;
while (TRUE) {
if (pool->done_cnt < pool->work_cnt) {
pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
if (pool->shm->ipc_ctx.job_count > 0) {
pthread_cond_wait(&(pool->shm->done_working_cond), &pool->shm->mutex);
} else {
LOG_INFOF("tpool.c", "Received head=NULL signal, busy_cnt=%d", pool->busy_cnt);
if (pool->done_cnt == pool->work_cnt && pool->busy_cnt == 0) {
pool->stop = TRUE;
if (pool->shm->ipc_ctx.job_count == 0 && pool->shm->busy_count == 0) {
pool->shm->stop = TRUE;
break;
}
}
@ -263,34 +267,21 @@ void tpool_wait(tpool_t *pool) {
if (pool->print_progress && !LogCtx.json_logs) {
progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
}
pthread_mutex_unlock(&(pool->work_mutex));
pthread_mutex_unlock(&pool->shm->mutex);
LOG_INFO("tpool.c", "Worker threads finished")
LOG_INFO("tpool.c", "Worker threads finished");
}
void tpool_destroy(tpool_t *pool) {
if (pool == NULL) {
return;
}
LOG_INFO("tpool.c", "Destroying thread pool");
LOG_INFO("tpool.c", "Destroying thread pool")
database_close(ProcData.ipc_db, FALSE);
pthread_mutex_lock(&(pool->work_mutex));
tpool_work_t *work = pool->work_head;
int count = 0;
while (work != NULL) {
tpool_work_t *tmp = work->next;
free(work);
work = tmp;
count += 1;
}
pthread_mutex_lock(&pool->shm->mutex);
pthread_cond_broadcast(&pool->shm->ipc_ctx.has_work_cond);
pthread_mutex_unlock(&pool->shm->mutex);
LOG_DEBUGF("tpool.c", "Destroyed %d jobs", count);
pthread_cond_broadcast(&(pool->has_work_cond));
pthread_mutex_unlock(&(pool->work_mutex));
for (size_t i = 0; i < pool->thread_cnt; i++) {
for (size_t i = 0; i < pool->num_threads; i++) {
pthread_t thread = pool->threads[i];
if (thread != 0) {
void *_;
@ -298,53 +289,79 @@ void tpool_destroy(tpool_t *pool) {
}
}
LOG_INFO("tpool.c", "Final cleanup")
pthread_mutex_destroy(&pool->shm->ipc_ctx.mutex);
pthread_mutex_destroy(&pool->shm->mutex);
pthread_cond_destroy(&pool->shm->ipc_ctx.has_work_cond);
pthread_cond_destroy(&pool->shm->done_working_cond);
pthread_mutex_destroy(&(pool->work_mutex));
pthread_cond_destroy(&(pool->has_work_cond));
pthread_cond_destroy(&(pool->working_cond));
free(pool->threads);
free(pool);
munmap(pool->shm, sizeof(*pool->shm));
}
/**
* Create a thread pool
 * @param thread_cnt Worker count
 * @param print_progress Whether to print the progress bar as jobs complete
*/
tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress, size_t mem_limit) {
tpool_t *tpool_create(int thread_cnt, int print_progress) {
tpool_t *pool = malloc(sizeof(tpool_t));
pool->thread_cnt = thread_cnt;
pool->work_cnt = 0;
pool->done_cnt = 0;
pool->busy_cnt = 0;
pool->throttle_stuck_cnt = 0;
pool->mem_limit = mem_limit;
pool->stop = FALSE;
pool->waiting = FALSE;
pool->free_arg = free_arg;
pool->cleanup_func = cleanup_func;
pool->threads = calloc(sizeof(pthread_t), thread_cnt);
pool->shm = mmap(NULL, sizeof(*pool->shm), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
pool->num_threads = thread_cnt;
pool->shm->ipc_ctx.job_count = 0;
pool->shm->ipc_ctx.no_more_jobs = FALSE;
pool->shm->stop = FALSE;
pool->shm->waiting = FALSE;
pool->shm->job_type = JOB_UNDEFINED;
memset(pool->threads, 0, sizeof(pool->threads));
pool->print_progress = print_progress;
pool->page_size = getpagesize();
sprintf(pool->shm->ipc_database_filepath, "/dev/shm/sist2-ipc-%d.sqlite", getpid());
pthread_mutex_init(&(pool->work_mutex), NULL);
pthread_mutexattr_t mutexattr;
pthread_mutexattr_init(&mutexattr);
pthread_mutexattr_setpshared(&mutexattr, TRUE);
pthread_cond_init(&(pool->has_work_cond), NULL);
pthread_cond_init(&(pool->working_cond), NULL);
pthread_mutex_init(&(pool->shm->mutex), &mutexattr);
pthread_mutex_init(&(pool->shm->data_mutex), &mutexattr);
pthread_mutex_init(&(pool->shm->ipc_ctx.mutex), &mutexattr);
pthread_mutex_init(&(pool->shm->ipc_ctx.db_mutex), &mutexattr);
pthread_mutex_init(&(pool->shm->ipc_ctx.index_db_mutex), &mutexattr);
pool->work_head = NULL;
pool->work_tail = NULL;
pthread_condattr_t condattr;
pthread_condattr_init(&condattr);
pthread_condattr_setpshared(&condattr, TRUE);
pthread_cond_init(&(pool->shm->ipc_ctx.has_work_cond), &condattr);
pthread_cond_init(&(pool->shm->done_working_cond), &condattr);
pthread_cond_init(&(pool->shm->workers_initialized_cond), &condattr);
ProcData.ipc_db = database_create(pool->shm->ipc_database_filepath, IPC_PRODUCER_DATABASE);
ProcData.ipc_db->ipc_ctx = &pool->shm->ipc_ctx;
database_initialize(ProcData.ipc_db);
return pool;
}
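The pool's entire shared state lives in one anonymous MAP_SHARED mapping, and every mutex and condition variable inside it is created with the process-shared attribute; that is what lets the fork()ed workers and the parent synchronize through plain pthread calls. A minimal, self-contained sketch of the same pattern (illustrative only; sist2 passes TRUE to pthread_mutexattr_setpshared(), which equals PTHREAD_PROCESS_SHARED on glibc):

#include <pthread.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

typedef struct {
    pthread_mutex_t mutex;
    pthread_cond_t cond;
    int ready;
} shared_state_t;

int main(void) {
    // Anonymous shared mapping, zero-filled, visible to parent and child.
    shared_state_t *shm = mmap(NULL, sizeof(*shm), PROT_READ | PROT_WRITE,
                               MAP_SHARED | MAP_ANONYMOUS, -1, 0);

    pthread_mutexattr_t mattr;
    pthread_mutexattr_init(&mattr);
    pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED);
    pthread_mutex_init(&shm->mutex, &mattr);

    pthread_condattr_t cattr;
    pthread_condattr_init(&cattr);
    pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED);
    pthread_cond_init(&shm->cond, &cattr);

    if (fork() == 0) {
        pthread_mutex_lock(&shm->mutex);
        shm->ready = 1;
        pthread_cond_signal(&shm->cond); // wakes the parent across processes
        pthread_mutex_unlock(&shm->mutex);
        _exit(0);
    }

    pthread_mutex_lock(&shm->mutex);
    while (!shm->ready)
        pthread_cond_wait(&shm->cond, &shm->mutex);
    pthread_mutex_unlock(&shm->mutex);

    wait(NULL);
    munmap(shm, sizeof(*shm));
    puts("child signalled through shared memory");
    return 0;
}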
void tpool_start(tpool_t *pool) {
LOG_INFOF("tpool.c", "Starting thread pool with %d threads", pool->thread_cnt)
LOG_INFOF("tpool.c", "Starting thread pool with %d threads", pool->num_threads);
for (size_t i = 0; i < pool->thread_cnt; i++) {
pthread_create(&pool->threads[i], NULL, tpool_worker, pool);
pthread_mutex_lock(&pool->shm->mutex);
for (int i = 0; i < pool->num_threads; i++) {
start_thread_arg_t *arg = malloc(sizeof(start_thread_arg_t));
arg->thread_id = i + 1;
arg->pool = pool;
pthread_create(&pool->threads[i], NULL, tpool_worker, arg);
}
// Only open the database when all workers are done initializing
while (pool->shm->initialized_count != pool->num_threads) {
pthread_cond_wait(&pool->shm->workers_initialized_cond, &pool->shm->mutex);
}
pthread_mutex_unlock(&pool->shm->mutex);
database_open(ProcData.ipc_db);
}
View File
@ -2,20 +2,24 @@
#define SIST2_TPOOL_H
#include "sist.h"
#include "third-party/libscan/libscan/scan.h"
#include "index/elastic.h"
#include "src/database/database.h"
struct tpool;
typedef struct tpool tpool_t;
typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(int num, int print_progress);
tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress, size_t mem_limit);
void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *pool);
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);
int tpool_add_work(tpool_t *pool, job_t *job);
void tpool_wait(tpool_t *pool);
void tpool_dump_debug_info(tpool_t *pool);
void job_destroy(job_t *job);
#endif
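With the process-pool interface the caller no longer submits a function pointer per task; it enqueues job_t records and each worker dispatches on job->type (JOB_PARSE_JOB or JOB_BULK_LINE in worker_thread_loop). A hedged usage sketch; make_parse_job() is a hypothetical helper, not something this PR defines:

// Hedged lifecycle sketch for the new API; make_parse_job() is hypothetical.
void scan_example(void) {
    tpool_t *pool = tpool_create(8, TRUE); // 8 workers, print progress
    tpool_start(pool); // spawns workers, then opens the IPC producer database

    job_t *job = make_parse_job("/data/file.pdf"); // hypothetical constructor
    tpool_add_work(pool, job); // producer side of the SQLite-backed IPC queue

    tpool_wait(pool);    // returns once job_count == 0 and no worker is busy
    tpool_destroy(pool); // joins workers, destroys pshared primitives, munmaps
}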
View File
@ -1,24 +1,26 @@
#ifndef SIST2_TYPES_H
#define SIST2_TYPES_H
#define INDEX_TYPE_NDJSON "ndjson"
typedef struct database database_t;
typedef struct index_descriptor {
char id[SIST_INDEX_ID_LEN];
char version[64];
int version_major;
int version_minor;
int version_patch;
long timestamp;
char root[PATH_MAX];
char rewrite_url[8192];
short root_len;
int root_len;
char name[1024];
char type[64];
} index_descriptor_t;
typedef struct index_t {
struct index_descriptor desc;
struct store_t *store;
struct store_t *tag_store;
struct store_t *meta_store;
database_t *db;
char path[PATH_MAX];
} index_t;
View File
@ -25,7 +25,6 @@ dyn_buffer_t url_escape(char *str) {
}
char *abspath(const char *path) {
char *expanded = expandpath(path);
char *abs = realpath(expanded, NULL);
@ -34,8 +33,7 @@ char *abspath(const char *path) {
return NULL;
}
if (strlen(abs) > 1) {
abs = realloc(abs, strlen(abs) + 2);
strcat(abs, "/");
abs = realloc(abs, strlen(abs) + 1);
}
return abs;
@ -76,9 +74,8 @@ char *expandpath(const char *path) {
}
}
char *expanded = malloc(strlen(tmp) + 2);
char *expanded = malloc(strlen(tmp) + 1);
strcpy(expanded, tmp);
strcat(expanded, "/");
wordfree(&w);
return expanded;
@ -103,7 +100,13 @@ void progress_bar_print_json(size_t done, size_t count, size_t tn_size, size_t i
void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
if (isnan(percentage)) {
return;
}
// TODO: Fix this with shm/ctx
static int last_val = -1;
int val = (int) (percentage * 100);
if (last_val == val || val > 100) {
return;
@ -148,10 +151,6 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
PrintingProgressBar = TRUE;
}
GHashTable *incremental_get_table() {
GHashTable *file_table = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
return file_table;
}
const char *find_file_in_paths(const char *paths[], const char *filename) {
@ -165,7 +164,7 @@ const char *find_file_in_paths(const char *paths[], const char *filename) {
char path[PATH_MAX];
snprintf(path, sizeof(path), "%s%s", apath, filename);
LOG_DEBUGF("util.c", "Looking for '%s' in folder '%s'", filename, apath)
LOG_DEBUGF("util.c", "Looking for '%s' in folder '%s'", filename, apath);
free(apath);
struct stat info;
@ -267,3 +266,39 @@ void str_unescape(char *dst, const char *str) {
}
*cur = '\0';
}
#define NSEC_PER_SEC 1000000000
struct timespec timespec_normalise(struct timespec ts) {
while (ts.tv_nsec >= NSEC_PER_SEC) {
ts.tv_sec += 1;
ts.tv_nsec -= NSEC_PER_SEC;
}
while (ts.tv_nsec <= -NSEC_PER_SEC) {
ts.tv_sec -= 1;
ts.tv_nsec += NSEC_PER_SEC;
}
if (ts.tv_nsec < 0) {
ts.tv_sec -= 1;
ts.tv_nsec = (NSEC_PER_SEC + ts.tv_nsec);
}
return ts;
}
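// e.g. {.tv_sec = 1, .tv_nsec = 1700000000} normalises to {.tv_sec = 2, .tv_nsec = 700000000}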
struct timespec timespec_add(struct timespec ts1, long usec) {
ts1 = timespec_normalise(ts1);
struct timespec ts2 = timespec_normalise((struct timespec) {
.tv_sec = 0,
.tv_nsec = usec * 1000
});
ts1.tv_sec += ts2.tv_sec;
ts1.tv_nsec += ts2.tv_nsec;
return timespec_normalise(ts1);
}
View File
@ -5,8 +5,6 @@
#include <stdlib.h>
#include <string.h>
#include <glib.h>
#include "third-party/utf8.h/utf8.h"
#include "libscan/scan.h"
@ -22,9 +20,6 @@ extern int PrintingProgressBar;
void progress_bar_print_json(size_t done, size_t count, size_t tn_size, size_t index_size, int waiting);
void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
GHashTable *incremental_get_table();
const char *find_file_in_paths(const char **paths, const char *filename);
@ -100,31 +95,23 @@ static void generate_doc_id(const char *rel_path, char *doc_id) {
buf2hex(md, sizeof(md), doc_id);
}
__always_inline
static void incremental_put(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN], int mtime) {
char *ptr = malloc(SIST_DOC_ID_LEN);
strcpy(ptr, doc_id);
g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
}
#define MILLISECOND 1000
__always_inline
static int incremental_get(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
if (table != NULL) {
return GPOINTER_TO_INT(g_hash_table_lookup(table, doc_id));
} else {
return 0;
}
}
struct timespec timespec_add(struct timespec ts1, long usec);
/**
* Marks a file by adding it to a table.
* !!Not thread safe.
*/
__always_inline
static int incremental_mark_file(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
char *ptr = malloc(SIST_DOC_ID_LEN);
strcpy(ptr, doc_id);
return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
}
#define TIMER_INIT() struct timespec timer_begin
#define TIMER_START() clock_gettime(CLOCK_REALTIME, &timer_begin)
#define TIMER_END(x) do { \
struct timespec timer_end; \
clock_gettime(CLOCK_REALTIME, &timer_end); \
x = (timer_end.tv_sec - timer_begin.tv_sec) * 1000000 + (timer_end.tv_nsec - timer_begin.tv_nsec) / 1000; \
} while (0)
#define pthread_cond_timedwait_ms(cond, mutex, delay_ms) do {\
struct timespec now; \
clock_gettime(CLOCK_REALTIME, &now); \
struct timespec end_time = timespec_add(now, MILLISECOND * delay_ms); \
pthread_cond_timedwait(cond, mutex, &end_time); \
} while (0)
#endif
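pthread_cond_timedwait_ms converts a relative delay in milliseconds into the absolute CLOCK_REALTIME deadline that pthread_cond_timedwait() expects; MILLISECOND is 1000 because timespec_add() takes microseconds. A sketch of what a 250 ms wait expands to; unlike the macro, it keeps the return value so callers could distinguish a signal from a timeout:

// Equivalent to pthread_cond_timedwait_ms(cond, mutex, 250), with the
// return value preserved (0 = signalled, ETIMEDOUT = timed out).
static int timedwait_250ms(pthread_cond_t *cond, pthread_mutex_t *mutex) {
    struct timespec now;
    clock_gettime(CLOCK_REALTIME, &now);
    struct timespec end_time = timespec_add(now, MILLISECOND * 250);
    return pthread_cond_timedwait(cond, mutex, &end_time);
}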
View File
@ -1,15 +1,13 @@
#include "serve.h"
#include "src/sist.h"
#include "src/io/store.h"
#include "static_generated.c"
#include "src/index/elastic.h"
#include "src/index/web.h"
#include "src/auth0/auth0_c_api.h"
#include "src/web/web_util.h"
#include <src/ctx.h>
#define HTTP_SERVER_HEADER "Server: sist2/" VERSION "\r\n"
#define HTTP_TEXT_TYPE_HEADER "Content-Type: text/plain;charset=utf-8\r\n"
#define HTTP_REPLY_NOT_FOUND mg_http_reply(nc, 404, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Not found");
@ -20,62 +18,6 @@ static struct mg_http_serve_opts DefaultServeOpts = {
.mime_types = ""
};
__always_inline
static char *address_to_string(struct mg_addr *addr) {
static char address_to_string_buf[INET6_ADDRSTRLEN];
return mg_ntoa(addr, address_to_string_buf, sizeof(address_to_string_buf));
}
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
mg_printf(
nc,
"HTTP/1.1 %d %s\r\n"
HTTP_SERVER_HEADER
"Content-Length: %d\r\n"
"%s\r\n\r\n",
status_code, "OK",
length,
extra_headers
);
}
index_t *get_index_by_id(const char *index_id) {
for (int i = WebCtx.index_count; i >= 0; i--) {
if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) {
return &WebCtx.indices[i];
}
}
return NULL;
}
store_t *get_store(const char *index_id) {
index_t *idx = get_index_by_id(index_id);
if (idx != NULL) {
return idx->store;
}
return NULL;
}
store_t *get_tag_store(const char *index_id) {
index_t *idx = get_index_by_id(index_id);
if (idx != NULL) {
return idx->tag_store;
}
return NULL;
}
void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
if (WebCtx.dev) {
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &DefaultServeOpts);
} else {
send_response_line(nc, 200, sizeof(index_html), "Content-Type: text/html");
mg_send(nc, index_html, sizeof(index_html));
}
}
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
@ -87,7 +29,7 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
memcpy(arg_index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
*(arg_index_id + SIST_INDEX_ID_LEN - 1) = '\0';
index_t *index = get_index_by_id(arg_index_id);
index_t *index = web_get_index_by_id(arg_index_id);
if (index == NULL) {
HTTP_REPLY_NOT_FOUND
return;
@ -123,87 +65,58 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
mg_http_serve_file(nc, hm, full_path, &opts);
}
void javascript(struct mg_connection *nc, struct mg_http_message *hm) {
void serve_index_html(struct mg_connection *nc, struct mg_http_message *hm) {
if (WebCtx.dev) {
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &DefaultServeOpts);
} else {
web_serve_asset_index_html(nc);
}
}
void serve_index_js(struct mg_connection *nc, struct mg_http_message *hm) {
if (WebCtx.dev) {
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/index.js", &DefaultServeOpts);
} else {
send_response_line(nc, 200, sizeof(index_js), "Content-Type: application/javascript");
mg_send(nc, index_js, sizeof(index_js));
web_serve_asset_index_js(nc);
}
}
void javascript_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
void serve_chunk_vendors_js(struct mg_connection *nc, struct mg_http_message *hm) {
if (WebCtx.dev) {
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/chunk-vendors.js", &DefaultServeOpts);
} else {
send_response_line(nc, 200, sizeof(chunk_vendors_js), "Content-Type: application/javascript");
mg_send(nc, chunk_vendors_js, sizeof(chunk_vendors_js));
web_serve_asset_chunk_vendors_js(nc);
}
}
void favicon(struct mg_connection *nc, struct mg_http_message *hm) {
send_response_line(nc, 200, sizeof(favicon_ico), "Content-Type: image/x-icon");
mg_send(nc, favicon_ico, sizeof(favicon_ico));
void serve_favicon_ico(struct mg_connection *nc, struct mg_http_message *hm) {
web_serve_asset_favicon_ico(nc);
}
void style(struct mg_connection *nc, struct mg_http_message *hm) {
send_response_line(nc, 200, sizeof(index_css), "Content-Type: text/css");
mg_send(nc, index_css, sizeof(index_css));
void serve_style_css(struct mg_connection *nc, struct mg_http_message *hm) {
web_serve_asset_style_css(nc);
}
void style_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
send_response_line(nc, 200, sizeof(chunk_vendors_css), "Content-Type: text/css");
mg_send(nc, chunk_vendors_css, sizeof(chunk_vendors_css));
void serve_chunk_vendors_css(struct mg_connection *nc, struct mg_http_message *hm) {
web_serve_asset_chunk_vendors_css(nc);
}
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
void serve_thumbnail(struct mg_connection *nc, struct mg_http_message *hm, const char *arg_index,
const char *arg_doc_id, int arg_num) {
int has_thumbnail_index = FALSE;
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 4) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
has_thumbnail_index = TRUE;
}
char arg_doc_id[SIST_DOC_ID_LEN];
char arg_index[SIST_INDEX_ID_LEN];
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
store_t *store = get_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
database_t *db = web_get_database(arg_index);
if (db == NULL) {
LOG_DEBUGF("serve.c", "Could not get database for index: %s", arg_index);
HTTP_REPLY_NOT_FOUND
return;
}
char *data;
size_t data_len = 0;
if (has_thumbnail_index) {
const char *tn_index = hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2;
char tn_key[sizeof(arg_doc_id) + sizeof(char) * 4];
memcpy(tn_key, arg_doc_id, sizeof(arg_doc_id));
memcpy(tn_key + sizeof(arg_doc_id) - 1, tn_index, sizeof(char) * 4);
*(tn_key + sizeof(tn_key) - 1) = '\0';
data = store_read(store, (char *) tn_key, sizeof(tn_key), &data_len);
} else {
data = store_read(store, (char *) arg_doc_id, sizeof(arg_doc_id), &data_len);
}
void *data = database_read_thumbnail(db, arg_doc_id, arg_num, &data_len);
if (data_len != 0) {
send_response_line(
web_send_headers(
nc, 200, data_len,
"Content-Type: image/jpeg\r\n"
"Cache-Control: max-age=31536000"
@ -216,10 +129,50 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
}
}
void search(struct mg_connection *nc, struct mg_http_message *hm) {
void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 5) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND
return;
}
char arg_doc_id[SIST_DOC_ID_LEN];
char arg_index[SIST_INDEX_ID_LEN];
char arg_num[5] = {0};
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
memcpy(arg_num, hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 3, 4);
int num = (int) strtol(arg_num, NULL, 10);
serve_thumbnail(nc, hm, arg_index, arg_doc_id, num);
}
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND
return;
}
char arg_doc_id[SIST_DOC_ID_LEN];
char arg_index[SIST_INDEX_ID_LEN];
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
serve_thumbnail(nc, hm, arg_index, arg_doc_id, 0);
}
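// Route shapes dispatched in ev_router() below; both funnel into serve_thumbnail():
//   /t/<index-id>/<doc-id>        -> thumbnail(), thumbnail number 0
//   /t/<index-id>/<doc-id>/<num>  -> thumbnail_with_num(), <num> parsed with strtol()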
void search(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request");
mg_http_reply(nc, 400, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
return;
}
@ -266,7 +219,7 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
if (strcmp(MG_VERSION, EXPECTED_MONGOOSE_VERSION) != 0) {
LOG_WARNING("serve.c", "sist2 was not linked with latest mongoose version, "
"serving file from disk might not work as expected.")
"serving file from disk might not work as expected.");
}
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
@ -285,7 +238,7 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
idx->desc.root, path_unescaped, strlen(path_unescaped) == 0 ? "" : "/",
name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path)
LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path);
char disposition[8192];
snprintf(disposition, sizeof(disposition),
@ -372,7 +325,7 @@ void index_info(struct mg_connection *nc) {
char *json_str = cJSON_PrintUnformatted(json);
send_response_line(nc, 200, strlen(json_str), "Content-Type: application/json");
web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, strlen(json_str));
free(json_str);
cJSON_Delete(json);
@ -382,7 +335,7 @@ void index_info(struct mg_connection *nc) {
void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND
return;
}
@ -412,7 +365,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
next = parent->valuestring;
}
index_t *idx = get_index_by_id(index_id->valuestring);
index_t *idx = web_get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
@ -431,9 +384,9 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
void status(struct mg_connection *nc) {
char *status = elastic_get_status();
if (strcmp(status, "open") == 0) {
send_response_line(nc, 204, 0, "Content-Type: application/json");
web_send_headers(nc, 204, 0, "Content-Type: application/json");
} else {
send_response_line(nc, 500, 0, "Content-Type: application/json");
web_send_headers(nc, 500, 0, "Content-Type: application/json");
}
free(status);
@ -475,114 +428,114 @@ tag_req_t *parse_tag_request(cJSON *json) {
}
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
char arg_index[SIST_INDEX_ID_LEN];
memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
LOG_DEBUG("serve.c", "Invalid tag request")
HTTP_REPLY_NOT_FOUND
return;
}
store_t *store = get_tag_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
HTTP_REPLY_NOT_FOUND
return;
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body);
tag_req_t *arg_req = parse_tag_request(json);
if (arg_req == NULL) {
LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index)
cJSON_Delete(json);
free(body);
mg_http_reply(nc, 400, "", "Invalid request");
return;
}
cJSON *arr = NULL;
size_t data_len = 0;
const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len);
if (data_len == 0) {
arr = cJSON_CreateArray();
} else {
arr = cJSON_Parse(data);
}
if (arg_req->delete) {
if (data_len > 0) {
cJSON *element = NULL;
int i = 0;
cJSON_ArrayForEach(element, arr) {
if (strcmp(element->valuestring, arg_req->name) == 0) {
cJSON_DeleteItemFromArray(arr, i);
break;
}
i++;
}
}
char *buf = malloc(sizeof(char) * 8192);
snprintf(buf, 8192,
"{"
" \"script\" : {"
" \"source\": \"if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }\","
" \"lang\": \"painless\","
" \"params\" : {"
" \"tag\" : \"%s\""
" }"
" }"
"}", arg_req->name
);
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
} else {
cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
char *buf = malloc(sizeof(char) * 8192);
snprintf(buf, 8192,
"{"
" \"script\" : {"
" \"source\": \"if(ctx._source.tag == null) {ctx._source.tag = new ArrayList()} ctx._source.tag.add(params.tag)\","
" \"lang\": \"painless\","
" \"params\" : {"
" \"tag\" : \"%s\""
" }"
" }"
"}", arg_req->name
);
char url[4096];
snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
}
char *json_str = cJSON_PrintUnformatted(arr);
store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1);
store_flush(store);
free(arg_req);
free(json_str);
cJSON_Delete(json);
cJSON_Delete(arr);
free(body);
// if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
// LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
// HTTP_REPLY_NOT_FOUND
// return;
// }
//
// char arg_index[SIST_INDEX_ID_LEN];
// memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
// *(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
//
// if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
// LOG_DEBUG("serve.c", "Invalid tag request")
// HTTP_REPLY_NOT_FOUND
// return;
// }
//
// store_t *store = get_tag_store(arg_index);
// if (store == NULL) {
// LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
// HTTP_REPLY_NOT_FOUND
// return;
// }
//
// char *body = malloc(hm->body.len + 1);
// memcpy(body, hm->body.ptr, hm->body.len);
// *(body + hm->body.len) = '\0';
// cJSON *json = cJSON_Parse(body);
//
// tag_req_t *arg_req = parse_tag_request(json);
// if (arg_req == NULL) {
// LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index)
// cJSON_Delete(json);
// free(body);
// mg_http_reply(nc, 400, "", "Invalid request");
// return;
// }
//
// cJSON *arr = NULL;
//
// size_t data_len = 0;
// const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len);
// if (data_len == 0) {
// arr = cJSON_CreateArray();
// } else {
// arr = cJSON_Parse(data);
// }
//
// if (arg_req->delete) {
//
// if (data_len > 0) {
// cJSON *element = NULL;
// int i = 0;
// cJSON_ArrayForEach(element, arr) {
// if (strcmp(element->valuestring, arg_req->name) == 0) {
// cJSON_DeleteItemFromArray(arr, i);
// break;
// }
// i++;
// }
// }
//
// char *buf = malloc(sizeof(char) * 8192);
// snprintf(buf, 8192,
// "{"
// " \"script\" : {"
// " \"source\": \"if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }\","
// " \"lang\": \"painless\","
// " \"params\" : {"
// " \"tag\" : \"%s\""
// " }"
// " }"
// "}", arg_req->name
// );
//
// char url[4096];
// snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
// nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
//
// } else {
// cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
//
// char *buf = malloc(sizeof(char) * 8192);
// snprintf(buf, 8192,
// "{"
// " \"script\" : {"
// " \"source\": \"if(ctx._source.tag == null) {ctx._source.tag = new ArrayList()} ctx._source.tag.add(params.tag)\","
// " \"lang\": \"painless\","
// " \"params\" : {"
// " \"tag\" : \"%s\""
// " }"
// " }"
// "}", arg_req->name
// );
//
// char url[4096];
// snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
// nc->fn_data = web_post_async(url, buf, WebCtx.es_insecure_ssl);
// }
//
// char *json_str = cJSON_PrintUnformatted(arr);
// store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1);
// store_flush(store);
//
// free(arg_req);
// free(json_str);
// cJSON_Delete(json);
// cJSON_Delete(arr);
// free(body);
}
int validate_auth(struct mg_connection *nc, struct mg_http_message *hm) {
@ -601,7 +554,7 @@ int check_auth0(struct mg_http_message *hm) {
struct mg_str *cookie = mg_http_get_header(hm, "Cookie");
if (cookie == NULL) {
LOG_WARNING("serve.c", "Unauthorized request (no auth cookie)")
LOG_WARNING("serve.c", "Unauthorized request (no auth cookie)");
return FALSE;
}
@ -610,7 +563,7 @@ int check_auth0(struct mg_http_message *hm) {
token = mg_http_get_header_var(*cookie, mg_str("sist2-auth0"));
if (token.len == 0) {
LOG_WARNING("serve.c", "Unauthorized request (no auth cookie)")
LOG_WARNING("serve.c", "Unauthorized request (no auth cookie)");
return FALSE;
}
@ -644,28 +597,31 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
}
}
char uri[256];
size_t uri_len = hm->uri.len < sizeof(uri) - 1 ? hm->uri.len : sizeof(uri) - 1;
memcpy(uri, hm->uri.ptr, uri_len);
*(uri + uri_len) = '\0';
LOG_DEBUGF("serve.c", "<%s> GET %s",
address_to_string(&(nc->rem)),
hm->uri
)
web_address_to_string(&(nc->rem)),
uri
);
if (mg_http_match_uri(hm, "/")) {
search_index(nc, hm);
serve_index_html(nc, hm);
return;
} else if (mg_http_match_uri(hm, "/favicon.ico")) {
favicon(nc, hm);
serve_favicon_ico(nc, hm);
return;
} else if (mg_http_match_uri(hm, "/css/index.css")) {
style(nc, hm);
serve_style_css(nc, hm);
return;
} else if (mg_http_match_uri(hm, "/css/chunk-vendors.css")) {
style_vendor(nc, hm);
serve_chunk_vendors_css(nc, hm);
return;
} else if (mg_http_match_uri(hm, "/js/index.js")) {
javascript(nc, hm);
serve_index_js(nc, hm);
return;
} else if (mg_http_match_uri(hm, "/js/chunk-vendors.js")) {
javascript_vendor(nc, hm);
serve_chunk_vendors_js(nc, hm);
return;
} else if (mg_http_match_uri(hm, "/i")) {
index_info(nc);
@ -683,6 +639,8 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
status(nc);
} else if (mg_http_match_uri(hm, "/f/*")) {
file(nc, hm);
} else if (mg_http_match_uri(hm, "/t/*/*/*")) {
thumbnail_with_num(nc, hm);
} else if (mg_http_match_uri(hm, "/t/*/*")) {
thumbnail(nc, hm);
} else if (mg_http_match_uri(hm, "/s/*/*")) {
@ -706,7 +664,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
response_t *r = ctx->response;
if (r->status_code == 200) {
send_response_line(nc, 200, r->size, "Content-Type: application/json");
web_send_headers(nc, 200, r->size, "Content-Type: application/json");
mg_send(nc, r->body, r->size);
} else if (r->status_code == 0) {
sist_log("serve.c", LOG_SIST_ERROR, "Could not connect to elasticsearch!");
@ -738,7 +696,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
void serve(const char *listen_address) {
LOG_INFOF("serve.c", "Starting web server @ http://%s", listen_address)
LOG_INFOF("serve.c", "Starting web server @ http://%s", listen_address);
struct mg_mgr mgr;
mg_mgr_init(&mgr);
@ -747,12 +705,12 @@ void serve(const char *listen_address) {
struct mg_connection *nc = mg_http_listen(&mgr, listen_address, ev_router, NULL);
if (nc == NULL) {
LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address)
LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address);
}
while (ok) {
mg_mgr_poll(&mgr, 10);
}
mg_mgr_free(&mgr);
LOG_INFO("serve.c", "Finished web event loop")
LOG_INFO("serve.c", "Finished web event loop");
}
63
src/web/web_util.c Normal file
View File
@ -0,0 +1,63 @@
#include "web_util.h"
#include "static_generated.c"
void web_serve_asset_index_html(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(index_html), "Content-Type: text/html");
mg_send(nc, index_html, sizeof(index_html));
}
void web_serve_asset_index_js(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(index_js), "Content-Type: application/javascript");
mg_send(nc, index_js, sizeof(index_js));
}
void web_serve_asset_chunk_vendors_js(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(chunk_vendors_js), "Content-Type: application/javascript");
mg_send(nc, chunk_vendors_js, sizeof(chunk_vendors_js));
}
void web_serve_asset_favicon_ico(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(favicon_ico), "Content-Type: image/x-icon");
mg_send(nc, favicon_ico, sizeof(favicon_ico));
}
void web_serve_asset_style_css(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(index_css), "Content-Type: text/css");
mg_send(nc, index_css, sizeof(index_css));
}
void web_serve_asset_chunk_vendors_css(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(chunk_vendors_css), "Content-Type: text/css");
mg_send(nc, chunk_vendors_css, sizeof(chunk_vendors_css));
}
index_t *web_get_index_by_id(const char *index_id) {
for (int i = WebCtx.index_count - 1; i >= 0; i--) { // last valid slot is index_count - 1
if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) {
return &WebCtx.indices[i];
}
}
return NULL;
}
database_t *web_get_database(const char *index_id) {
index_t *idx = web_get_index_by_id(index_id);
if (idx != NULL) {
return idx->db;
}
return NULL;
}
void web_send_headers(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
mg_printf(
nc,
"HTTP/1.1 %d %s\r\n"
HTTP_SERVER_HEADER
"Content-Length: %d\r\n"
"%s\r\n\r\n",
status_code, "OK",
(int) length, /* mg_printf's %d expects int; length is size_t */
extra_headers
);
}
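web_send_headers() writes only the status line and headers; callers send the body themselves with mg_send(). A small usage sketch, mirroring the pattern in index_info(); reply_json() itself is hypothetical:

// Hypothetical helper showing the intended call pattern: headers, then body.
static void reply_json(struct mg_connection *nc, cJSON *json) {
    char *json_str = cJSON_PrintUnformatted(json);
    web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json");
    mg_send(nc, json_str, strlen(json_str));
    free(json_str);
}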
32
src/web/web_util.h Normal file
View File
@ -0,0 +1,32 @@
#ifndef SIST2_WEB_UTIL_H
#define SIST2_WEB_UTIL_H
#include "src/sist.h"
#include "src/index/elastic.h"
#include "src/ctx.h"
#include <mongoose.h>
#define HTTP_SERVER_HEADER "Server: sist2/" VERSION "\r\n"
index_t *web_get_index_by_id(const char *index_id);
database_t *web_get_database(const char *index_id);
__always_inline
static char *web_address_to_string(struct mg_addr *addr) {
return "TODO";
// static char address_to_string_buf[INET6_ADDRSTRLEN];
//
// return mg_ntoa(addr, address_to_string_buf, sizeof(address_to_string_buf));
}
void web_send_headers(struct mg_connection *nc, int status_code, size_t length, char *extra_headers);
void web_serve_asset_index_html(struct mg_connection *nc);
void web_serve_asset_index_js(struct mg_connection *nc);
void web_serve_asset_chunk_vendors_js(struct mg_connection *nc);
void web_serve_asset_favicon_ico(struct mg_connection *nc);
void web_serve_asset_style_css(struct mg_connection *nc);
void web_serve_asset_chunk_vendors_css(struct mg_connection *nc);
#endif //SIST2_WEB_UTIL_H
View File
@ -78,6 +78,7 @@ else()
-fno-stack-protector
-fomit-frame-pointer
#-freciprocal-math
-w
)
endif()
@ -97,19 +98,19 @@ find_package(LibLZMA REQUIRED)
find_package(ZLIB REQUIRED)
find_package(unofficial-pcre CONFIG REQUIRED)
find_library(JBIG2DEC_LIB NAMES jbig2decd jbig2dec)
find_library(HARFBUZZ_LIB NAMES harfbuzz harfbuzzd)
find_library(FREETYPE_LIB NAMES freetype freetyped)
find_package(unofficial-brotli CONFIG REQUIRED)
find_library(LZO2_LIB NAMES lzo2)
find_library(RAW_LIB NAMES libraw.a)
find_library(MUPDF_LIB NAMES liblibmupdf.a)
find_library(CMS_LIB NAMES lcms2)
find_library(JAS_LIB NAMES jasper)
find_library(GUMBO_LIB NAMES gumbo)
find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/)
find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/11/)
find_package(Leptonica CONFIG REQUIRED)
find_package(FFMPEG REQUIRED)
find_package(libraw CONFIG REQUIRED)
find_package(Freetype REQUIRED)
target_compile_options(
@ -118,39 +119,7 @@ target_compile_options(
-g
)
if (SIST_DEBUG)
SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
else()
SET(FFMPEG_DEBUG "")
endif()
include(ExternalProject)
find_program(MAKE_EXE NAMES gmake nmake make)
ExternalProject_Add(
ffmpeg
GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
GIT_TAG "n4.4"
UPDATE_COMMAND ""
PATCH_COMMAND ""
TEST_COMMAND ""
CONFIGURE_COMMAND ./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay
--disable-ffprobe --disable-doc --disable-manpages --disable-postproc --disable-avfilter --disable-alsa
--disable-lzma --disable-xlib --disable-vdpau --disable-vaapi --disable-sdl2
--disable-network ${FFMPEG_DEBUG}
INSTALL_COMMAND ""
PREFIX "third-party/ext_ffmpeg"
SOURCE_DIR "third-party/ext_ffmpeg/src/ffmpeg"
BINARY_DIR "third-party/ext_ffmpeg/src/ffmpeg"
BUILD_COMMAND ${MAKE_EXE} -j33 --silent
)
SET(FFMPEG_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg)
SET(FFMPEG_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg)
ExternalProject_Add(
libwpd
URL http://prdownloads.sourceforge.net/libwpd/libwpd-0.9.9.tar.gz
@ -165,19 +134,20 @@ ExternalProject_Add(
SOURCE_DIR "third-party/ext_libwpd/src/libwpd"
BINARY_DIR "third-party/ext_libwpd/src/libwpd"
BUILD_COMMAND ${MAKE_EXE} -j33
BUILD_COMMAND make -j33
)
SET(WPD_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/src/lib/.libs/)
SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/inc/)
add_dependencies(
scan
ffmpeg
antiword
libwpd
mobi
)
target_link_directories(scan PUBLIC ${FFMPEG_LIBRARY_DIRS})
target_link_libraries(
scan
PUBLIC
@ -202,35 +172,26 @@ target_link_libraries(
stdc++
-Wl,--whole-archive
m
-Wl,--no-whole-archive
${JPEG_LIBRARIES}
${Tesseract_LIBRARIES}
${LIBXML2_LIBRARIES}
${FREETYPE_LIB}
unofficial::brotli::brotlidec-static
${FFMPEG_LIB_DIR}/libavformat/libavformat.a
${FFMPEG_LIB_DIR}/libavcodec/libavcodec.a
${FFMPEG_LIB_DIR}/libavutil/libavutil.a
${FFMPEG_LIB_DIR}/libswresample/libswresample.a
${FFMPEG_LIB_DIR}/libswscale/libswscale.a
Freetype::Freetype
z
${CMAKE_THREAD_LIBS_INIT}
${RAW_LIB}
${GOMP_LIB}
${CMS_LIB}
${JAS_LIB}
${GUMBO_LIB}
dl
antiword
mobi
unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
leptonica
libraw::raw
${FFMPEG_LIBRARIES}
)
target_include_directories(
@ -242,6 +203,7 @@ target_include_directories(
${FFMPEG_INCLUDE_DIR}
${MOBI_INCLUDE_DIR}
${WPD_INCLUDE_DIR}
${FFMPEG_INCLUDE_DIRS}
)
if (BUILD_TESTS)
View File
@ -9,27 +9,13 @@
#define MAX_DECOMPRESSED_SIZE_RATIO 40.0
int should_parse_filtered_file(const char *filepath, int ext) {
char tmp[PATH_MAX * 2];
int should_parse_filtered_file(const char *filepath) {
if (ext == 0) {
return FALSE;
}
if (strncmp(filepath + ext, "tgz", 3) == 0) {
if (strstr(filepath, ".tgz")) {
return TRUE;
}
memcpy(tmp, filepath, ext - 1);
*(tmp + ext - 1) = '\0';
char *idx = strrchr(tmp, '.');
if (idx == NULL) {
return FALSE;
}
if (strcmp(idx, ".tar") == 0) {
if (strstr(filepath, ".tar.")) {
return TRUE;
}
@ -161,7 +147,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
}
if (ret != ARCHIVE_OK) {
CTX_LOG_ERRORF(f->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
CTX_LOG_ERRORF(f->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a));
archive_read_free(a);
return SCAN_ERR_READ;
}
@ -183,19 +169,18 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
meta_list->key = MetaContent;
strcpy(meta_list->str_val, buf.buf);
APPEND_META(doc, meta_list)
APPEND_META(doc, meta_list);
dyn_buffer_destroy(&buf);
} else {
parse_job_t *sub_job = malloc(sizeof(parse_job_t) + PATH_MAX * 2);
parse_job_t *sub_job = malloc(sizeof(parse_job_t));
sub_job->vfile.close = arc_close;
sub_job->vfile.read = arc_read;
sub_job->vfile.read_rewindable = arc_read_rewindable;
sub_job->vfile.reset = NULL;
sub_job->vfile.arc = a;
sub_job->vfile.filepath = sub_job->filepath;
sub_job->vfile.is_fs_file = FALSE;
sub_job->vfile.rewind_buffer_size = 0;
sub_job->vfile.rewind_buffer = NULL;
@ -206,28 +191,34 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
strcpy(sub_job->parent, doc->doc_id);
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
sub_job->vfile.info = *archive_entry_stat(entry);
struct stat entry_stat = *archive_entry_stat(entry);
sub_job->vfile.st_size = entry_stat.st_size;
sub_job->vfile.mtime = (int) entry_stat.st_mtim.tv_sec;
double decompressed_size_ratio = (double) sub_job->vfile.info.st_size / (double) f->info.st_size;
if (decompressed_size_ratio > MAX_DECOMPRESSED_SIZE_RATIO) {
CTX_LOG_DEBUGF("arc.c", "Skipped %s, possible zip bomb (decompressed_size_ratio=%f)", sub_job->filepath, decompressed_size_ratio)
continue;
}
if (S_ISREG(sub_job->vfile.info.st_mode)) {
if (S_ISREG(entry_stat.st_mode)) {
const char *utf8_name = archive_entry_pathname_utf8(entry);
if (utf8_name == NULL) {
sprintf(sub_job->filepath, "%s#/%s", f->filepath, archive_entry_pathname(entry));
snprintf(sub_job->filepath, sizeof(sub_job->filepath), "%s#/%s", f->filepath,
archive_entry_pathname(entry));
strcpy(sub_job->vfile.filepath, sub_job->filepath);
} else {
sprintf(sub_job->filepath, "%s#/%s", f->filepath, utf8_name);
snprintf(sub_job->filepath, sizeof(sub_job->filepath), "%s#/%s", f->filepath, utf8_name);
strcpy(sub_job->vfile.filepath, sub_job->filepath);
}
sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;
double decompressed_size_ratio = (double) sub_job->vfile.st_size / (double) f->st_size;
if (decompressed_size_ratio > MAX_DECOMPRESSED_SIZE_RATIO) {
CTX_LOG_DEBUGF("arc.c", "Skipped %s, possible zip bomb (decompressed_size_ratio=%f)", sub_job->filepath,
decompressed_size_ratio);
break;
}
// Handle excludes
if (exclude != NULL && EXCLUDED(sub_job->filepath)) {
CTX_LOG_DEBUGF("arc.c", "Excluded: %s", sub_job->filepath)
CTX_LOG_DEBUGF("arc.c", "Excluded: %s", sub_job->filepath);
continue;
}
View File
@ -67,7 +67,7 @@ static int vfile_close_callback(struct archive *a, void *user_data) {
int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse);
int should_parse_filtered_file(const char *filepath, int ext);
int should_parse_filtered_file(const char *filepath);
scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre *exclude, pcre_extra *exclude_extra);
View File
@ -18,7 +18,7 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE);
if (ret != ARCHIVE_OK) {
CTX_LOG_ERRORF(f->filepath, "(cbr.c) [%d] %s", ret, archive_error_string(a))
CTX_LOG_ERRORF(f->filepath, "(cbr.c) [%d] %s", ret, archive_error_string(a));
archive_read_free(a);
return;
}
@ -38,7 +38,7 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
if (read != entry_size) {
const char *err_str = archive_error_string(a);
if (err_str) {
CTX_LOG_ERRORF("comic.c", "Error while reading entry: %s", err_str)
CTX_LOG_ERRORF("comic.c", "Error while reading entry: %s", err_str);
}
free(buf);
break;
View File
@ -1,28 +1,34 @@
#include "ebook.h"
#include <mupdf/fitz.h>
#include <pthread.h>
#include <tesseract/capi.h>
#include "../media/media.h"
#include "../arc/arc.h"
#include "../ocr/ocr.h"
#if EBOOK_LOCKS
#include <pthread.h>
pthread_mutex_t Mutex;
#endif
/* fill_image callback doesn't let us pass opaque pointers unless I create my own device */
__thread text_buffer_t thread_buffer;
__thread scan_ebook_ctx_t thread_ctx;
pthread_mutex_t Mutex;
static void my_fz_lock(UNUSED(void *user), int lock) {
#if EBOOK_LOCKS
if (lock == FZ_LOCK_FREETYPE) {
pthread_mutex_lock(&Mutex);
}
#endif
}
static void my_fz_unlock(UNUSED(void *user), int lock) {
#if EBOOK_LOCKS
if (lock == FZ_LOCK_FREETYPE) {
pthread_mutex_unlock(&Mutex);
}
#endif
}
@ -48,7 +54,7 @@ load_pixmap(scan_ebook_ctx_t *ctx, int page, fz_context *fzctx, fz_document *fzd
fz_catch(fzctx)err = 1;
if (err != 0) {
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message);
return NULL;
}
@ -80,14 +86,14 @@ load_pixmap(scan_ebook_ctx_t *ctx, int page, fz_context *fzctx, fz_document *fzd
} fz_catch(fzctx)err = fzctx->error.errcode;
if (err != 0) {
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message);
fz_drop_page(fzctx, *cover);
fz_drop_pixmap(fzctx, pixmap);
return NULL;
}
if (pixmap->n != 3) {
CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n)
CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n);
fz_drop_page(fzctx, *cover);
fz_drop_pixmap(fzctx, pixmap);
return NULL;
@ -107,7 +113,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
if (pixmap_is_blank(pixmap)) {
fz_drop_page(fzctx, cover);
fz_drop_pixmap(fzctx, pixmap);
CTX_LOG_DEBUG(doc->filepath, "Cover page is blank, using page 1 instead")
CTX_LOG_DEBUG(doc->filepath, "Cover page is blank, using page 1 instead");
pixmap = load_pixmap(ctx, 1, fzctx, fzdoc, doc, &cover);
if (pixmap == NULL) {
return FALSE;
@ -155,8 +161,8 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
APPEND_LONG_META(doc, MetaThumbnail, 1);
ctx->store(doc->doc_id, 0, (char *) jpeg_packet.data, jpeg_packet.size);
free(samples);
av_packet_unref(&jpeg_packet);
@ -174,24 +180,26 @@ void fz_err_callback(void *user, const char *message) {
document_t *doc = (document_t *) user;
const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message)
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message);
}
void fz_warn_callback(void *user, const char *message) {
document_t *doc = (document_t *) user;
const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message)
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message);
}
static void init_fzctx(fz_context *fzctx, document_t *doc) {
fz_register_document_handlers(fzctx);
#if EBOOK_LOCKS
static int mu_is_initialized = FALSE;
if (!mu_is_initialized) {
pthread_mutex_init(&Mutex, NULL);
mu_is_initialized = TRUE;
}
#endif
fzctx->warn.print_user = doc;
fzctx->warn.print = fz_warn_callback;
@ -235,7 +243,7 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
if (img->w >= MIN_OCR_WIDTH && img->h >= MIN_OCR_HEIGHT && OCR_IS_VALID_BPP(img->n)) {
fz_pixmap *pix = img->get_pixmap(fzctx, img, NULL, img->w, img->h, &l2factor);
ocr_extract_text(thread_ctx.tesseract_path, thread_ctx.tesseract_lang, pix->samples, pix->w, pix->h, pix->n, pix->stride, pix->xres, fill_image_ocr_cb);
ocr_extract_text(thread_ctx.tesseract_path, thread_ctx.tesseract_lang, pix->samples, pix->w, pix->h, pix->n, (int)pix->stride, pix->xres, fill_image_ocr_cb);
fz_drop_pixmap(fzctx, pix);
}
}
@ -274,14 +282,14 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
fz_catch(fzctx)err = fzctx->error.errcode;
if (err) {
CTX_LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, fzctx->error.message)
CTX_LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, fzctx->error.message);
fz_drop_stream(fzctx, stream);
fz_drop_document(fzctx, fzdoc);
fz_drop_context(fzctx);
return;
}
APPEND_LONG_META(doc, MetaPages, page_count)
APPEND_LONG_META(doc, MetaPages, page_count);
if (ctx->enable_tn) {
if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) {
@ -304,7 +312,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
fz_catch(fzctx);
if (strlen(title) > 0) {
APPEND_UTF8_META(doc, MetaTitle, title)
APPEND_UTF8_META(doc, MetaTitle, title);
}
char author[4096] = {'\0',};
@ -312,7 +320,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
fz_catch(fzctx);
if (strlen(author) > 0) {
APPEND_UTF8_META(doc, MetaAuthor, author)
APPEND_UTF8_META(doc, MetaAuthor, author);
}
@ -326,7 +334,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
fz_try(fzctx)page = fz_load_page(fzctx, fzdoc, current_page);
fz_catch(fzctx)err = fzctx->error.errcode;
if (err != 0) {
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message);
text_buffer_destroy(&thread_buffer);
fz_drop_page(fzctx, page);
fz_drop_stream(fzctx, stream);
@ -355,7 +363,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
} fz_catch(fzctx)err = fzctx->error.errcode;
if (err != 0) {
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message);
text_buffer_destroy(&thread_buffer);
fz_drop_page(fzctx, page);
fz_drop_stext_page(fzctx, stext);
@ -385,7 +393,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->str_val, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur);
APPEND_META(doc, meta_content)
APPEND_META(doc, meta_content);
text_buffer_destroy(&thread_buffer);
}
@ -410,7 +418,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {
int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE);
if (ret != ARCHIVE_OK) {
CTX_LOG_ERRORF(f->filepath, "(ebook.c) [%d] %s", ret, archive_error_string(a))
CTX_LOG_ERRORF(f->filepath, "(ebook.c) [%d] %s", ret, archive_error_string(a));
archive_read_free(a);
return;
}
@ -431,7 +439,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {
if (read != entry_size) {
const char *err_str = archive_error_string(a);
if (err_str) {
CTX_LOG_ERRORF("ebook.c", "Error while reading entry: %s", err_str)
CTX_LOG_ERRORF("ebook.c", "Error while reading entry: %s", err_str);
}
free(buf);
break;
@ -452,7 +460,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + content_buffer.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->str_val, content_buffer.dyn_buffer.buf, content_buffer.dyn_buffer.cur);
APPEND_META(doc, meta_content)
APPEND_META(doc, meta_content);
text_buffer_destroy(&content_buffer);
@ -469,7 +477,7 @@ void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, docume
size_t buf_len;
void *buf = read_all(f, &buf_len);
if (buf == NULL) {
CTX_LOG_ERROR(f->filepath, "read_all() failed")
CTX_LOG_ERROR(f->filepath, "read_all() failed");
return;
}
View File
@ -9,7 +9,6 @@ typedef struct {
int enable_tn;
const char *tesseract_lang;
const char *tesseract_path;
pthread_mutex_t mupdf_mutex;
log_callback_t log;
logf_callback_t logf;
View File
@ -146,7 +146,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
size_t buf_len = 0;
void *buf = read_all(f, &buf_len);
if (buf == NULL) {
CTX_LOG_ERROR(f->filepath, "read_all() failed")
CTX_LOG_ERROR(f->filepath, "read_all() failed");
return;
}
@ -154,7 +154,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, (int) buf_len, 0, &face);
if (err != 0) {
CTX_LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err,
FT_Error_String(err))
FT_Error_String(err));
free(buf);
return;
}
@ -174,7 +174,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name));
meta_name->key = MetaFontName;
strcpy(meta_name->str_val, font_name);
APPEND_META(doc, meta_name)
APPEND_META(doc, meta_name);
if (!ctx->enable_tn) {
FT_Done_Face(face);
@ -188,7 +188,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
err = FT_Set_Pixel_Sizes(face, 0, pixel);
if (err != 0) {
CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err,
FT_Error_String(err))
FT_Error_String(err));
FT_Done_Face(face);
free(buf);
return;
@ -210,7 +210,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) {
CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err,
FT_Error_String(err))
FT_Error_String(err));
continue;
}
}
@ -231,8 +231,8 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
dyn_buffer_t bmp_data = dyn_buffer_create();
bmp_format(&bmp_data, dimensions, bitmap);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) bmp_data.buf, bmp_data.cur);
APPEND_LONG_META(doc, MetaThumbnail, 1);
ctx->store(doc->doc_id, 0, bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data);
free(bitmap);
View File
@ -32,8 +32,8 @@ int json_extract_text(cJSON *json, text_buffer_t *tex) {
scan_code_t parse_json(scan_json_ctx_t *ctx, vfile_t *f, document_t *doc) {
if (f->info.st_size > JSON_MAX_FILE_SIZE) {
CTX_LOG_WARNINGF("json.c", "File larger than maximum allowed [%s]", f->filepath)
if (f->st_size > JSON_MAX_FILE_SIZE) {
CTX_LOG_WARNINGF("json.c", "File larger than maximum allowed [%s]", f->filepath);
return SCAN_ERR_SKIP;
}
View File
@ -25,20 +25,20 @@
#define SHA1_STR_LENGTH (SHA1_DIGEST_LENGTH * 2 + 1)
#define MD5_STR_LENGTH (MD5_DIGEST_LENGTH * 2 + 1)
#define APPEND_STR_META(doc, keyname, value) \
#define APPEND_STR_META(doc, keyname, value) do {\
{meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \
meta_str->key = keyname; \
strcpy(meta_str->str_val, value); \
APPEND_META(doc, meta_str)}
APPEND_META(doc, meta_str);}} while(0)
#define APPEND_LONG_META(doc, keyname, value) \
#define APPEND_LONG_META(doc, keyname, value) do{\
{meta_line_t *meta_long = malloc(sizeof(meta_line_t)); \
meta_long->key = keyname; \
meta_long->long_val = value; \
APPEND_META(doc, meta_long)}
APPEND_META(doc, meta_long);}} while(0)
#define APPEND_META(doc, meta) \
#define APPEND_META(doc, meta) do {\
meta->next = NULL;\
if (doc->meta_head == NULL) {\
doc->meta_head = meta;\
@ -46,7 +46,7 @@
} else {\
doc->meta_tail->next = meta;\
doc->meta_tail = meta;\
}
}}while(0)
#define APPEND_UTF8_META(doc, keyname, str) \
text_buffer_t tex = text_buffer_create(-1); \
@ -55,5 +55,5 @@
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
meta_tag->key = keyname; \
strcpy(meta_tag->str_val, tex.dyn_buffer.buf); \
APPEND_META(doc, meta_tag) \
text_buffer_destroy(&tex);
APPEND_META(doc, meta_tag); \
text_buffer_destroy(&tex)
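
Aside: the APPEND_* macros are rewritten with the standard do { ... } while (0) idiom so each expansion is a single statement and call sites supply the trailing semicolon. A minimal sketch of the failure mode the idiom prevents (stand-in macros, not the real ones):

#include <stdio.h>

#define BAD_APPEND(x)  { printf("append %d\n", (x)); }
#define GOOD_APPEND(x) do { printf("append %d\n", (x)); } while (0)

int main(void) {
    int ok = 0;
    /* `if (ok) BAD_APPEND(1); else ...` would not compile: the semicolon
     * after the braces terminates the if before the else is reached. */
    if (ok)
        GOOD_APPEND(1);
    else
        GOOD_APPEND(2); /* do/while(0) expands to one statement, so else binds */
    return 0;
}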

View File

@ -163,7 +163,7 @@ static void read_subtitles(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, i
text_buffer_terminate_string(&tex);
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);
text_buffer_destroy(&tex);
avcodec_free_context(&decoder);
}
@ -190,7 +190,7 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d
CTX_LOG_WARNINGF(doc->filepath,
"(media.c) avcodec_read_frame() returned error code [%d] %s",
read_frame_ret, av_err2str(read_frame_ret)
)
);
}
frame_and_packet_free(result);
return NULL;
@ -210,7 +210,7 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d
CTX_LOG_ERRORF(doc->filepath,
"(media.c) avcodec_send_packet() returned error code [%d] %s",
decode_ret, av_err2str(decode_ret)
)
);
frame_and_packet_free(result);
return NULL;
}
@ -230,7 +230,7 @@ void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDic
while (meta != NULL) {
if (meta->key == key) {
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'",
key, meta->str_val, key, tag->value)
key, meta->str_val, key, tag->value);
return;
}
meta = meta->next;
@ -243,7 +243,7 @@ void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDic
meta_tag->key = key;
strcpy(meta_tag->str_val, tex.dyn_buffer.buf);
APPEND_META(doc, meta_tag)
APPEND_META(doc, meta_tag);
text_buffer_destroy(&tex);
}
@ -253,7 +253,7 @@ void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDic
#define STRCPY_TOLOWER(dst, str) \
strncpy(dst, str, sizeof(dst)); \
char *ptr = dst; \
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr)
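
Aside: STRCPY_TOLOWER now leaves the trailing semicolon to the call site like the other macros, but it still declares char *ptr in the enclosing scope, so it can only be expanded once per block. A sketch of that constraint (usage assumed to mirror the media.c call sites):

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#define STRCPY_TOLOWER(dst, str) \
    strncpy(dst, str, sizeof(dst)); \
    char *ptr = dst; \
    for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr)

int main(void) {
    char key[256];
    STRCPY_TOLOWER(key, "Artist"); /* one expansion per scope is fine */
    puts(key);                     /* prints "artist" */
    /* A second expansion in this same block would redeclare ptr and fail
     * to compile; in media.c each loop iteration is its own scope. */
    return 0;
}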
__always_inline
static void append_audio_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) {
@ -261,18 +261,18 @@ static void append_audio_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx
AVDictionaryEntry *tag = NULL;
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
char key[256];
STRCPY_TOLOWER(key, tag->key)
STRCPY_TOLOWER(key, tag->key);
if (strcmp(key, "artist") == 0) {
APPEND_TAG_META(MetaArtist)
APPEND_TAG_META(MetaArtist);
} else if (strcmp(key, "genre") == 0) {
APPEND_TAG_META(MetaGenre)
APPEND_TAG_META(MetaGenre);
} else if (strcmp(key, "title") == 0) {
APPEND_TAG_META(MetaTitle)
APPEND_TAG_META(MetaTitle);
} else if (strcmp(key, "album_artist") == 0) {
APPEND_TAG_META(MetaAlbumArtist)
APPEND_TAG_META(MetaAlbumArtist);
} else if (strcmp(key, "album") == 0) {
APPEND_TAG_META(MetaAlbum)
APPEND_TAG_META(MetaAlbum);
} else if (strcmp(key, "comment") == 0) {
append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent);
}
@ -291,14 +291,14 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f
if (meta_duration->long_val > INT32_MAX) {
meta_duration->long_val = 0;
}
APPEND_META(doc, meta_duration)
APPEND_META(doc, meta_duration);
}
if (pFormatCtx->bit_rate != 0) {
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->long_val = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
APPEND_META(doc, meta_bitrate);
}
}
@ -306,7 +306,7 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f
if (is_video) {
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
char key[256];
STRCPY_TOLOWER(key, tag->key)
STRCPY_TOLOWER(key, tag->key);
if (strcmp(key, "title") == 0) {
append_tag_meta_if_not_exists(ctx, doc, tag, MetaTitle);
@ -320,38 +320,38 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f
// EXIF metadata
while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
char key[256];
STRCPY_TOLOWER(key, tag->key)
STRCPY_TOLOWER(key, tag->key);
if (strcmp(key, "artist") == 0) {
append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist);
} else if (strcmp(key, "imagedescription") == 0) {
append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent);
} else if (strcmp(key, "make") == 0) {
APPEND_TAG_META(MetaExifMake)
APPEND_TAG_META(MetaExifMake);
} else if (strcmp(key, "model") == 0) {
APPEND_TAG_META(MetaExifModel)
APPEND_TAG_META(MetaExifModel);
} else if (strcmp(key, "software") == 0) {
APPEND_TAG_META(MetaExifSoftware)
APPEND_TAG_META(MetaExifSoftware);
} else if (strcmp(key, "fnumber") == 0) {
APPEND_TAG_META(MetaExifFNumber)
APPEND_TAG_META(MetaExifFNumber);
} else if (strcmp(key, "focallength") == 0) {
APPEND_TAG_META(MetaExifFocalLength)
APPEND_TAG_META(MetaExifFocalLength);
} else if (strcmp(key, "usercomment") == 0) {
APPEND_TAG_META(MetaExifUserComment)
APPEND_TAG_META(MetaExifUserComment);
} else if (strcmp(key, "isospeedratings") == 0) {
APPEND_TAG_META(MetaExifIsoSpeedRatings)
APPEND_TAG_META(MetaExifIsoSpeedRatings);
} else if (strcmp(key, "exposuretime") == 0) {
APPEND_TAG_META(MetaExifExposureTime)
APPEND_TAG_META(MetaExifExposureTime);
} else if (strcmp(key, "datetime") == 0) {
APPEND_TAG_META(MetaExifDateTime)
APPEND_TAG_META(MetaExifDateTime);
} else if (strcmp(key, "gpslatitude") == 0) {
APPEND_TAG_META(MetaExifGpsLatitudeDMS)
APPEND_TAG_META(MetaExifGpsLatitudeDMS);
} else if (strcmp(key, "gpslatituderef") == 0) {
APPEND_TAG_META(MetaExifGpsLatitudeRef)
APPEND_TAG_META(MetaExifGpsLatitudeRef);
} else if (strcmp(key, "gpslongitude") == 0) {
APPEND_TAG_META(MetaExifGpsLongitudeDMS)
APPEND_TAG_META(MetaExifGpsLongitudeDMS);
} else if (strcmp(key, "gpslongituderef") == 0) {
APPEND_TAG_META(MetaExifGpsLongitudeRef)
APPEND_TAG_META(MetaExifGpsLongitudeRef);
}
}
}
@ -432,11 +432,11 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
CTX_LOG_DEBUGF(
doc->filepath,
"(media.c) Could not seek media file: %s", av_err2str(seek_ret)
)
);
}
if (seek_ok == FALSE && thumbnail_index != 0) {
CTX_LOG_WARNING(doc->filepath, "(media.c) Could not seek media file. Can't generate additional thumbnails.")
CTX_LOG_WARNING(doc->filepath, "(media.c) Could not seek media file. Can't generate additional thumbnails.");
return SAVE_THUMBNAIL_FAILED;
}
}
@ -468,8 +468,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
if (scaled_frame == STORE_AS_IS) {
return_value = SAVE_THUMBNAIL_OK;
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
ctx->store(doc->doc_id, 0, frame_and_packet->packet->data, frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
@ -482,19 +481,17 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
// Save thumbnail
if (thumbnail_index == 0) {
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
ctx->store(doc->doc_id, 0, jpeg_packet.data, jpeg_packet.size);
return_value = SAVE_THUMBNAIL_OK;
} else if (thumbnail_index > 1) {
return_value = SAVE_THUMBNAIL_OK;
// TO FIX: the 2nd rendered frame is always broken, just skip it until
// I figure out a better fix.
thumbnail_index -= 1;
char tn_key[sizeof(doc->doc_id) + sizeof(char) * 4];
snprintf(tn_key, sizeof(tn_key), "%s%04d", doc->doc_id, thumbnail_index);
ctx->store(doc->doc_id, thumbnail_index, jpeg_packet.data, jpeg_packet.size);
ctx->store((char *) tn_key, sizeof(tn_key), (char *) jpeg_packet.data, jpeg_packet.size);
return_value = SAVE_THUMBNAIL_OK;
} else {
return_value = SAVE_THUMBNAIL_SKIPPED;
}
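
Aside: this hunk changes the thumbnail store convention from a composite string key (the index snprintf'd onto doc_id) to a (doc_id, num) pair, matching the new store_callback_t further down. A sketch of the new call shape; print_store is a hypothetical stand-in for the real store:

#include <stdio.h>

typedef void (*store_callback_t)(char *key, int num, void *buf, size_t buf_len);

static void print_store(char *key, int num, void *buf, size_t buf_len) {
    (void) buf;
    printf("store (%s, %d): %zu bytes\n", key, num, buf_len);
}

int main(void) {
    store_callback_t store = print_store;
    char doc_id[] = "9e107d9d372bb6826bd81d3542a419d6"; /* example MD5 string */
    unsigned char jpeg[] = {0xFF, 0xD8, 0xFF};
    store(doc_id, 0, jpeg, sizeof(jpeg)); /* primary thumbnail */
    store(doc_id, 1, jpeg, sizeof(jpeg)); /* additional thumbnail */
    return 0;
}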
@ -525,7 +522,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id);
if (desc != NULL) {
APPEND_STR_META(doc, MetaMediaAudioCodec, desc->name)
APPEND_STR_META(doc, MetaMediaAudioCodec, desc->name);
}
audio_stream = i;
@ -536,18 +533,18 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id);
if (desc != NULL) {
APPEND_STR_META(doc, MetaMediaVideoCodec, desc->name)
APPEND_STR_META(doc, MetaMediaVideoCodec, desc->name);
}
meta_line_t *meta_w = malloc(sizeof(meta_line_t));
meta_w->key = MetaWidth;
meta_w->long_val = stream->codecpar->width;
APPEND_META(doc, meta_w)
APPEND_META(doc, meta_w);
meta_line_t *meta_h = malloc(sizeof(meta_line_t));
meta_h->key = MetaHeight;
meta_h->long_val = stream->codecpar->height;
APPEND_META(doc, meta_h)
APPEND_META(doc, meta_h);
video_stream = i;
}
@ -614,7 +611,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
}
if (number_of_thumbnails_generated > 0) {
APPEND_LONG_META(doc, MetaThumbnail, number_of_thumbnails_generated)
APPEND_LONG_META(doc, MetaThumbnail, number_of_thumbnails_generated);
}
avcodec_free_context(&decoder);
@ -628,12 +625,12 @@ void parse_media_filename(scan_media_ctx_t *ctx, const char *filepath, document_
AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) {
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()");
return;
}
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
if (res < 0) {
CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res));
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
@ -687,7 +684,7 @@ long memfile_seek(void *ptr, long offset, int whence) {
}
int memfile_open(vfile_t *f, memfile_t *mem) {
mem->size = f->info.st_size;
mem->size = f->st_size;
mem->buf = malloc(mem->size);
if (mem->buf == NULL) {
@ -727,7 +724,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc,
AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) {
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()");
return;
}
@ -737,16 +734,16 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc,
const char *filepath = get_filepath_with_ext(doc, f->filepath, mime_str);
if (f->info.st_size <= ctx->max_media_buffer) {
if (f->st_size <= ctx->max_media_buffer) {
int ret = memfile_open(f, &memfile);
if (ret == 0) {
CTX_LOG_DEBUGF(f->filepath, "Loading media file in memory (%ldB)", f->info.st_size)
CTX_LOG_DEBUGF(f->filepath, "Loading media file in memory (%ldB)", f->st_size);
io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek);
}
}
if (io_ctx == NULL) {
CTX_LOG_DEBUGF(f->filepath, "Reading media file without seek support", f->info.st_size)
CTX_LOG_DEBUGF(f->filepath, "Reading media file without seek support", f->st_size);
io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
}
@ -755,7 +752,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc,
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
if (res < 0) {
if (res != -5) {
CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res));
}
av_free(io_ctx->buffer);
memfile_close(&memfile);
@ -790,7 +787,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) {
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()");
return FALSE;
}
@ -798,7 +795,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
int ret = memfile_open_buf(buf, buf_len, &memfile);
if (ret == 0) {
CTX_LOG_DEBUGF(doc->filepath, "Loading media file in memory (%ldB)", buf_len)
CTX_LOG_DEBUGF(doc->filepath, "Loading media file in memory (%ldB)", buf_len);
io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek);
} else {
avformat_close_input(&pFormatCtx);
@ -853,9 +850,8 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
}
if (scaled_frame == STORE_AS_IS) {
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
APPEND_LONG_META(doc, MetaThumbnail, 1);
ctx->store(doc->doc_id, 0, frame_and_packet->packet->data, frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
@ -867,8 +863,8 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
APPEND_LONG_META(doc, MetaThumbnail, 1);
ctx->store(doc->doc_id, 0, jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
avcodec_free_context(&jpeg_encoder);

View File

@ -8,7 +8,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {
MOBIData *m = mobi_init();
if (m == NULL) {
CTX_LOG_ERROR(f->filepath, "mobi_init() failed")
CTX_LOG_ERROR(f->filepath, "mobi_init() failed");
return;
}
@ -16,7 +16,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {
char* buf = read_all(f, &buf_len);
if (buf == NULL) {
mobi_free(m);
CTX_LOG_ERROR(f->filepath, "read_all() failed")
CTX_LOG_ERROR(f->filepath, "read_all() failed");
return;
}
@ -24,7 +24,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {
if (file == NULL) {
mobi_free(m);
free(buf);
CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno)
CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno);
return;
}
@ -33,25 +33,25 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {
if (mobi_ret != MOBI_SUCCESS) {
mobi_free(m);
free(buf);
CTX_LOG_ERRORF(f->filepath, "mobi_laod_file() returned error code [%d]", mobi_ret)
CTX_LOG_ERRORF(f->filepath, "mobi_laod_file() returned error code [%d]", mobi_ret);
return;
}
char *author = mobi_meta_get_author(m);
if (author != NULL) {
APPEND_STR_META(doc, MetaAuthor, author)
APPEND_STR_META(doc, MetaAuthor, author);
free(author);
}
char *title = mobi_meta_get_title(m);
if (title != NULL) {
APPEND_STR_META(doc, MetaTitle, title)
APPEND_STR_META(doc, MetaTitle, title);
free(title);
}
const size_t maxlen = mobi_get_text_maxsize(m);
if (maxlen == MOBI_NOTSET) {
free(buf);
CTX_LOG_DEBUGF("%s", "Invalid text maxsize: %zu", maxlen)
CTX_LOG_DEBUGF("%s", "Invalid text maxsize: %zu", maxlen);
return;
}
@ -62,7 +62,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {
mobi_free(m);
free(content_str);
free(buf);
CTX_LOG_ERRORF(f->filepath, "mobi_get_rawml() returned error code [%d]", mobi_ret)
CTX_LOG_ERRORF(f->filepath, "mobi_get_rawml() returned error code [%d]", mobi_ret);
return;
}
@ -70,7 +70,7 @@ void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {
text_buffer_append_markup(&tex, content_str);
text_buffer_terminate_string(&tex);
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);
free(content_str);
free(buf);

View File

@ -39,12 +39,12 @@ void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, voi
iInitDocument(file_in, (int) buf_len);
const char *author = szGetAuthor();
if (author != NULL) {
APPEND_UTF8_META(doc, MetaAuthor, author)
APPEND_UTF8_META(doc, MetaAuthor, author);
}
const char *title = szGetTitle();
if (title != NULL) {
APPEND_UTF8_META(doc, MetaTitle, title)
APPEND_UTF8_META(doc, MetaTitle, title);
}
vFreeDocument();
@ -60,7 +60,7 @@ void parse_msdoc_text(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file_in, voi
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->str_val, tex.dyn_buffer.buf, tex.dyn_buffer.cur);
APPEND_META(doc, meta_content)
APPEND_META(doc, meta_content);
text_buffer_destroy(&tex);
}
@ -74,14 +74,14 @@ void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc) {
size_t buf_len;
char *buf = read_all(f, &buf_len);
if (buf == NULL) {
CTX_LOG_ERROR(f->filepath, "read_all() failed")
CTX_LOG_ERROR(f->filepath, "read_all() failed");
return;
}
FILE *file = fmemopen(buf, buf_len, "rb");
if (file == NULL) {
free(buf);
CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno)
CTX_LOG_ERRORF(f->filepath, "fmemopen() failed (%d)", errno);
return;
}

View File

@ -39,7 +39,7 @@ int extract_text(scan_ooxml_ctx_t *ctx, xmlDoc *xml, xmlNode *node, text_buffer_
xmlErrorPtr err = xmlGetLastError();
if (err != NULL) {
if (err->level == XML_ERR_FATAL) {
CTX_LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message)
CTX_LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message);
return -1;
}
}
@ -85,13 +85,13 @@ static int read_part(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *bu
XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
if (xml == NULL) {
CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
CTX_LOG_ERROR(doc->filepath, "Could not parse XML");
return READ_PART_ERR;
}
xmlNode *root = xmlDocGetRootElement(xml);
if (root == NULL) {
CTX_LOG_ERROR(doc->filepath, "Empty document")
CTX_LOG_ERROR(doc->filepath, "Empty document");
xmlFreeDoc(xml);
return READ_PART_ERR;
}
@ -108,13 +108,13 @@ static int read_doc_props_app(scan_ooxml_ctx_t *ctx, struct archive *a, document
XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
if (xml == NULL) {
CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
CTX_LOG_ERROR(doc->filepath, "Could not parse XML");
return -1;
}
xmlNode *root = xmlDocGetRootElement(xml);
if (root == NULL) {
CTX_LOG_ERROR(doc->filepath, "Empty document")
CTX_LOG_ERROR(doc->filepath, "Empty document");
xmlFreeDoc(xml);
return -1;
}
@ -127,7 +127,7 @@ static int read_doc_props_app(scan_ooxml_ctx_t *ctx, struct archive *a, document
}
if (xmlStrEqual(child->name, _X("Pages"))) {
APPEND_LONG_META(doc, MetaPages, strtol((char *) text, NULL, 10))
APPEND_LONG_META(doc, MetaPages, strtol((char *) text, NULL, 10));
}
xmlFree(text);
@ -144,13 +144,13 @@ static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, document_t *
XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
if (xml == NULL) {
CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
CTX_LOG_ERROR(doc->filepath, "Could not parse XML");
return -1;
}
xmlNode *root = xmlDocGetRootElement(xml);
if (root == NULL) {
CTX_LOG_ERROR(doc->filepath, "Empty document")
CTX_LOG_ERROR(doc->filepath, "Empty document");
xmlFreeDoc(xml);
return -1;
}
@ -163,11 +163,11 @@ static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, document_t *
}
if (xmlStrEqual(child->name, _X("title"))) {
APPEND_STR_META(doc, MetaTitle, (char *) text)
APPEND_STR_META(doc, MetaTitle, (char *) text);
} else if (xmlStrEqual(child->name, _X("creator"))) {
APPEND_STR_META(doc, MetaAuthor, (char *) text)
APPEND_STR_META(doc, MetaAuthor, (char *) text);
} else if (xmlStrEqual(child->name, _X("lastModifiedBy"))) {
APPEND_STR_META(doc, MetaModifiedBy, (char *) text)
APPEND_STR_META(doc, MetaModifiedBy, (char *) text);
}
xmlFree(text);
@ -190,8 +190,8 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
char *buf = malloc(entry_size);
archive_read_data(a, buf, entry_size);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), buf, entry_size);
APPEND_LONG_META(doc, MetaThumbnail, 1);
ctx->store(doc->doc_id, 1, buf, entry_size);
free(buf);
}
@ -200,7 +200,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
size_t buf_len;
void *buf = read_all(f, &buf_len);
if (buf == NULL) {
CTX_LOG_ERROR(f->filepath, "read_all() failed")
CTX_LOG_ERROR(f->filepath, "read_all() failed");
return;
}
@ -209,7 +209,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
int ret = archive_read_open_memory(a, buf, buf_len);
if (ret != ARCHIVE_OK) {
CTX_LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a))
CTX_LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a));
archive_read_free(a);
free(buf);
return;
@ -250,7 +250,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
meta->key = MetaContent;
strcpy(meta->str_val, tex.dyn_buffer.buf);
APPEND_META(doc, meta)
APPEND_META(doc, meta);
}
archive_read_close(a);

View File

@ -83,7 +83,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_LONG_META(doc, MetaThumbnail, 1)
APPEND_LONG_META(doc, MetaThumbnail, 1);
ctx->store(doc->doc_id, 0, jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
@ -100,76 +100,76 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
libraw_data_t *libraw_lib = libraw_init(0);
if (!libraw_lib) {
CTX_LOG_ERROR("raw.c", "Cannot create libraw handle")
CTX_LOG_ERROR("raw.c", "Cannot create libraw handle");
return;
}
size_t buf_len = 0;
void *buf = read_all(f, &buf_len);
if (buf == NULL) {
CTX_LOG_ERROR(f->filepath, "read_all() failed")
CTX_LOG_ERROR(f->filepath, "read_all() failed");
return;
}
int ret = libraw_open_buffer(libraw_lib, buf, buf_len);
if (ret != 0) {
CTX_LOG_ERROR(f->filepath, "Could not open raw file")
CTX_LOG_ERROR(f->filepath, "Could not open raw file");
free(buf);
libraw_close(libraw_lib);
return;
}
if (*libraw_lib->idata.model != '\0') {
APPEND_STR_META(doc, MetaExifModel, libraw_lib->idata.model)
APPEND_STR_META(doc, MetaExifModel, libraw_lib->idata.model);
}
if (*libraw_lib->idata.make != '\0') {
APPEND_STR_META(doc, MetaExifMake, libraw_lib->idata.make)
APPEND_STR_META(doc, MetaExifMake, libraw_lib->idata.make);
}
if (*libraw_lib->idata.software != '\0') {
APPEND_STR_META(doc, MetaExifSoftware, libraw_lib->idata.software)
APPEND_STR_META(doc, MetaExifSoftware, libraw_lib->idata.software);
}
APPEND_LONG_META(doc, MetaWidth, libraw_lib->sizes.width)
APPEND_LONG_META(doc, MetaHeight, libraw_lib->sizes.height)
APPEND_LONG_META(doc, MetaWidth, libraw_lib->sizes.width);
APPEND_LONG_META(doc, MetaHeight, libraw_lib->sizes.height);
char tmp[1024];
snprintf(tmp, sizeof(tmp), "%g", libraw_lib->other.iso_speed);
APPEND_STR_META(doc, MetaExifIsoSpeedRatings, tmp)
APPEND_STR_META(doc, MetaExifIsoSpeedRatings, tmp);
if (*libraw_lib->other.desc != '\0') {
APPEND_STR_META(doc, MetaContent, libraw_lib->other.desc)
APPEND_STR_META(doc, MetaContent, libraw_lib->other.desc);
}
if (*libraw_lib->other.artist != '\0') {
APPEND_STR_META(doc, MetaArtist, libraw_lib->other.artist)
APPEND_STR_META(doc, MetaArtist, libraw_lib->other.artist);
}
struct tm *time = localtime(&libraw_lib->other.timestamp);
strftime(tmp, sizeof(tmp), "%Y:%m:%d %H:%M:%S", time);
APPEND_STR_META(doc, MetaExifDateTime, tmp)
APPEND_STR_META(doc, MetaExifDateTime, tmp);
snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.focal_len);
APPEND_STR_META(doc, MetaExifFocalLength, tmp)
APPEND_STR_META(doc, MetaExifFocalLength, tmp);
snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.aperture);
APPEND_STR_META(doc, MetaExifFNumber, tmp)
APPEND_STR_META(doc, MetaExifFNumber, tmp);
int denominator = (int) roundf(1 / libraw_lib->other.shutter);
snprintf(tmp, sizeof(tmp), "1/%d", denominator);
APPEND_STR_META(doc, MetaExifExposureTime, tmp)
APPEND_STR_META(doc, MetaExifExposureTime, tmp);
libraw_gps_info_t gps = libraw_lib->other.parsed_gps;
double gps_longitude_dec =
(gps.longitude[0] + gps.longitude[1] / 60 + gps.longitude[2] / 3600) * DMS_REF(gps.longref);
snprintf(tmp, sizeof(tmp), "%.15f", gps_longitude_dec);
if (gps_longitude_dec != 0.0) {
APPEND_STR_META(doc, MetaExifGpsLongitudeDec, tmp)
APPEND_STR_META(doc, MetaExifGpsLongitudeDec, tmp);
}
double gps_latitude_dec = (gps.latitude[0] + gps.latitude[1] / 60 + gps.latitude[2] / 3600) * DMS_REF(gps.latref);
snprintf(tmp, sizeof(tmp), "%.15f", gps_latitude_dec);
if (gps_latitude_dec != 0.0) {
APPEND_STR_META(doc, MetaExifGpsLatitudeDec, tmp)
APPEND_STR_META(doc, MetaExifGpsLatitudeDec, tmp);
}
APPEND_STR_META(doc, MetaMediaVideoCodec, "raw")
APPEND_STR_META(doc, MetaMediaVideoCodec, "raw");
if (!ctx->enable_tn) {
free(buf);
@ -179,7 +179,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
int unpack_ret = libraw_unpack_thumb(libraw_lib);
if (unpack_ret != 0) {
CTX_LOG_ERRORF(f->filepath, "libraw_unpack_thumb returned error code %d", unpack_ret)
CTX_LOG_ERRORF(f->filepath, "libraw_unpack_thumb returned error code %d", unpack_ret);
free(buf);
libraw_close(libraw_lib);
return;
@ -212,7 +212,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
ret = libraw_unpack(libraw_lib);
if (ret != 0) {
CTX_LOG_ERROR(f->filepath, "Could not unpack raw file")
CTX_LOG_ERROR(f->filepath, "Could not unpack raw file");
free(buf);
libraw_close(libraw_lib);
return;

View File

@ -6,6 +6,7 @@
#endif
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <openssl/md5.h>
#include <openssl/sha.h>
@ -16,7 +17,7 @@
#define UNUSED(x) __attribute__((__unused__)) x
typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len);
typedef void (*store_callback_t)(char *key, int num, void *buf, size_t buf_len);
typedef void (*logf_callback_t)(const char *filepath, int level, char *format, ...);
@ -33,24 +34,26 @@ typedef int scan_code_t;
#define LEVEL_ERROR 3
#define LEVEL_FATAL 4
#define CTX_LOG_DEBUGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_DEBUG, fmt, __VA_ARGS__);
#define CTX_LOG_DEBUG(filepath, str) ctx->log(filepath, LEVEL_DEBUG, str);
#define CTX_LOG_DEBUGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_DEBUG, fmt, __VA_ARGS__)
#define CTX_LOG_DEBUG(filepath, str) ctx->log(filepath, LEVEL_DEBUG, str)
#define CTX_LOG_INFOF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_INFO, fmt, __VA_ARGS__);
#define CTX_LOG_INFO(filepath, str) ctx->log(filepath, LEVEL_INFO, str);
#define CTX_LOG_INFOF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_INFO, fmt, __VA_ARGS__)
#define CTX_LOG_INFO(filepath, str) ctx->log(filepath, LEVEL_INFO, str)
#define CTX_LOG_WARNINGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_WARNING, fmt, __VA_ARGS__);
#define CTX_LOG_WARNING(filepath, str) ctx->log(filepath, LEVEL_WARNING, str);
#define CTX_LOG_WARNINGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_WARNING, fmt, __VA_ARGS__)
#define CTX_LOG_WARNING(filepath, str) ctx->log(filepath, LEVEL_WARNING, str)
#define CTX_LOG_ERRORF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_ERROR, fmt, __VA_ARGS__);
#define CTX_LOG_ERROR(filepath, str) ctx->log(filepath, LEVEL_ERROR, str);
#define CTX_LOG_ERRORF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_ERROR, fmt, __VA_ARGS__)
#define CTX_LOG_ERROR(filepath, str) ctx->log(filepath, LEVEL_ERROR, str)
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1)
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1)
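
Aside: the non-fatal CTX_LOG_* macros lose their embedded semicolons here, but the FATAL variants still expand to two statements (log, then exit(-1)) and so remain unsafe under an unbraced if. A minimal sketch of the hazard; LOG_FATAL is a simplified stand-in:

#include <stdio.h>
#include <stdlib.h>

#define LOG_FATAL(str) puts(str); exit(-1)

int main(void) {
    int fatal = 0;
    if (fatal) {           /* braces required: without them the      */
        LOG_FATAL("boom"); /* exit(-1) would run even when fatal == 0 */
    }
    puts("still alive");
    return 0;
}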
#define SIST_DOC_ID_LEN MD5_STR_LENGTH
#define SIST_INDEX_ID_LEN MD5_STR_LENGTH
#define EBOOK_LOCKS 0
enum metakey {
// String
MetaContent = 1,
@ -100,7 +103,6 @@ typedef struct meta_line {
union {
char str_val[0];
unsigned long long_val;
double double_val;
};
} meta_line_t;
@ -110,12 +112,11 @@ typedef struct document {
unsigned long size;
unsigned int mime;
int mtime;
short base;
short ext;
char has_parent;
int base;
int ext;
meta_line_t *meta_head;
meta_line_t *meta_tail;
char *filepath;
char filepath[PATH_MAX * 2 + 1];
} document_t;
typedef struct vfile vfile_t;
@ -140,8 +141,10 @@ typedef struct vfile {
int is_fs_file;
int has_checksum;
int calculate_checksum;
const char *filepath;
struct stat info;
char filepath[PATH_MAX * 2 + 1];
int mtime;
size_t st_size;
SHA_CTX sha1_ctx;
unsigned char sha1_digest[SHA1_DIGEST_LENGTH];
@ -158,12 +161,12 @@ typedef struct vfile {
logf_callback_t logf;
} vfile_t;
typedef struct parse_job_t {
typedef struct {
int base;
int ext;
struct vfile vfile;
char parent[SIST_DOC_ID_LEN];
char filepath[1];
char filepath[PATH_MAX * 2 + 1];
} parse_job_t;
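
Aside: parse_job_t changes from a variable-length struct (char filepath[1] with a trailing allocation) to a flat, fixed-size one. A plausible motivation, not stated in the diff, is the process pool: a pointer-free, fixed-size struct can cross a pipe or shared-memory queue in one write. A sketch under that assumption, with JOB_PATH_MAX standing in for PATH_MAX * 2 + 1:

#include <string.h>
#include <unistd.h>

enum { JOB_PATH_MAX = 4096 * 2 + 1 };

typedef struct {
    int base;
    int ext;
    char filepath[JOB_PATH_MAX];
} job_t;

/* A flat struct with no pointers can be handed to another process verbatim. */
static int send_job(int fd, const job_t *job) {
    return write(fd, job, sizeof(*job)) == (ssize_t) sizeof(*job) ? 0 : -1;
}

int main(void) {
    int fds[2];
    if (pipe(fds) != 0) return 1;
    job_t job = {0};
    strcpy(job.filepath, "/data/example.txt");
    return send_job(fds[1], &job);
}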

View File

@ -2,7 +2,11 @@
scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {
int to_read = MIN(ctx->content_size, f->info.st_size);
if (ctx->content_size <= 0) {
return SCAN_OK;
}
int to_read = MIN(ctx->content_size, f->st_size);
if (to_read <= 2) {
return SCAN_OK;
@ -11,7 +15,7 @@ scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {
char *buf = malloc(to_read);
int ret = f->read(f, buf, to_read);
if (ret < 0) {
CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret)
CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret);
free(buf);
return SCAN_ERR_READ;
}
@ -39,12 +43,16 @@ scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {
scan_code_t parse_markup(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {
int to_read = MIN(MAX_MARKUP_SIZE, f->info.st_size);
if (ctx->content_size <= 0) {
return SCAN_OK;
}
int to_read = MIN(MAX_MARKUP_SIZE, f->st_size);
char *buf = malloc(to_read + 1);
int ret = f->read(f, buf, to_read);
if (ret < 0) {
CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret)
CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret);
free(buf);
return SCAN_ERR_READ;
}

View File

@ -325,10 +325,10 @@ static int text_buffer_append_markup(text_buffer_t *buf, const char *markup) {
}
static void *read_all(vfile_t *f, size_t *size) {
void *buf = malloc(f->info.st_size);
*size = f->read(f, buf, f->info.st_size);
void *buf = malloc(f->st_size);
*size = f->read(f, buf, f->st_size);
if (*size != f->info.st_size) {
if (*size != f->st_size) {
free(buf);
return NULL;
}
@ -358,4 +358,37 @@ static void safe_sha1_update(SHA_CTX *ctx, void *buf, size_t size) {
}
}
static parse_job_t *create_parse_job(const char *filepath, int mtime, size_t st_size) {
parse_job_t *job = (parse_job_t *) malloc(sizeof(parse_job_t));
job->parent[0] = '\0';
strcpy(job->filepath, filepath);
strcpy(job->vfile.filepath, filepath);
job->vfile.st_size = st_size;
job->vfile.mtime = mtime;
const char *slash = strrchr(filepath, '/');
if (slash == NULL) {
job->base = 0;
} else {
job->base = (int) (slash - filepath + 1);
}
const char *dot = strrchr(filepath + job->base, '.');
if (dot == NULL) {
job->ext = (int) strlen(filepath);
} else {
job->ext = (int) (dot - filepath + 1);
}
job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE;
job->vfile.has_checksum = FALSE;
job->vfile.rewind_buffer_size = 0;
job->vfile.rewind_buffer = NULL;
return job;
}
#endif
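
Aside: a usage sketch for the new create_parse_job() helper (a hypothetical call site, assuming this header is included). base and ext are byte offsets into filepath, so the basename starts at filepath + base and the extension at filepath + ext:

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    parse_job_t *job = create_parse_job("/data/photos/cat.jpg", 1680000000, 12345);
    printf("base=%s ext=%s\n", job->filepath + job->base, job->filepath + job->ext);
    /* prints: base=cat.jpg ext=jpg */
    free(job);
    return 0;
}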

View File

@ -10,14 +10,14 @@ scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc) {
wpd_confidence_t conf = wpd_is_file_format_supported(stream);
if (conf == C_WPD_CONFIDENCE_SUPPORTED_ENCRYPTION || conf == C_WPD_CONFIDENCE_UNSUPPORTED_ENCRYPTION) {
CTX_LOG_DEBUGF("wpd.c", "File is encrypted! Password-protected WPD files are not supported yet (conf=%d)", conf)
CTX_LOG_DEBUGF("wpd.c", "File is encrypted! Password-protected WPD files are not supported yet (conf=%d)", conf);
wpd_memory_stream_destroy(stream);
free(buf);
return SCAN_ERR_READ;
}
if (conf != C_WPD_CONFIDENCE_EXCELLENT) {
CTX_LOG_ERRORF("wpd.c", "Unsupported file format! [%s] (conf=%d)", doc->filepath, conf)
CTX_LOG_ERRORF("wpd.c", "Unsupported file format! [%s] (conf=%d)", doc->filepath, conf);
wpd_memory_stream_destroy(stream);
free(buf);
return SCAN_ERR_READ;
@ -28,11 +28,11 @@ scan_code_t parse_wpd(scan_wpd_ctx_t *ctx, vfile_t *f, document_t *doc) {
if (res != C_WPD_OK) {
CTX_LOG_ERRORF("wpd.c", "Error while parsing WPD file [%s] (%d)",
doc->filepath, res)
doc->filepath, res);
}
if (tex.dyn_buffer.cur != 0) {
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);
}
text_buffer_destroy(&tex);

View File

@ -50,14 +50,19 @@ void cleanup(document_t *doc, vfile_t *f) {
}
void load_file(const char *filepath, vfile_t *f) {
stat(filepath, &f->info);
struct stat info = {};
stat(filepath, &info);
f->mtime = (int)info.st_mtim.tv_sec;
f->st_size = info.st_size;
f->fd = open(filepath, O_RDONLY);
if (f->fd == -1) {
FAIL() << FILE_NOT_FOUND_ERR;
}
f->filepath = filepath;
strcpy(f->filepath, filepath);
f->read = fs_read;
f->close = fs_close;
f->is_fs_file = TRUE;
@ -66,9 +71,9 @@ void load_file(const char *filepath, vfile_t *f) {
}
void load_mem(void *mem, size_t size, vfile_t *f) {
f->filepath = "_mem_";
memcpy(f->filepath, "_mem_", strlen("_mem_"));
f->_test_data = mem;
f->info.st_size = (int) size;
f->st_size = size;
f->read = mem_read;
f->close = nullptr;
f->is_fs_file = TRUE;

View File

@ -21,7 +21,7 @@ static void noop_log(const char *filepath, int level, char *str) {
static size_t store_size = 0;
static void counter_store(char* key, size_t key_len, char *value, size_t value_len) {
static void counter_store(char* key, int num, void *value, size_t value_len) {
store_size += value_len;
// char id[37];
// char tmp[PATH_MAX];

@ -1 +1 @@
Subproject commit ddb042143e72a8b789e06f09dbc897dfa9f15b82
Subproject commit badfdac84586511d4f2b626516162d62a3625349