Better support for .doc files

Update README.md
2025-12-20 18:46:03 +00:00 · 2020-12-16 20:06:06 -05:00 · 2020-12-08 18:16:19 -05:00 · 2020-11-17 12:34:54 -05:00 · 2020-11-15 21:18:02 -05:00 · 2020-11-13 08:30:43 -05:00
50 changed files with 560 additions and 1058 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,25 +0,0 @@
-.idea
-*/thumbs
-*.cbp
-CMakeCache.txt
-CMakeFiles
-cmake-build-debug
-cmake_install.cmake
-Makefile
-*.out
-LOG
-sist2*
-index.sist2/
-bundle*.css
-bundle.js
-**/*.a
-**/vgcore.*
-build/
-.git/
-third-party/libscan/libscan-test-files/
-**/ext_ffmpeg
-**/ext_libmobi
-**/scan_a_test
-Dockerfile
-*.idx/
-VERSION
--- a/.drone.yml
+++ b/.drone.yml
@@ -1,72 +0,0 @@
-kind: pipeline
-type: docker
-name: amd64
-
-platform:
-  os: linux
-  arch: amd64
-
-steps:
-  - name: build
-    image: simon987/sist2-build
-    commands:
-      - ./ci/build.sh
-  - name: docker
-    image: plugins/docker
-    settings:
-      username:
-        from_secret: DOCKER_USER
-      password:
-        from_secret: DOCKER_PASSWORD
-      repo: simon987/sist2
-      context: ./
-      dockerfile: ./Dockerfile
-      auto_tag: true
-      auto_tag_suffix: x64-linux
-      when:
-        event:
-          - tag
-  - name: scp files
-    image: appleboy/drone-scp
-    settings:
-      host:
-        from_secret: SSH_HOST
-      port:
-        from_secret: SSH_PORT
-      user:
-        from_secret: SSH_USER
-      key:
-        from_secret: SSH_KEY
-      target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
-      source:
-        - ./VERSION
-        - ./sist2-x64-linux
-        - ./sist2-x64-linux-debug
-
---
-kind: pipeline
-type: docker
-name: arm64
-
-platform:
-  arch: arm64
-
-steps:
-  - name: build
-    image: simon987/sist2-build-arm64
-    commands:
-      - ./ci/build_arm64.sh
-  - name: scp files
-    image: appleboy/drone-scp
-    settings:
-      host:
-        from_secret: SSH_HOST
-      port:
-        from_secret: SSH_PORT
-      user:
-        from_secret: SSH_USER
-      key:
-        from_secret: SSH_KEY
-      target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
-      source:
-        - ./sist2-arm64-linux
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -1,40 +0,0 @@
---
-name: "🐞 Bug Report"
-about: Submit a bug report 
-title: ''
-labels: bug
-assignees: ''
-
---
-
-**Device Information (please complete the following information):**
- - OS: `[e.g., Ubuntu 20.04, WSL2]`
- - Deployment: `[Linux, Linux ARM64 or Docker]`
- - Browser *(if relevant)*: `[e.g., chrome, safari]`
- - SIST2 Version: `[e.g., v2.9.0]`
- - Elasticsearch Version *(if relevant)* : ``
-
-**Command with arguments** 
-<!-- `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0` -->
-
-**Describe the bug**
-<!-- A clear and concise description of what the bug is. -->
-
-**Steps To Reproduce**
-Please be specific!
-1. Go to '...'
-2. Click on '....'
-3. etc.
-
-**Expected behavior**
-<!-- A clear and concise description of what you expected to happen. -->
-
-**Actual Behavior**
-<!-- A clear and concise description of what actually happens. -->
-
-**Screenshots**
-<!-- If applicable, add screenshots to help explain your problem. -->
-
-**Additional context**
-<!-- Add any other context about the problem here. If applicable, please include why you think the bug is occurring and/or troubleshooting you have already performed. -->
-<!-- If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net. -->
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,5 +0,0 @@
-blank_issues_enabled: false
-contact_links:
-  - name: SIST2 Documentation
-    url: https://github.com/simon987/sist2/blob/master/docs/USAGE.md
-    about: Check out the SIST2 documentation for answers to common questions
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -1,18 +0,0 @@
---
-name: "🚀 Feature Request"
-about: Suggest an idea for SIST2
-title: ''
-assignees: ''
-
---
-**Which SIST2 component is your Feature Request related to?**
-<!-- e.g., Scan, Index, or Web? -->
-
-**Is your feature request related to a problem? Please describe.**
-<!-- A clear and concise description of what the problem is. e.g., "I'm always frustrated when [...]" -->
-
-**What would you like to see happen?**
-<!-- A clear and concise description of what you want to happen. -->
-
-**Additional context**
-<!-- Add any other context or screenshots about the feature request here. -->
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 .idea
 thumbs
+test
 *.cbp
 CMakeCache.txt
 CMakeFiles
@@ -16,5 +17,3 @@ bundle.js
 vgcore.*
 build/
 third-party/
-*.idx/
-VERSION
--- a/.teamcity/settings.kts
+++ b/.teamcity/settings.kts
@@ -0,0 +1,69 @@
+import jetbrains.buildServer.configs.kotlin.v2019_2.*
+import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.ExecBuildStep
+import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.exec
+import jetbrains.buildServer.configs.kotlin.v2019_2.triggers.vcs
+import jetbrains.buildServer.configs.kotlin.v2019_2.vcs.GitVcsRoot
+
+/*
+The settings script is an entry point for defining a TeamCity
+project hierarchy. The script should contain a single call to the
+project() function with a Project instance or an init function as
+an argument.
+
+VcsRoots, BuildTypes, Templates, and subprojects can be
+registered inside the project using the vcsRoot(), buildType(),
+template(), and subProject() methods respectively.
+
+To debug settings scripts in command-line, run the
+
+    mvnDebug org.jetbrains.teamcity:teamcity-configs-maven-plugin:generate
+
+command and attach your debugger to the port 8000.
+
+To debug in IntelliJ Idea, open the 'Maven Projects' tool window (View
+-> Tool Windows -> Maven Projects), find the generate task node
+(Plugins -> teamcity-configs -> teamcity-configs:generate), the
+'Debug' option is available in the context menu for the task.
+*/
+
+version = "2019.2"
+
+project {
+    // Attach the Git VCS root object declared at the end of this file.
+    vcsRoot(HttpsGithubComSimon987sist2refsHeadsMaster)
+    // Register the one build configuration defined below.
+    buildType(Build)
+}
+
+object Build : BuildType({
+    name = "Build"
+    // Artifact rules, one path per line. NOTE(review): confirm these names match what ./ci/build.sh produces.
+    artifactRules = """
+        sist2
+        sist2_scan
+    """.trimIndent()
+    // Use the same VCS root that is registered on the project.
+    vcs {
+        root(HttpsGithubComSimon987sist2refsHeadsMaster)
+    }
+    // Single exec step: runs ./ci/build.sh with the simon987/general_ci Docker image (dockerPull = true).
+    steps {
+        exec {
+            name = "Build"
+            path = "./ci/build.sh"
+            dockerImage = "simon987/general_ci"
+            dockerImagePlatform = ExecBuildStep.ImagePlatform.Linux
+            dockerPull = true
+        }
+    }
+    // VCS trigger declared with an empty options block.
+    triggers {
+        vcs {
+        }
+    }
+})
+
+object HttpsGithubComSimon987sist2refsHeadsMaster : GitVcsRoot({
+    name = "https://github.com/simon987/sist2#refs/heads/master"
+    url = "https://github.com/simon987/sist2"
+})
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,15 +36,15 @@ add_executable(sist2
 target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
 set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)

-find_package(PkgConfig REQUIRED)
-
-pkg_search_module(GLIB REQUIRED glib-2.0)
-
 find_package(lmdb CONFIG REQUIRED)
 find_package(cJSON CONFIG REQUIRED)
+find_package(unofficial-glib CONFIG REQUIRED)
 find_package(unofficial-mongoose CONFIG REQUIRED)
+find_library(UUID_LIB NAMES uuid)
 find_package(CURL CONFIG REQUIRED)

+#find_package(OpenSSL REQUIRED)
+

 target_include_directories(
        sist2 PUBLIC
@@ -52,7 +52,6 @@ target_include_directories(
        ${CMAKE_SOURCE_DIR}/third-party/utf8.h/
        ${CMAKE_SOURCE_DIR}/third-party/libscan/
        ${CMAKE_SOURCE_DIR}/
-        ${GLIB_INCLUDE_DIRS}
 )

 target_compile_options(
@@ -69,8 +68,7 @@ if (SIST_DEBUG)
            -fstack-protector
            -fno-omit-frame-pointer
            -fsanitize=address
-            -fno-inline
-#            -O2
+            -O2
    )
    target_link_options(
            sist2
@@ -83,6 +81,7 @@ if (SIST_DEBUG)
            OUTPUT_NAME sist2_debug
    )
 else ()
+    #    set(VCPKG_BUILD_TYPE release)
    target_compile_options(
            sist2
            PRIVATE
@@ -105,15 +104,14 @@ target_link_libraries(
        lmdb
        cjson
        argparse
-        ${GLIB_LDFLAGS}
+        unofficial::glib::glib
        unofficial::mongoose::mongoose
        CURL::libcurl

+        ${UUID_LIB}
        pthread
        magic

-        c
-
        scan
 )

--- a/Docker/Dockerfile
+++ b/Docker/Dockerfile
@@ -1,15 +1,9 @@
-FROM simon987/sist2-build as build
+FROM ubuntu:19.10
 MAINTAINER simon987 <me@simon987.net>

-WORKDIR /build/
-ADD . /build/
-RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
-RUN make -j$(nproc)
-RUN strip sist2
-
-FROM ubuntu:20.10
-
-RUN apt update && apt install -y curl
+RUN apt update
+RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
+ curl libtiff5 libpng16-16 libpcre3

 RUN mkdir -p /usr/share/tessdata && \
    cd /usr/share/tessdata/ && \
@@ -18,9 +12,9 @@ RUN mkdir -p /usr/share/tessdata && \
    curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
    curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
    curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
-    curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
+    curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh

-COPY --from=build /build/sist2 /root/sist2
+ADD sist2 /root/sist2

 ENV LANG C.UTF-8
 ENV LC_ALL C.UTF-8
--- a/Docker/build.sh
+++ b/Docker/build.sh
@@ -0,0 +1,14 @@
+rm ./sist2 sist2_debug  # remove binaries left over from a previous run before unpacking
+cp ../sist2.gz .
+gzip -d sist2.gz
+strip sist2
+# The output of `./sist2 --version` is used as the image tag below.
+version=$(./sist2 --version)
+# ${version} is quoted in every expansion so it is always passed as a single word.
+echo "Version ${version}"
+docker build . -t simon987/sist2:"${version}" -t simon987/sist2:latest
+# Push both tags applied by the build above.
+docker push simon987/sist2:"${version}"
+docker push simon987/sist2:latest
+# Run the image (default tag) with -v.
+docker run --rm simon987/sist2 -v
--- a/DockerArm64/Dockerfile
+++ b/DockerArm64/Dockerfile
@@ -1,15 +1,9 @@
-FROM simon987/sist2-build-arm64 as build
+FROM ubuntu:19.10
 MAINTAINER simon987 <me@simon987.net>

-WORKDIR /build/
-ADD . /build/
-RUN cmake -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
-RUN make -j$(nproc)
-RUN strip sist2
-
-FROM ubuntu:20.10
-
-RUN apt update && apt install -y curl
+RUN apt update
+RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
+ curl libtiff5 libpng16-16 libpcre3

 RUN mkdir -p /usr/share/tessdata && \
    cd /usr/share/tessdata/ && \
@@ -18,9 +12,9 @@ RUN mkdir -p /usr/share/tessdata && \
    curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
    curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
    curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
-    curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata
+    curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh

-COPY --from=build /build/sist2 /root/sist2
+ADD sist2_arm64 /root/sist2

 ENV LANG C.UTF-8
 ENV LC_ALL C.UTF-8
--- a/DockerArm64/build.sh
+++ b/DockerArm64/build.sh
@@ -0,0 +1,13 @@
+rm ./sist2_arm64
+cp ../sist2_arm64.gz .
+gzip -d sist2_arm64.gz
+# The output of `./sist2_arm64 --version` is used as the image tag below.
+version=$(./sist2_arm64 --version)
+# Build once and apply two tags: the version and latest.
+echo "Version ${version}"
+docker build . -t simon987/sist2-arm64:"${version}" -t simon987/sist2-arm64:latest
+# Push both tags applied by the build above.
+docker push simon987/sist2-arm64:"${version}"
+docker push simon987/sist2-arm64:latest
+# Run the image (default tag) with -v.
+docker run --rm simon987/sist2-arm64 -v
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 ![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
 [![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
-[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
+[![Development snapshots](https://ci.simon987.net/app/rest/builds/buildType(Sist2_Build)/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)

 **Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)

@@ -25,12 +25,14 @@ sist2 (Simple incremental search tool)
 * OCR support with tesseract \*\*\*
 * Stats page & disk utilisation visualization

+
 \* See [format support](#format-support)    
 \*\* See [Archive files](#archive-files)    
 \*\*\* See [OCR](#ocr)    

 ![stats](docs/stats.png)

+
 ## Getting Started

 1. Have an Elasticsearch (>= 6.X.X) instance running
@@ -50,13 +52,15 @@ sist2 (Simple incremental search tool)
        ```
 1. Download sist2 executable
    1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
-    1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not recommended!)*
-    1. *(or)* `docker pull simon987/sist2:2.10.1-x64-linux`
+    1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
+    1. *(or)* `docker pull simon987/sist2:latest`

 1. See [Usage guide](docs/USAGE.md)
   
+
 \* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)    

+
 ## Example usage

 See [Usage guide](docs/USAGE.md) for more details
@@ -65,16 +69,17 @@ See [Usage guide](docs/USAGE.md) for more details
 1. Push index to Elasticsearch: `sist2 index ./docs_idx`
 1. Start web interface: `sist2 web ./docs_idx`

+
 ## Format support

-File type | Library | Content | Thumbnail | Metadata
+File type | Library | Content | Thumbnail | Metadata
 :---|:---|:---|:---|:---
 pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
 cbz,cbr | *(none)* | - | yes | - |
 `audio/*` | ffmpeg | - | yes | ID3 tags |
 `video/*` | ffmpeg | - | yes | title, comment, artist |
-`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
-raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf  | LibRaw | - | yes | Common EXIF tags, GPS tags |
+`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
+raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf  | LibRaw | - | yes | Common EXIF tags |
 ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
 `text/plain` | *(none)* | yes | no | - |
 html, xml | *(none)* | yes | no | - |
@@ -86,65 +91,46 @@ mobi, azw, azw3 | libmobi | yes | no | author, title |
 \* *See [Archive files](#archive-files)*
 
 ### Archive files
-
-**sist2** will scan files stored into archive files (zip, tar, 7z...) as if they were directly in the file system.
-Recursive (archives inside archives)
+**sist2** will scan files stored in archive files (zip, tar, 7z...) as if
+they were directly in the file system. Recursive (archives inside archives)
 scan is also supported.

 **Limitations**:
-
 * Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) 
  is limitted (see `--mem-buffer` option)
 * Archive files are scanned sequentially, by a single thread. On systems where
-  **sist2** is not I/O bound, scans might be faster when larger archives are split into smaller parts.
+**sist2** is not I/O bound, scans might be faster when larger archives are split
+ into smaller parts.
+ 
 
 ### OCR

 You can enable OCR support for pdf,xps,fb2,epub file types with the
-`--ocr <lang>` option. Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
-directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
+`--ocr <lang>` option. Download the language data files with your
+package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).

 The `simon987/sist2` image comes with common languages 
 (hin, jpn, eng, fra, rus, spa) pre-installed.

 Examples
-
 ```bash
 sist2 scan --ocr jpn ~/Books/Manga/
 sist2 scan --ocr eng ~/Books/Textbooks/
 ```

+
 ## Build from source

-You can compile **sist2** by yourself if you don't want to use the pre-compiled binaries
-
-### With docker (recommended)
-
-```bash
-git clone --recursive https://github.com/simon987/sist2/
-cd sist2
-docker build . -f ./Dockerfile -t my-sist2-image
-docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux
-```
-
-### On a linux computer
+You can compile **sist2** by yourself if you don't want to use the pre-compiled
+binaries (GCC 7+ required).

 1. Install compile-time dependencies

   ```bash
-   apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
+   vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf
   ```

-1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
-
-1. Install vcpkg dependencies
-
-    ```bash
-    vcpkg install curl[core,openssl]
-    vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libuuid libmagic libraw jasper lcms gumbo
-    ```
-
-1. Build
+2. Build
    ```bash
    git clone --recursive https://github.com/simon987/sist2/
    cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
--- a/ci/build.sh
+++ b/ci/build.sh
@@ -2,18 +2,16 @@

 VCPKG_ROOT="/vcpkg"

-rm *.gz &>/dev/null
-
-git submodule update --init --recursive
+rm *.gz

 rm -rf CMakeFiles CMakeCache.txt
-cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
-make -j $(nproc)
+cmake -DSIST_DEBUG=off -DVCPKG_BUILD_TYPE=release -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
+make -j 12
 strip sist2
-./sist2 -v > VERSION
-mv sist2 sist2-x64-linux
+gzip -9 sist2

 rm -rf CMakeFiles CMakeCache.txt
-cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
-make -j  $(nproc)
-mv sist2_debug sist2-x64-linux-debug
+cmake -DSIST_DEBUG=on -DVCPKG_BUILD_TYPE=debug -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
+make -j 12
+cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
+tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2
--- a/ci/build_arm64.sh
+++ b/ci/build_arm64.sh
@@ -2,12 +2,11 @@

 VCPKG_ROOT="/vcpkg"

-rm *.gz &>/dev/null
-
-git submodule update --init --recursive
+rm *.gz

 rm -rf CMakeFiles CMakeCache.txt
 cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
-make -j $(nproc)
+make -j 4
 strip sist2
-mv sist2 sist2-arm64-linux
+mv sist2 sist2_arm64
+gzip -9 sist2_arm64
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -46,7 +46,6 @@ Scan options
    --fast                        Only index file names & mime type
    --treemap-threshold=<str>     Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
    --mem-buffer=<int>            Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
-    --read-subtitles              Read subtitles from media files

 Index options
    -t, --threads=<int>           Number of threads. DEFAULT=1
@@ -92,7 +91,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
    Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
    will be copied to the new index and will not be parsed again.
 * `-o, --output` Output directory. 
-* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url)) 
+* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url)) 
 * `--name` Set the `name` option for the web module
 * `--depth` Maximum scan dept. Set to 0 only scan files directly in the root directory, set to -1 for infinite depth
 * `--archive` Archive file mode.
@@ -124,7 +123,6 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
    larger than this number will be read sequentially and no *seek* operations will be supported.

    To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
-* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.

 ### Scan examples

@@ -243,11 +241,9 @@ The `_text.*` items will be indexed and searchable as **text** fields (fuzzy sea

 *thumbs/*:

-LMDB key-value store. Keys are **binary** 16-byte md5 hash* (`_id` field)
+LMDB key-value store. Keys are **binary** 128-bit UUID4s (`_id` field)
 and values are raw image bytes.

-*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
-
 Importing an external `binary` type index is technically possible but
 it is currently unsupported and has no guaranties of back/forward compatibility.

@@ -357,7 +353,8 @@ You can safely copy the `/tags/` database to another index.
 See [Automatic tagging](#automatic-tagging) for information about tag 
 hierarchies and tag colors.

-\* *It can take a few seconds to take effect in new search queries.*
+\* *It can take a few seconds to take effect in new search queries, and the page needs 
+    to be reloaded for the tags tab to update*


 ### Automatic tagging
--- a/schema/mappings.json
+++ b/schema/mappings.json
@@ -30,10 +30,6 @@
    "mime": {
      "type": "keyword"
    },
-    "parent": {
-      "type": "keyword",
-      "index": false
-    },
    "thumbnail": {
      "type": "keyword",
      "index": false
@@ -165,30 +161,6 @@
    "exif_user_comment": {
      "type": "text"
    },
-    "exif_gps_longitude_ref": {
-      "type": "keyword",
-      "index": false
-    },
-    "exif_gps_longitude_dms": {
-      "type": "keyword",
-      "index": false
-    },
-    "exif_gps_longitude_dec": {
-      "type": "keyword",
-      "index": false
-    },
-    "exif_gps_latitude_ref": {
-      "type": "keyword",
-      "index": false
-    },
-    "exif_gps_latitude_dms": {
-      "type": "keyword",
-      "index": false
-    },
-    "exif_gps_latitude_dec": {
-      "type": "keyword",
-      "index": false
-    },
    "author": {
      "type": "text"
    },
--- a/scripts/reset.sh
+++ b/scripts/reset.sh
@@ -1,6 +0,0 @@
-#!/usr/bin/env bash
-
-make clean
-rm -rf CMakeFiles/ CMakeCache.txt Makefile \
-  third-party/libscan/CMakeFiles third-party/libscan/CMakeCache.txt third-party/libscan/third-party/ext_ffmpeg \
-  third-party/libscan/third-party/ext_libmobi third-party/libscan/Makefile
--- a/src/cli.c
+++ b/src/cli.c
@@ -227,7 +227,6 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
    LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
    LOG_DEBUGF("cli.c", "arg path=%s", args->path)
    LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
-    LOG_DEBUGF("cli.c", "arg archive_passphrase=%s", args->archive_passphrase)
    LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
    LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
    LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
--- a/src/cli.h
+++ b/src/cli.h
@@ -18,7 +18,6 @@ typedef struct scan_args {
    char *path;
    char *archive;
    archive_mode_t archive_mode;
-    char *archive_passphrase;
    char *tesseract_lang;
    const char *tesseract_path;
    char *exclude_regex;
@@ -26,7 +25,6 @@ typedef struct scan_args {
    const char* treemap_threshold_str;
    double treemap_threshold;
    int max_memory_buffer;
-    int read_subtitles;
 } scan_args_t;

 scan_args_t *scan_args_create();
--- a/src/ctx.h
+++ b/src/ctx.h
@@ -40,9 +40,6 @@ typedef struct {
    pcre_extra *exclude_extra;
    int fast;

-    GHashTable *dbg_current_files;
-    pthread_mutex_t dbg_current_files_mu;
-
    scan_arc_ctx_t arc_ctx;
    scan_comic_ctx_t comic_ctx;
    scan_ebook_ctx_t ebook_ctx;
--- a/src/index/elastic.c
+++ b/src/index/elastic.c
@@ -30,11 +30,11 @@ void elastic_cleanup() {
    }
 }

-void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
+void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {

    cJSON *line = cJSON_CreateObject();

-    cJSON_AddStringToObject(line, "_id", id_str);
+    cJSON_AddStringToObject(line, "_id", uuid_str);
    cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
    cJSON_AddStringToObject(line, "_type", "_doc");
    cJSON_AddItemReferenceToObject(line, "_source", document);
@@ -52,13 +52,13 @@ void index_json_func(void *arg) {
    elastic_index_line(line);
 }

-void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
+void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
    char *json = cJSON_PrintUnformatted(document);

    size_t json_len = strlen(json);
    es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
    memcpy(bulk_line->line, json, json_len);
-    memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
+    memcpy(bulk_line->uuid_str, uuid_str, UUID_STR_LEN);
    *(bulk_line->line + json_len) = '\n';
    *(bulk_line->line + json_len + 1) = '\0';
    bulk_line->next = NULL;
@@ -67,7 +67,7 @@ void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
    tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
 }

-void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
+void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]) {

    if (Indexer == NULL) {
        Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
@@ -129,9 +129,9 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
    while (line != NULL && *count < max) {
        char action_str[256];
        snprintf(
-                action_str, sizeof(action_str),
+                action_str, 256,
                "{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
-                line->path_md5_str, Indexer->es_index
+                line->uuid_str, Indexer->es_index
        );

        size_t action_str_len = strlen(action_str);
@@ -220,7 +220,7 @@ void _elastic_flush(int max) {
    if (r->status_code == 413) {

        if (max <= 1) {
-            LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
+            LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->uuid_str)
            free_response(r);
            free(buf);
            delete_queue(1);
@@ -408,9 +408,9 @@ void elastic_init(int force_reset, const char* user_mappings, const char* user_s
    }
 }

-cJSON *elastic_get_document(const char *id_str) {
+cJSON *elastic_get_document(const char *uuid_str) {
    char url[4096];
-    snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);
+    snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, uuid_str);

    response_t *r = web_get(url, 3);
    cJSON *json = NULL;
--- a/src/index/elastic.h
+++ b/src/index/elastic.h
@@ -5,7 +5,7 @@

 typedef struct es_bulk_line {
    struct es_bulk_line *next;
-    char path_md5_str[MD5_STR_LENGTH];
+    char uuid_str[UUID_STR_LEN];
    char line[0];
 } es_bulk_line_t;

@@ -16,9 +16,9 @@ typedef struct es_indexer es_indexer_t;

 void elastic_index_line(es_bulk_line_t *line);

-void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
+void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);

-void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
+void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);

 es_indexer_t *create_indexer(const char *url, const char *index);

@@ -27,10 +27,10 @@ void finish_indexer(char *script, int async_script, char *index_id);

 void elastic_init(int force_reset, const char* user_mappings, const char* user_settings);

-cJSON *elastic_get_document(const char *id_str);
+cJSON *elastic_get_document(const char *uuid_str);

 char *elastic_get_status();

-void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
+void execute_update_script(const char *script, int async, const char index_id[UUID_STR_LEN]);

 #endif
--- a/src/index/static_generated.c
+++ b/src/index/static_generated.c
--- a/src/io/serialize.c
+++ b/src/io/serialize.c
@@ -6,22 +6,18 @@
 static __thread int index_fd = -1;

 typedef struct {
-    unsigned char path_md5[MD5_DIGEST_LENGTH];
+    unsigned char uuid[16];
+    unsigned long ino;
    unsigned long size;
    unsigned int mime;
    int mtime;
    short base;
    short ext;
-    char has_parent;
 } line_t;

-#define META_NEXT 0xFFFF
-
 void skip_meta(FILE *file) {
-    enum metakey key = 0;
-    fread(&key, sizeof(uint16_t), 1, file);
-
-    while (key != META_NEXT) {
+    enum metakey key = getc(file);
+    while (key != '\n') {
        if (IS_META_INT(key)) {
            fseek(file, sizeof(int), SEEK_CUR);
        } else if (IS_META_LONG(key)) {
@@ -30,13 +26,13 @@ void skip_meta(FILE *file) {
            while ((getc(file))) {}
        }

-        fread(&key, sizeof(uint16_t), 1, file);
+        key = getc(file);
    }
 }

 void write_index_descriptor(char *path, index_descriptor_t *desc) {
    cJSON *json = cJSON_CreateObject();
-    cJSON_AddStringToObject(json, "id", desc->id);
+    cJSON_AddStringToObject(json, "uuid", desc->uuid);
    cJSON_AddStringToObject(json, "version", desc->version);
    cJSON_AddStringToObject(json, "root", desc->root);
    cJSON_AddStringToObject(json, "name", desc->name);
@@ -70,7 +66,7 @@ index_descriptor_t read_index_descriptor(char *path) {
    }

    char *buf = malloc(info.st_size + 1);
-    size_t ret = read(fd, buf, info.st_size);
+    int ret = read(fd, buf, info.st_size);
    if (ret == -1) {
        LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
    }
@@ -86,7 +82,7 @@ index_descriptor_t read_index_descriptor(char *path) {
    strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
    descriptor.root_len = (short) strlen(descriptor.root);
    strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
-    strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
+    strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
    if (cJSON_GetObjectItem(json, "type") == NULL) {
        strcpy(descriptor.type, INDEX_TYPE_BIN);
    } else {
@@ -156,20 +152,8 @@ char *get_meta_key_text(enum metakey meta_key) {
            return "thumbnail";
        case MetaPages:
            return "pages";
-        case MetaExifGpsLongitudeRef:
-            return "exif_gps_longitude_ref";
-        case MetaExifGpsLongitudeDMS:
-            return "exif_gps_longitude_dms";
-        case MetaExifGpsLongitudeDec:
-            return "exif_gps_longitude_dec";
-        case MetaExifGpsLatitudeRef:
-            return "exif_gps_latitude_ref";
-        case MetaExifGpsLatitudeDMS:
-            return "exif_gps_latitude_dms";
-        case MetaExifGpsLatitudeDec:
-            return "exif_gps_latitude_dec";
        default:
-        LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
+            return NULL;
    }
 }

@@ -199,7 +183,7 @@ void write_document(document_t *doc) {

    meta_line_t *meta = doc->meta_head;
    while (meta != NULL) {
-        dyn_buffer_write_short(&buf, (uint16_t) meta->key);
+        dyn_buffer_write_char(&buf, meta->key);

        if (IS_META_INT(meta->key)) {
            dyn_buffer_write_int(&buf, meta->int_val);
@@ -213,7 +197,7 @@ void write_document(document_t *doc) {
        meta = meta->next;
        free(tmp);
    }
-    dyn_buffer_write_short(&buf, META_NEXT);
+    dyn_buffer_write_char(&buf, '\n');

    int res = write(index_fd, buf.buf, buf.cur);
    if (res == -1) {
@@ -235,9 +219,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
    dyn_buffer_t buf = dyn_buffer_create();

    FILE *file = fopen(path, "rb");
-    while (TRUE) {
+    while (1) {
        buf.cur = 0;
-        size_t _ = fread((void *) &line, sizeof(line_t), 1, file);
+        size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
        if (feof(file)) {
            break;
        }
@@ -245,8 +229,8 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
        cJSON *document = cJSON_CreateObject();
        cJSON_AddStringToObject(document, "index", index_id);

-        char path_md5_str[MD5_STR_LENGTH];
-        buf2hex(line.path_md5, sizeof(line.path_md5), path_md5_str);
+        char uuid_str[UUID_STR_LEN];
+        uuid_unparse(line.uuid, uuid_str);

        const char *mime_text = mime_get_mime_text(line.mime);
        if (mime_text == NULL) {
@@ -263,6 +247,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
        }
        dyn_buffer_write_char(&buf, '\0');

+        char full_filename[PATH_MAX];
+        strcpy(full_filename, buf.buf);
+
        cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
        if (*(buf.buf + line.ext - 1) == '.') {
            *(buf.buf + line.ext - 1) = '\0';
@@ -284,10 +271,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
            cJSON_AddStringToObject(document, "path", "");
        }

-        enum metakey key = 0;
-        fread(&key, sizeof(uint16_t), 1, file);
-        size_t ret;
-        while (key != META_NEXT) {
+        enum metakey key = getc(file);
+        size_t ret = 0;
+        while (key != '\n') {
            switch (key) {
                case MetaPages:
                case MetaWidth:
@@ -325,12 +311,6 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
                case MetaAuthor:
                case MetaModifiedBy:
                case MetaThumbnail:
-                case MetaExifGpsLongitudeDMS:
-                case MetaExifGpsLongitudeDec:
-                case MetaExifGpsLongitudeRef:
-                case MetaExifGpsLatitudeDMS:
-                case MetaExifGpsLatitudeDec:
-                case MetaExifGpsLatitudeRef:
                case MetaTitle: {
                    buf.cur = 0;
                    while ((c = getc(file)) != 0) {
@@ -346,12 +326,12 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
                LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key)
            }

-            fread(&key, sizeof(uint16_t), 1, file);
+            key = getc(file);
        }

        cJSON *meta_obj = NULL;
        if (IndexCtx.meta != NULL) {
-            const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
+            const char *meta_string = g_hash_table_lookup(IndexCtx.meta, full_filename);
            if (meta_string != NULL) {
                meta_obj = cJSON_Parse(meta_string);

@@ -366,7 +346,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
        }

        if (IndexCtx.tags != NULL) {
-            const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
+            const char *tags_string = g_hash_table_lookup(IndexCtx.tags, full_filename);
            if (tags_string != NULL) {
                cJSON *tags_arr = cJSON_Parse(tags_string);
                cJSON_DeleteItemFromObject(document, "tag");
@@ -374,7 +354,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
            }
        }

-        func(document, path_md5_str);
+        func(document, uuid_str);
        cJSON_Delete(document);
        if (meta_obj) {
            cJSON_Delete(meta_obj);
@@ -402,7 +382,7 @@ const char *json_type_array_fields[] = {
 void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) {

    FILE *file = fopen(path, "r");
-    while (TRUE) {
+    while (1) {
        char *line = NULL;
        size_t len;
        size_t read = getline(&line, &len, file);
@@ -422,7 +402,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
        }

        cJSON *document = cJSON_CreateObject();
-        const char *id_str = cJSON_GetObjectItem(input, "_id")->valuestring;
+        const char *uuid_str = cJSON_GetObjectItem(input, "_id")->valuestring;

        for (int i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) {
            cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]);
@@ -450,7 +430,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
            }
        }

-        func(document, id_str);
+        func(document, uuid_str);
        cJSON_Delete(document);
        cJSON_Delete(input);

@@ -458,7 +438,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
    fclose(file);
 }

-void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
+void read_index(const char *path, const char index_id[UUID_STR_LEN], const char *type, index_func func) {

    if (strcmp(type, INDEX_TYPE_BIN) == 0) {
        read_index_bin(path, index_id, func);
@@ -471,17 +451,15 @@ void incremental_read(GHashTable *table, const char *filepath) {
    FILE *file = fopen(filepath, "rb");
    line_t line;

-    LOG_DEBUGF("serialize.c", "Incremental read %s", filepath)
-
    while (1) {
-        size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
+        size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
        if (ret != 1 || feof(file)) {
            break;
        }

-        incremental_put(table, line.path_md5, line.mtime);
+        incremental_put(table, line.ino, line.mtime);

-        while ((getc(file)) != 0) {}
+        while ((getc(file))) {}
        skip_meta(file);
    }
    fclose(file);
@@ -497,47 +475,33 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
    FILE *dst_file = fopen(dst_filepath, "ab");
    line_t line;

-    LOG_DEBUGF("serialize.c", "Incremental copy %s", filepath)
-
-    while (TRUE) {
-        size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
+    while (1) {
+        size_t ret = fread((void *) &line, 1, sizeof(line_t), file);
        if (ret != 1 || feof(file)) {
            break;
        }

-        // Assume that files with parents still exist.
-        //  One way to "fix" this would be to check if the parent is marked for copy but it would consistently
-        //  delete files with grandparents, which is a side-effect worse than having orphaned files
-        if (line.has_parent || incremental_get(copy_table, line.path_md5)) {
+        if (incremental_get(copy_table, line.ino)) {
            fwrite(&line, sizeof(line), 1, dst_file);

-            // Copy filepath
-            char filepath_buf[PATH_MAX];
-            char c;
-            char *ptr = filepath_buf;
-            while ((c = (char) getc(file))) {
-                *ptr++ = c;
-            }
-            *ptr = '\0';
-            fwrite(filepath_buf, (ptr - filepath_buf) + 1, 1, dst_file);
-
-            // Copy tn store contents
            size_t buf_len;
-            char path_md5[MD5_DIGEST_LENGTH];
-            MD5((unsigned char *) filepath_buf, (ptr - filepath_buf), (unsigned char *) path_md5);
-            char *buf = store_read(store, path_md5, sizeof(path_md5), &buf_len);
-            if (buf_len != 0) {
-                store_write(dst_store, path_md5, sizeof(path_md5), buf, buf_len);
+            char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
+            store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
            free(buf);
-            }

-            enum metakey key = 0;
+            char c;
+            while ((c = (char) getc(file))) {
+                fwrite(&c, sizeof(c), 1, dst_file);
+            }
+            fwrite("\0", sizeof(c), 1, dst_file);
+
+            enum metakey key;
            while (1) {
-                fread(&key, sizeof(uint16_t), 1, file);
-                fwrite(&key, sizeof(uint16_t), 1, dst_file);
-                if (key == META_NEXT) {
+                key = getc(file);
+                if (key == '\n') {
                    break;
                }
+                fwrite(&key, sizeof(char), 1, dst_file);

                if (IS_META_INT(key)) {
                    int val;
@@ -553,12 +517,14 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                    }
                    fwrite("\0", sizeof(c), 1, dst_file);
                }
+
+                if (ret != 1) {
+                    break;
+                }
            }
        } else {
-            while ((getc(file))) {}
            skip_meta(file);
        }
    }
    fclose(file);
-    fclose(dst_file);
 }
--- a/src/io/serialize.h
+++ b/src/io/serialize.h
@@ -7,14 +7,14 @@
 #include <sys/syscall.h>
 #include <glib.h>

-typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
+typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);

 void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                      const char *dst_filepath, GHashTable *copy_table);

 void write_document(document_t *doc);

-void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
+void read_index(const char *path, const char[UUID_STR_LEN], const char *type, index_func);

 void incremental_read(GHashTable *table, const char *filepath);

--- a/src/io/store.c
+++ b/src/io/store.c
@@ -4,7 +4,6 @@
 store_t *store_create(char *path, size_t chunk_size) {

    store_t *store = malloc(sizeof(struct store_t));
-#if (SIST_FAKE_STORE != 1)
    store->chunk_size = chunk_size;
    pthread_rwlock_init(&store->lock, NULL);

@@ -29,39 +28,30 @@ store_t *store_create(char *path, size_t chunk_size) {
    mdb_txn_begin(store->env, NULL, 0, &txn);
    mdb_dbi_open(txn, NULL, 0, &store->dbi);
    mdb_txn_commit(txn);
-#endif

    return store;
 }

 void store_destroy(store_t *store) {

-#if (SIST_FAKE_STORE != 1)
    pthread_rwlock_destroy(&store->lock);
    mdb_close(store->env, store->dbi);
    mdb_env_close(store->env);
-#endif
    free(store);
 }

-void store_flush(store_t *store) {
-    mdb_env_sync(store->env, TRUE);
-}
-
 void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {

    if (LogCtx.very_verbose) {
-        if (key_len == MD5_DIGEST_LENGTH) {
-            char path_md5_str[MD5_STR_LENGTH];
-            buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
-            LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
+        if (key_len == 16) {
+            char uuid_str[UUID_STR_LEN] = {0, };
+            uuid_unparse((unsigned char *) key, uuid_str);
+            LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
        } else {
            LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
        }
    }

-#if (SIST_FAKE_STORE != 1)
-
    MDB_val mdb_key;
    mdb_key.mv_data = key;
    mdb_key.mv_size = key_len;
@@ -98,13 +88,10 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
    if (put_ret != 0) {
        LOG_ERROR("store.c", mdb_strerror(put_ret))
    }
-#endif
 }

 char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) {
    char *buf = NULL;
-
-#if (SIST_FAKE_STORE != 1)
    MDB_val mdb_key;
    mdb_key.mv_data = key;
    mdb_key.mv_size = key_len;
@@ -125,7 +112,6 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
    }

    mdb_txn_abort(txn);
-#endif
    return buf;
 }

--- a/src/io/store.h
+++ b/src/io/store.h
@@ -24,8 +24,6 @@ void store_destroy(store_t *store);

 void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);

-void store_flush(store_t *store);
-
 char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);

 GHashTable *store_read_all(store_t *store);
--- a/src/io/walk.c
+++ b/src/io/walk.c
@@ -20,7 +20,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,

    job->vfile.info = *info;

-    memset(job->parent, 0, MD5_DIGEST_LENGTH);
+    memset(job->parent, 0, 16);

    job->vfile.filepath = job->filepath;
    job->vfile.read = fs_read;
--- a/src/main.c
+++ b/src/main.c
@@ -21,7 +21,7 @@
 #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"


-static const char *const Version = "2.10.2";
+static const char *const Version = "2.8.5";
 static const char *const usage[] = {
        "sist2 scan [OPTION]... PATH",
        "sist2 index [OPTION]... INDEX",
@@ -30,77 +30,13 @@ static const char *const usage[] = {
        NULL,
 };

-#include<signal.h>
-#include<unistd.h>
-
-static __sighandler_t sigsegv_handler = NULL;
-static __sighandler_t sigabrt_handler = NULL;
-
-void sig_handler(int signum) {
-
-    LogCtx.verbose = 1;
-    LogCtx.very_verbose = 1;
-
-    LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
-    LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
-
-    GHashTableIter iter;
-    g_hash_table_iter_init(&iter, ScanCtx.dbg_current_files);
-
-    void *key;
-    void *value;
-    while (g_hash_table_iter_next(&iter, &key, &value)) {
-        parse_job_t *job = value;
-
-        if (isatty(STDERR_FILENO)) {
-            LOG_DEBUGF(
-                    "*SIGNAL HANDLER*",
-                    "Thread \033[%dm[%04llX]\033[0m was working on job '%s'",
-                    31 + ((unsigned int) key) % 7, key, job->filepath
-            );
-        } else {
-            LOG_DEBUGF(
-                    "*SIGNAL HANDLER*",
-                    "THREAD [%04llX] was working on job %s",
-                    key, job->filepath
-            );
-        }
-    }
-
-    tpool_dump_debug_info(ScanCtx.pool);
-
-    LOG_INFO(
-            "*SIGNAL HANDLER*",
-            "Please consider creating a bug report at https://github.com/simon987/sist2/issues !"
-    )
-    LOG_INFO(
-            "*SIGNAL HANDLER*",
-            "sist2 is an open source project and relies on the collaboration of its users to diagnose and fix bugs"
-    )
-
-#ifndef SIST_DEBUG
-    LOG_WARNING(
-            "*SIGNAL HANDLER*",
-            "You are running sist2 in release mode! Please consider downloading the debug binary from the Github "
-            "releases page to provide additionnal information when submitting a bug report."
-    )
-#endif
-
-    if (signum == SIGSEGV && sigsegv_handler != NULL) {
-        sigsegv_handler(signum);
-    } else if (signum == SIGABRT && sigabrt_handler != NULL) {
-        sigabrt_handler(signum);
-    }
-}
-
 void init_dir(const char *dirpath) {
    char path[PATH_MAX];
    snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);

-    unsigned char index_md5[MD5_DIGEST_LENGTH];
-    MD5((unsigned char *) ScanCtx.index.desc.name, strlen(ScanCtx.index.desc.name), index_md5);
-    buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
-
+    uuid_t uuid;
+    uuid_generate(uuid);
+    uuid_unparse(uuid, ScanCtx.index.desc.uuid);
    time(&ScanCtx.index.desc.timestamp);
    strcpy(ScanCtx.index.desc.version, Version);
    strcpy(ScanCtx.index.desc.type, INDEX_TYPE_BIN);
@@ -162,14 +98,6 @@ void initialize_scan_context(scan_args_t *args) {
    ScanCtx.arc_ctx.log = _log;
    ScanCtx.arc_ctx.logf = _logf;
    ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
-    if (args->archive_passphrase != NULL) {
-        strcpy(ScanCtx.arc_ctx.passphrase, args->archive_passphrase);
-    } else {
-        ScanCtx.arc_ctx.passphrase[0] = 0;
-    }
-
-    ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL);
-    pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);

    // Comic
    ScanCtx.comic_ctx.log = _log;
@@ -203,7 +131,6 @@ void initialize_scan_context(scan_args_t *args) {
    ScanCtx.media_ctx.logf = _logf;
    ScanCtx.media_ctx.store = _store;
    ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
-    ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
    init_media();

    // OOXML
@@ -291,7 +218,7 @@ void sist2_scan(scan_args_t *args) {
        while ((de = readdir(dir)) != NULL) {
            if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
                char file_path[PATH_MAX];
-                snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
+                snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
                incremental_read(ScanCtx.original_table, file_path);
            }
        }
@@ -306,6 +233,8 @@ void sist2_scan(scan_args_t *args) {
    tpool_wait(ScanCtx.pool);
    tpool_destroy(ScanCtx.pool);

+    generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
+
    if (args->incremental != NULL) {
        char dst_path[PATH_MAX];
        snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
@@ -321,7 +250,7 @@ void sist2_scan(scan_args_t *args) {
        while ((de = readdir(dir)) != NULL) {
            if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
                char file_path[PATH_MAX];
-                snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
+                snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
                incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table);
            }
        }
@@ -336,8 +265,6 @@ void sist2_scan(scan_args_t *args) {
        store_destroy(source_tags);
    }

-    generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
-
    store_destroy(ScanCtx.index.store);
 }

@@ -400,7 +327,7 @@ void sist2_index(index_args_t *args) {
        if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
            char file_path[PATH_MAX];
            snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
-            read_index(file_path, desc.id, desc.type, f);
+            read_index(file_path, desc.uuid, desc.type, f);
        }
    }
    closedir(dir);
@@ -410,7 +337,7 @@ void sist2_index(index_args_t *args) {
    tpool_destroy(IndexCtx.pool);

    if (!args->print) {
-        finish_indexer(args->script, args->async_script, desc.id);
+        finish_indexer(args->script, args->async_script, desc.uuid);
    }

    store_destroy(IndexCtx.tag_store);
@@ -430,7 +357,7 @@ void sist2_exec_script(exec_args_t *args) {

    LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)

-    execute_update_script(args->script, args->async_script, desc.id);
+    execute_update_script(args->script, args->async_script, desc.uuid);
    free(args->script);
 }

@@ -471,9 +398,6 @@ void sist2_web(web_args_t *args) {


 int main(int argc, const char *argv[]) {
-    sigsegv_handler = signal(SIGSEGV, sig_handler);
-    sigabrt_handler = signal(SIGABRT, sig_handler);
-
    setlocale(LC_ALL, "");

    scan_args_t *scan_args = scan_args_create();
@@ -514,9 +438,6 @@ int main(int argc, const char *argv[]) {
            OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
                                                          "skip: Don't parse, list: only get file names as text, "
                                                          "shallow: Don't parse archives inside archives. DEFAULT: recurse"),
-            OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
-                       "Passphrase for encrypted archive files"),
-
            OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
                                                             "which are installed on your machine)"),
            OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
@@ -526,7 +447,6 @@ int main(int argc, const char *argv[]) {
            OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
                        "Maximum memory buffer size per thread in MB for files inside archives "
                        "(see USAGE.md). DEFAULT: 2000"),
-            OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),

            OPT_GROUP("Index options"),
            OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
--- a/src/parsing/parse.c
+++ b/src/parsing/parse.c
@@ -41,45 +41,34 @@ void fs_reset(struct vfile *f) {

 #define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL))

-void set_dbg_current_file(parse_job_t *job) {
-    unsigned long long pid = (unsigned long long) pthread_self();
-    pthread_mutex_lock(&ScanCtx.dbg_current_files_mu);
-    g_hash_table_replace(ScanCtx.dbg_current_files, GINT_TO_POINTER(pid), job);
-    pthread_mutex_unlock(&ScanCtx.dbg_current_files_mu);
-}
-
 void parse(void *arg) {

    parse_job_t *job = arg;
    document_t doc;

-    set_dbg_current_file(job);
+    int inc_ts = incremental_get(ScanCtx.original_table, job->vfile.info.st_ino);
+    if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
+        incremental_mark_file_for_copy(ScanCtx.copy_table, job->vfile.info.st_ino);
+        return;
+    }

    doc.filepath = job->filepath;
    doc.ext = (short) job->ext;
    doc.base = (short) job->base;
-
-    char *rel_path = doc.filepath + ScanCtx.index.desc.root_len;
-    MD5((unsigned char *) rel_path, strlen(rel_path), doc.path_md5);
-
    doc.meta_head = NULL;
    doc.meta_tail = NULL;
    doc.mime = 0;
    doc.size = job->vfile.info.st_size;
+    doc.ino = job->vfile.info.st_ino;
    doc.mtime = job->vfile.info.st_mtim.tv_sec;

-    int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5);
-    if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
-        incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5);
-        return;
-    }
-
+    uuid_generate(doc.uuid);
    char *buf[MAGIC_BUF_SIZE];

    if (LogCtx.very_verbose) {
-        char path_md5_str[MD5_STR_LENGTH];
-        buf2hex(doc.path_md5, MD5_DIGEST_LENGTH, path_md5_str);
-        LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
+        char uuid_str[UUID_STR_LEN];
+        uuid_unparse(doc.uuid, uuid_str);
+        LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
    }

    if (job->vfile.info.st_size == 0) {
@@ -97,8 +86,7 @@ void parse(void *arg) {

        // Get mime type with libmagic
        if (!job->vfile.is_fs_file) {
-            LOG_WARNING(job->filepath,
-                        "Guessing mime type with libmagic inside archive files is not currently supported");
+            LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported");
            goto abort;
        }

@@ -181,15 +169,11 @@ void parse(void *arg) {
    abort:

    //Parent meta
-    if (!md5_digest_is_null(job->parent)) {
-        meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
+    if (!uuid_is_null(job->parent)) {
+        meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
        meta_parent->key = MetaParent;
-        buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
+        uuid_unparse(job->parent, meta_parent->str_val);
        APPEND_META((&doc), meta_parent)
-
-        doc.has_parent = TRUE;
-    } else {
-        doc.has_parent = FALSE;
    }

    write_document(&doc);
--- a/src/parsing/sidecar.c
+++ b/src/parsing/sidecar.c
@@ -7,7 +7,7 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
    LOG_DEBUGF("sidecar.c", "Parsing sidecar file %s", vfile->filepath)

    size_t size;
-    char *buf = read_all(vfile, &size);
+    char* buf = read_all(vfile, &size);
    if (buf == NULL) {
        LOG_ERRORF("sidecar.c", "Read error for %s", vfile->filepath)
        return;
@@ -23,11 +23,11 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
    }
    char *json_str = cJSON_PrintUnformatted(json);

-    unsigned char path_md5[MD5_DIGEST_LENGTH];
-    MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
-        path_md5);
+    char filepath[PATH_MAX];
+    memcpy(filepath, vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len);
+    *(filepath + doc->ext - 1) = '\0';

-    store_write(ScanCtx.index.meta_store, (char *) path_md5, sizeof(path_md5), json_str, strlen(json_str) + 1);
+    store_write(ScanCtx.index.meta_store, filepath, doc->ext, json_str, strlen(json_str) + 1);

    cJSON_Delete(json);
    free(json_str);
--- a/src/sist.h
+++ b/src/sist.h
@@ -23,10 +23,9 @@
 #undef ABS
 #define ABS(a)	   (((a) < 0) ? -(a) : (a))

+#define UUID_STR_LEN 37
 #define UNUSED(x) __attribute__((__unused__))  x

-#define MD5_STR_LENGTH 33
-
 #include "util.h"
 #include "log.h"
 #include "types.h"
@@ -48,4 +47,5 @@
 #include <errno.h>
 #include <ctype.h>

+
 #endif
--- a/src/static/js/8_md5.min.js
+++ b/src/static/js/8_md5.min.js
@@ -1 +0,0 @@
-!function(n){"use strict";function d(n,t){var r=(65535&n)+(65535&t);return(n>>16)+(t>>16)+(r>>16)<<16|65535&r}function f(n,t,r,e,o,u){return d((c=d(d(t,n),d(e,u)))<<(f=o)|c>>>32-f,r);var c,f}function l(n,t,r,e,o,u,c){return f(t&r|~t&e,n,t,o,u,c)}function v(n,t,r,e,o,u,c){return f(t&e|r&~e,n,t,o,u,c)}function g(n,t,r,e,o,u,c){return f(t^r^e,n,t,o,u,c)}function m(n,t,r,e,o,u,c){return f(r^(t|~e),n,t,o,u,c)}function i(n,t){var r,e,o,u;n[t>>5]|=128<<t%32,n[14+(t+64>>>9<<4)]=t;for(var c=1732584193,f=-271733879,i=-1732584194,a=271733878,h=0;h<n.length;h+=16)c=l(r=c,e=f,o=i,u=a,n[h],7,-680876936),a=l(a,c,f,i,n[h+1],12,-389564586),i=l(i,a,c,f,n[h+2],17,606105819),f=l(f,i,a,c,n[h+3],22,-1044525330),c=l(c,f,i,a,n[h+4],7,-176418897),a=l(a,c,f,i,n[h+5],12,1200080426),i=l(i,a,c,f,n[h+6],17,-1473231341),f=l(f,i,a,c,n[h+7],22,-45705983),c=l(c,f,i,a,n[h+8],7,1770035416),a=l(a,c,f,i,n[h+9],12,-1958414417),i=l(i,a,c,f,n[h+10],17,-42063),f=l(f,i,a,c,n[h+11],22,-1990404162),c=l(c,f,i,a,n[h+12],7,1804603682),a=l(a,c,f,i,n[h+13],12,-40341101),i=l(i,a,c,f,n[h+14],17,-1502002290),c=v(c,f=l(f,i,a,c,n[h+15],22,1236535329),i,a,n[h+1],5,-165796510),a=v(a,c,f,i,n[h+6],9,-1069501632),i=v(i,a,c,f,n[h+11],14,643717713),f=v(f,i,a,c,n[h],20,-373897302),c=v(c,f,i,a,n[h+5],5,-701558691),a=v(a,c,f,i,n[h+10],9,38016083),i=v(i,a,c,f,n[h+15],14,-660478335),f=v(f,i,a,c,n[h+4],20,-405537848),c=v(c,f,i,a,n[h+9],5,568446438),a=v(a,c,f,i,n[h+14],9,-1019803690),i=v(i,a,c,f,n[h+3],14,-187363961),f=v(f,i,a,c,n[h+8],20,1163531501),c=v(c,f,i,a,n[h+13],5,-1444681467),a=v(a,c,f,i,n[h+2],9,-51403784),i=v(i,a,c,f,n[h+7],14,1735328473),c=g(c,f=v(f,i,a,c,n[h+12],20,-1926607734),i,a,n[h+5],4,-378558),a=g(a,c,f,i,n[h+8],11,-2022574463),i=g(i,a,c,f,n[h+11],16,1839030562),f=g(f,i,a,c,n[h+14],23,-35309556),c=g(c,f,i,a,n[h+1],4,-1530992060),a=g(a,c,f,i,n[h+4],11,1272893353),i=g(i,a,c,f,n[h+7],16,-155497632),f=g(f,i,a,c,n[h+10],23,-1094730640),c=g(c,f,i,a,n[h+13],4,681279174),a=g(a,c,f,i,n[h],11,-358537222),i=g(i,a,c,f,n[h+3],
16,-722521979),f=g(f,i,a,c,n[h+6],23,76029189),c=g(c,f,i,a,n[h+9],4,-640364487),a=g(a,c,f,i,n[h+12],11,-421815835),i=g(i,a,c,f,n[h+15],16,530742520),c=m(c,f=g(f,i,a,c,n[h+2],23,-995338651),i,a,n[h],6,-198630844),a=m(a,c,f,i,n[h+7],10,1126891415),i=m(i,a,c,f,n[h+14],15,-1416354905),f=m(f,i,a,c,n[h+5],21,-57434055),c=m(c,f,i,a,n[h+12],6,1700485571),a=m(a,c,f,i,n[h+3],10,-1894986606),i=m(i,a,c,f,n[h+10],15,-1051523),f=m(f,i,a,c,n[h+1],21,-2054922799),c=m(c,f,i,a,n[h+8],6,1873313359),a=m(a,c,f,i,n[h+15],10,-30611744),i=m(i,a,c,f,n[h+6],15,-1560198380),f=m(f,i,a,c,n[h+13],21,1309151649),c=m(c,f,i,a,n[h+4],6,-145523070),a=m(a,c,f,i,n[h+11],10,-1120210379),i=m(i,a,c,f,n[h+2],15,718787259),f=m(f,i,a,c,n[h+9],21,-343485551),c=d(c,r),f=d(f,e),i=d(i,o),a=d(a,u);return[c,f,i,a]}function a(n){for(var t="",r=32*n.length,e=0;e<r;e+=8)t+=String.fromCharCode(n[e>>5]>>>e%32&255);return t}function h(n){var t=[];for(t[(n.length>>2)-1]=void 0,e=0;e<t.length;e+=1)t[e]=0;for(var r=8*n.length,e=0;e<r;e+=8)t[e>>5]|=(255&n.charCodeAt(e/8))<<e%32;return t}function e(n){for(var t,r="0123456789abcdef",e="",o=0;o<n.length;o+=1)t=n.charCodeAt(o),e+=r.charAt(t>>>4&15)+r.charAt(15&t);return e}function r(n){return unescape(encodeURIComponent(n))}function o(n){return a(i(h(t=r(n)),8*t.length));var t}function u(n,t){return function(n,t){var r,e,o=h(n),u=[],c=[];for(u[15]=c[15]=void 0,16<o.length&&(o=i(o,8*n.length)),r=0;r<16;r+=1)u[r]=909522486^o[r],c[r]=1549556828^o[r];return e=i(u.concat(h(t)),512+8*t.length),a(i(c.concat(e),640))}(r(n),r(t))}function t(n,t,r){return t?r?u(t,n):e(u(t,n)):r?o(n):e(o(n))}"function"==typeof define&&define.amd?define(function(){return t}):"object"==typeof module&&module.exports?module.exports=t:n.md5=t}(this);
--- a/src/static/js/dom.js
+++ b/src/static/js/dom.js
@@ -22,7 +22,7 @@ function gifOver(thumbnail, hit) {
    thumbnail.addEventListener("mouseout", function () {
        //Reset timer
        thumbnail.mouseStayedOver = false;
-        thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`);
+        thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
    })
 }

@@ -192,19 +192,6 @@ function makeUserTag(tag, hit) {
    return userTag;
 }

-function makeGpsMetaRow(tbody, latitude, longitude) {
-    tbody.append($("<tr>")
-        .append($("<td>").text("Exif GPS"))
-        .append($("<td>")
-            .append($("<a>")
-                .text(`${latitude}, ${longitude}`)
-                .attr("href", `https://maps.google.com/?q=${latitude},${longitude}&ll=${latitude},${longitude}&t=k&z=17`)
-                .attr("target", "_blank")
-            )
-        )
-    );
-}
-
 function infoButtonCb(hit) {
    return () => {
        getDocumentInfo(hit["_id"]).then(doc => {
@@ -242,25 +229,13 @@ function infoButtonCb(hit) {
                    .text(new Date(doc["mtime"] * 1000).toISOString().split(".")[0].replace("T", " "))
                    .attr("title", doc["mtime"]))
            );
-
-            // Exif GPS
-            if ("exif_gps_longitude_dec" in doc) {
-                makeGpsMetaRow(tbody, doc["exif_gps_latitude_dec"], doc["exif_gps_longitude_dec"])
-            } else if ("exif_gps_longitude_dms" in doc) {
-                makeGpsMetaRow(
-                    tbody,
-                    dmsToDecimal(doc["exif_gps_latitude_dms"], doc["exif_gps_latitude_ref"]),
-                    dmsToDecimal(doc["exif_gps_longitude_dms"], doc["exif_gps_longitude_ref"]),
-                )
-            }
-
            const displayFields = new Set([
                "mime", "size", "path", "title", "width", "height", "duration", "audioc", "videoc",
                "bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author",
                "modified_by", "pages"
            ]);
            Object.keys(doc)
-                .filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || (key.startsWith("exif_") && !key.includes("gps")))
+                .filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_"))
                .forEach(key => {
                    tbody.append($("<tr>")
                        .append($("<td>").text(key))
@@ -375,14 +350,6 @@ function createDocCard(hit) {
            audio.setAttribute("controls", "");
            audio.setAttribute("type", hit["_source"]["mime"]);
            audio.setAttribute("src", "f/" + hit["_id"]);
-            audio.addEventListener("play", () => {
-                // Pause all currently playing audio tags
-                $("audio").each(function () {
-                    if (this !== audio) {
-                        this.pause();
-                    }
-                });
-            });

            docCard.appendChild(audio)
        }
@@ -452,7 +419,7 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
            thumbnail.setAttribute("class", "card-img-top fit");
        }
    }
-    thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_path_md5"]}`);
+    thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);

    if (shouldDisplayRawImage(hit)) {
        thumbnail.addEventListener("click", () => {
--- a/src/static/js/search.js
+++ b/src/static/js/search.js
@@ -165,9 +165,6 @@ window.onload = () => {
            }
        }
    });
-
-    initTagTree();
-    updateTagTree();
 };

 function saveTag(tag, hit) {
@@ -177,7 +174,7 @@ function saveTag(tag, hit) {
        delete: false,
        name: tag,
        doc_id: hit["_id"],
-        path_md5: md5(relPath)
+        relpath: relPath
    }).then(() => {
        tagBar.blur();
        $("#tagModal").modal("hide");
@@ -191,8 +188,6 @@ function saveTag(tag, hit) {
            hideAfter: 3000,
            loaderBg: "#08c7e8",
        });
-
-        window.setTimeout(updateTagTree, 2000);
    })
 }

@@ -203,7 +198,7 @@ function deleteTag(tag, hit) {
        delete: true,
        name: tag,
        doc_id: hit["_id"],
-        path_md5: md5(relPath)
+        relpath: relPath
    }).then(() => {
        $.toast({
            heading: "Tag deleted",
@@ -215,8 +210,6 @@ function deleteTag(tag, hit) {
            hideAfter: 3000,
            loaderBg: "#08c7e8",
        });
-
-        window.setTimeout(updateTagTree, 2000);
    })
 }

@@ -320,8 +313,25 @@ $.jsonPost("es", {
    mimeTree.node("any").select();
 });

-function initTagTree() {
-    tagMap = [{text: "All", id: "any"}];
+// Tags tree
+$.jsonPost("es", {
+    aggs: {
+        tags: {
+            terms: {
+                field: "tag",
+                size: 10000
+            }
+        }
+    },
+    size: 0,
+}).then(resp => {
+    resp["aggregations"]["tags"]["buckets"]
+        .sort((a, b) => a["key"].localeCompare(b["key"]))
+        .forEach(bucket => {
+            addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
+        });
+
+    tagMap.push({"text": "All", "id": "any"});
    tagTree = new InspireTree({
        selection: {
            mode: 'checkbox'
@@ -336,34 +346,8 @@ function initTagTree() {
    });
    tagTree.on("node.state.changed", handleTreeClick(tagTree));
    tagTree.node("any").select();
-}
-
-function updateTagTree() {
-    $.jsonPost("es", {
-        aggs: {
-            tags: {
-                terms: {
-                    field: "tag",
-                    size: 10000
-                }
-            }
-        },
-        size: 0,
-    }).then(resp => {
-        tagMap = [];
-        resp["aggregations"]["tags"]["buckets"]
-            .sort((a, b) => a["key"].localeCompare(b["key"]))
-            .forEach(bucket => {
-                addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"])
-            });
-
-        tagTree.removeAll();
-        tagMap.push({text: "All", id: "any"})
-        tagTree.addNodes(tagMap);
    searchBusy = false;
-    });
-}
-
+});

 function addTag(map, tag, id, count) {
    // let tags = tag.split("#")[0].split(".");
@@ -511,8 +495,8 @@ function search(after = null) {
        searchResults.appendChild(preload);
    }

-    let searchBarValue = searchBar.value;
-    let empty = searchBarValue === "";
+    let query = searchBar.value;
+    let empty = query === "";
    let condition = empty ? "should" : "must";
    let filters = [
        {range: {size: {gte: size_min, lte: size_max}}},
@@ -561,32 +545,19 @@ function search(after = null) {
        filters.push({range: {mtime: {lte: date_max}}})
    }

-    let query;
-    if (CONF.options.queryMode === "simple") {
-        query = {
-            simple_query_string: {
-                query: searchBarValue,
-                fields: fields,
-                default_operator: "and"
-            }
-        }
-    } else {
-        query = {
-            query_string: {
-                query: searchBarValue,
-                default_field: "name",
-                default_operator: "and"
-            }
-        }
-    }
-
    let q = {
        "_source": {
            excludes: ["content", "_tie"]
        },
        query: {
            bool: {
-                [condition]: query,
+                [condition]: {
+                    simple_query_string: {
+                        query: query,
+                        fields: fields,
+                        default_operator: "and"
+                    }
+                },
                filter: filters
            }
        },
@@ -624,9 +595,7 @@ function search(after = null) {
        }
    }

-    const showError = CONF.options.queryMode === "advanced";
-
-    $.jsonPost("es", q, showError).then(searchResult => {
+    $.jsonPost("es", q).then(searchResult => {
        let hits = searchResult["hits"]["hits"];
        if (hits) {
            lastDoc = hits[hits.length - 1];
@@ -635,7 +604,6 @@ function search(after = null) {
        hits.forEach(hit => {
            hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
            hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
-            hit["_path_md5"] = md5(hit["_source"]["path"] + (hit["_source"]["path"] ? "/" : "") + hit["_source"]["name"] + ext(hit));
        });

        if (!after) {
@@ -660,25 +628,7 @@ function search(after = null) {
        reachedEnd = hits.length !== SIZE;
        insertHits(resultContainer, hits);
        searchBusy = false;
-    }).fail(() => {
-        searchBusy = false;
-        if (!after) {
-            preload.remove();
-        }
-
-        console.log("QUERY:")
-        console.log(q)
-        $.toast({
-            heading: "Query error",
-            text: "Could not parse or execute query, please check the Advanced search documentation. " +
-                "See server logs for more information.",
-            stack: false,
-            bgColor: "#FF8F00",
-            textColor: "#FFF3E0",
-            position: 'bottom-right',
-            hideAfter: false
    });
-    })
 }


--- a/src/static/js/util.js
+++ b/src/static/js/util.js
@@ -70,7 +70,7 @@ function strUnescape(str) {

    for (let i = 0; i < str.length; i++) {
        const c = str[i];
-        const next = str[i + 1];
+        const next = str[i+1];

        if (c === ']') {
            if (next === ']') {
@@ -102,8 +102,7 @@ const _defaults = {
    treemapSize: "large",
    suggestPath: true,
    fragmentSize: 100,
-    columns: 5,
-    queryMode: "simple"
+    columns: 5
 };

 function loadSettings() {
@@ -121,7 +120,6 @@ function loadSettings() {
    $("#settingSuggestPath").prop("checked", CONF.options.suggestPath);
    $("#settingFragmentSize").val(CONF.options.fragmentSize);
    $("#settingColumns").val(CONF.options.columns);
-    $("#settingQueryMode").val(CONF.options.queryMode);
 }

 function Settings() {
@@ -129,7 +127,6 @@ function Settings() {

    this._onUpdate = function () {
        $("#fuzzyToggle").prop("checked", this.options.fuzzy);
-        $("#searchBar").attr("placeholder", this.options.queryMode === "simple" ? "Search" : "Advanced search");
        updateColumnStyle();
    };

@@ -168,7 +165,6 @@ function updateSettings() {
    CONF.options.suggestPath = $("#settingSuggestPath").prop("checked");
    CONF.options.fragmentSize = $("#settingFragmentSize").val();
    CONF.options.columns = $("#settingColumns").val();
-    CONF.options.queryMode = $("#settingQueryMode").val();
    CONF.save();

    if (typeof searchDebounced !== "undefined") {
@@ -191,16 +187,14 @@ function updateSettings() {
    });
 }

-jQuery["jsonPost"] = function (url, data, showError = true) {
+jQuery["jsonPost"] = function (url, data) {
    return jQuery.ajax({
        url: url,
        type: "post",
        data: JSON.stringify(data),
        contentType: "application/json"
    }).fail(err => {
-        if (showError) {
        showEsError();
-        }
        console.log(err);
    });
 };
@@ -236,13 +230,3 @@ function updateColumnStyle() {
        `
    }
 }
-
-function dmsToDecimal(dms, ref) {
-    const tokens = dms.split(",")
-
-    const d = Number(tokens[0].trim().split(":")[0]) / Number(tokens[0].trim().split(":")[1])
-    const m = Number(tokens[1].trim().split(":")[0]) / Number(tokens[1].trim().split(":")[1])
-    const s = Number(tokens[2].trim().split(":")[0]) / Number(tokens[2].trim().split(":")[1])
-
-    return (d + (m / 60) + (s / 3600)) * (ref === "S" || ref === "W" ? -1 : 1)
-}
--- a/src/static/search.html
+++ b/src/static/search.html
@@ -12,7 +12,7 @@

 <nav class="navbar navbar-expand-lg">
    <a class="navbar-brand" href="/">sist2</a>
-    <span class="badge badge-pill version">2.10.2</span>
+    <span class="badge badge-pill version">2.8.5</span>
    <span class="tagline">Lightning-fast file system indexer and search tool </span>
    <a class="btn ml-auto" href="stats">Stats</a>
    <button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings
@@ -120,8 +120,6 @@
                </div>
                <div class="modal-body">

-                    <h2>Simple search</h2>
-
                    <table class="table">
                        <tbody>
                        <tr>
@@ -170,12 +168,6 @@
                    <p>For more information, see <a target="_blank"
                                                    href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html">Elasticsearch
                        documentation</a></p>
-
-                    <h2>Advanced search</h2>
-                    <p>For documentation about the advanced search mode, see <a target="_blank"
-                                                                                href="//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax">Elasticsearch
-                        documentation</a></p>
-
                </div>
            </div>
        </div>
@@ -215,16 +207,10 @@

                    <br/>
                    <div class="form-group">
-                        <label for="settingFragmentSize">Highlight context size in characters</label>
                        <input type="number" class="form-control" id="settingFragmentSize">
+                        <label for="settingFragmentSize">Highlight context size in characters</label>
                    </div>

-                    <label for="settingQueryMode">Search mode</label>
-                    <select id="settingQueryMode" class="form-control form-control-sm">
-                        <option value="simple">Simple</option>
-                        <option value="advanced">Advanced</option>
-                    </select>
-
                    <label for="settingDisplay">Display</label>
                    <select id="settingDisplay" class="form-control form-control-sm">
                        <option value="grid">Grid</option>
--- a/src/static/stats.html
+++ b/src/static/stats.html
@@ -10,7 +10,7 @@

 <nav class="navbar navbar-expand-lg">
    <a class="navbar-brand" href="/">sist2</a>
-    <span class="badge badge-pill version">2.10.2</span>
+    <span class="badge badge-pill version">2.8.5</span>
    <span class="tagline">Lightning-fast file system indexer and search tool </span>
    <a style="margin-left: auto" class="btn" href="/">Back</a>
    <button class="btn" type="button" data-toggle="modal" data-target="#settings"
@@ -29,13 +29,13 @@
    </div>

    <div id="treemap-card" class="stats-card">
-        <button class="btn stats-btn" onclick="fullScreen('treemap-card')" id="treemap-card-enlarge">Enlarge</button>
+        <button class="btn stats-btn" onclick="fullScreen('treemap-card')">Enlarge</button>
        <button class="btn stats-btn" onclick="exportTreemap()">Export</button>
        <svg id="treemap"></svg>
    </div>

    <div id="graphs-card" class="stats-card">
-        <button class="btn stats-btn" onclick="fullScreen('graphs-card')" id="graphs-card-enlarge">Enlarge</button>
+        <button class="btn stats-btn" onclick="fullScreen('graphs-card')">Enlarge</button>
        <div class="graph">
            <svg id="agg_mime_size"></svg>
        </div>
@@ -84,16 +84,10 @@

                <br/>
                <div class="form-group">
-                    <label for="settingFragmentSize">Highlight context size in characters</label>
                    <input type="number" class="form-control" id="settingFragmentSize">
+                    <label for="settingFragmentSize">Highlight context size in characters</label>
                </div>

-                <label for="settingQueryMode">Search mode</label>
-                <select id="settingQueryMode" class="form-control form-control-sm">
-                    <option value="simple">Simple</option>
-                    <option value="advanced">Advanced</option>
-                </select>
-
                <label for="settingDisplay">Display</label>
                <select id="settingDisplay" class="form-control form-control-sm">
                    <option value="grid">Grid</option>
@@ -795,15 +789,7 @@ window.onload = function () {

 function fullScreen(selector) {
    const card = document.getElementById(selector);
-    const btn = document.getElementById(selector + "-enlarge");
-
    card.classList.toggle("full-screen");
-
-    if (card.classList.contains("full-screen")) {
-        btn.innerText = "Shrink";
-    } else {
-        btn.innerText = "Enlarge";
-    }
 }

 function exportTreemap() {
--- a/src/stats.c
+++ b/src/stats.c
@@ -2,6 +2,8 @@
 #include "io/serialize.h"
 #include "ctx.h"

+#include <glib.h>
+
 static GHashTable *FlatTree;
 static GHashTable *BufferTable;

@@ -20,7 +22,7 @@ typedef struct {
    long count;
 } agg_t;

-void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
+void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) {

    if (cJSON_GetObjectItem(document, "parent") != NULL) {
        return;
@@ -101,8 +103,8 @@ void read_index_into_tables(index_t *index) {
    while ((de = readdir(dir)) != NULL) {
        if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
            char file_path[PATH_MAX];
-            snprintf(file_path, PATH_MAX, "%s%s", index->path, de->d_name);
-            read_index(file_path, index->desc.id, index->desc.type, fill_tables);
+            snprintf(file_path, PATH_MAX, "%s/%s", index->path, de->d_name);
+            read_index(file_path, index->desc.uuid, index->desc.type, fill_tables);
        }
    }
    closedir(dir);
--- a/src/tpool.c
+++ b/src/tpool.c
@@ -3,7 +3,7 @@
 #include "sist.h"
 #include <pthread.h>

-#define MAX_QUEUE_SIZE 1000000
+#define MAX_QUEUE_SIZE 10000

 typedef void (*thread_func_t)(void *arg);

@@ -52,13 +52,6 @@ static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) {
    return work;
 }

-void tpool_dump_debug_info(tpool_t *pool) {
-    LOG_DEBUGF("tpool.c", "pool->thread_cnt = %d", pool->thread_cnt)
-    LOG_DEBUGF("tpool.c", "pool->work_cnt = %d", pool->work_cnt)
-    LOG_DEBUGF("tpool.c", "pool->done_cnt = %d", pool->done_cnt)
-    LOG_DEBUGF("tpool.c", "pool->stop = %d", pool->stop)
-}
-
 /**
 * Pop work object from thread pool
 */
@@ -90,7 +83,7 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
    }

    while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) {
-        usleep(10000);
+        usleep(100000);
    }

    pthread_mutex_lock(&(pool->work_mutex));
@@ -157,7 +150,6 @@ static void *tpool_worker(void *arg) {
    if (pool->cleanup_func != NULL) {
        LOG_INFO("tpool.c", "Executing cleanup function")
        pool->cleanup_func();
-        LOG_DEBUG("tpool.c", "Done executing cleanup function")
    }

    pthread_cond_signal(&(pool->working_cond));
--- a/src/tpool.h
+++ b/src/tpool.h
@@ -10,12 +10,10 @@ typedef void (*thread_func_t)(void *arg);

 tpool_t *tpool_create(size_t num, void (*cleanup_func)(), int free_arg);
 void tpool_start(tpool_t *pool);
-void tpool_destroy(tpool_t *pool);
+void tpool_destroy(tpool_t *tm);

 int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);
-void tpool_wait(tpool_t *pool);
-
-void tpool_dump_debug_info(tpool_t *pool);
+void tpool_wait(tpool_t *tm);

 #endif

--- a/src/types.h
+++ b/src/types.h
@@ -6,7 +6,7 @@
 #define INDEX_VERSION_EXTERNAL "_external_v1"

 typedef struct index_descriptor {
-    char id[MD5_STR_LENGTH];
+    char uuid[UUID_STR_LEN];
    char version[64];
    long timestamp;
    char root[PATH_MAX];
--- a/src/util.c
+++ b/src/util.c
@@ -2,6 +2,7 @@
 #include "src/ctx.h"

 #include <wordexp.h>
+#include <glib.h>

 #define PBSTR "========================================"
 #define PBWIDTH 40
@@ -124,7 +125,7 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size) {
 }

 GHashTable *incremental_get_table() {
-    GHashTable *file_table = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
+    GHashTable *file_table = g_hash_table_new(g_direct_hash, g_direct_equal);
    return file_table;
 }

--- a/src/util.h
+++ b/src/util.h
@@ -10,8 +10,6 @@
 #include "third-party/utf8.h/utf8.h"
 #include "libscan/scan.h"

-#define MD5_STR_LENGTH 33
-

 char *abspath(const char *path);

@@ -23,6 +21,25 @@ void progress_bar_print(double percentage, size_t tn_size, size_t index_size);

 GHashTable *incremental_get_table();

+__always_inline
+static void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
+    g_hash_table_insert(table, (gpointer) inode_no, GINT_TO_POINTER(mtime));
+}
+
+__always_inline
+static int incremental_get(GHashTable *table, unsigned long inode_no) {
+    if (table != NULL) {
+        return GPOINTER_TO_INT(g_hash_table_lookup(table, (gpointer) inode_no));
+    } else {
+        return 0;
+    }
+}
+
+__always_inline
+static int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no) {
+    return g_hash_table_insert(table, GINT_TO_POINTER(inode_no), GINT_TO_POINTER(1));
+}
+

 const char *find_file_in_paths(const char **paths, const char *filename);

@@ -31,95 +48,4 @@ void str_escape(char *dst, const char *str);

 void str_unescape(char *dst, const char *str);

-static int hex2buf(const char *str, int len, unsigned char *bytes) {
-    static const uint8_t hashmap[] = {
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-            0x08, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-    };
-
-    for (int pos = 0; pos < len; pos += 2) {
-        int idx0 = (uint8_t) str[pos + 0];
-        int idx1 = (uint8_t) str[pos + 1];
-        bytes[pos / 2] = (uint8_t) (hashmap[idx0] << 4) | hashmap[idx1];
-    }
-    return TRUE;
-}
-
-__always_inline
-static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
-    static const char hexdig[] = "0123456789abcdef";
-
-    const unsigned char *p;
-    size_t i;
-
-    char *s = hex_string;
-    for (i = 0, p = buf; i < buflen; i++, p++) {
-        *s++ = hexdig[(*p >> 4) & 0x0f];
-        *s++ = hexdig[*p & 0x0f];
-    }
-    *s = '\0';
-}
-
-
-__always_inline
-static int md5_digest_is_null(const unsigned char digest[MD5_DIGEST_LENGTH]) {
-    return (*(int64_t *) digest) == 0 && (*((int64_t *) digest + 1)) == 0;
-}
-
-
-__always_inline
-static void incremental_put(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH], int mtime) {
-    char *ptr = malloc(MD5_STR_LENGTH);
-    buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
-    g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
-}
-
-__always_inline
-static int incremental_get(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH]) {
-    if (table != NULL) {
-        char md5_str[MD5_STR_LENGTH];
-        buf2hex(path_md5, MD5_DIGEST_LENGTH, md5_str);
-        return GPOINTER_TO_INT(g_hash_table_lookup(table, md5_str));
-    } else {
-        return 0;
-    }
-}
-
-__always_inline
-static int incremental_mark_file_for_copy(GHashTable *table, unsigned char path_md5[MD5_DIGEST_LENGTH]) {
-    char *ptr = malloc(MD5_STR_LENGTH);
-    buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
-    return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
-}
-
 #endif
--- a/src/web/serve.c
+++ b/src/web/serve.c
@@ -8,8 +8,18 @@

 #include <src/ctx.h>

+#include <mongoose.h>

-static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
+
+static int has_prefix(const struct mg_str *str, const struct mg_str *prefix) {
+    return str->len > prefix->len && memcmp(str->p, prefix->p, prefix->len) == 0;
+}
+
+static int is_equal(const struct mg_str *s1, const struct mg_str *s2) {
+    return s1->len == s2->len && memcmp(s1->p, s2->p, s2->len) == 0;
+}
+
+static void send_response_line(struct mg_connection *nc, int status_code, int length, char *extra_headers) {
    mg_printf(
            nc,
            "HTTP/1.1 %d %s\r\n"
@@ -26,7 +36,7 @@ static void send_response_line(struct mg_connection *nc, int status_code, size_t

 index_t *get_index_by_id(const char *index_id) {
    for (int i = WebCtx.index_count; i >= 0; i--) {
-        if (strncmp(index_id, WebCtx.indices[i].desc.id, MD5_STR_LENGTH) == 0) {
+        if (strcmp(index_id, WebCtx.indices[i].desc.uuid) == 0) {
            return &WebCtx.indices[i];
        }
    }
@@ -52,32 +62,36 @@ store_t *get_tag_store(const char *index_id) {
 void search_index(struct mg_connection *nc) {
    send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html");
    mg_send(nc, search_html, sizeof(search_html));
+    nc->flags |= MG_F_SEND_AND_CLOSE;
 }

 void stats(struct mg_connection *nc) {
    send_response_line(nc, 200, sizeof(stats_html), "Content-Type: text/html");
    mg_send(nc, stats_html, sizeof(stats_html));
+    nc->flags |= MG_F_SEND_AND_CLOSE;
 }

-void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
+void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {

-    if (hm->uri.len != MD5_STR_LENGTH + 4) {
-        mg_http_reply(nc, 404, "", "");
+    if (path->len != UUID_STR_LEN + 4) {
+        mg_http_send_error(nc, 404, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

-    char arg_md5[MD5_STR_LENGTH];
-    memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
-    *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
+    char arg_uuid[UUID_STR_LEN];
+    memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
+    *(arg_uuid + UUID_STR_LEN - 1) = '\0';

-    index_t *index = get_index_by_id(arg_md5);
+    index_t *index = get_index_by_id(arg_uuid);
    if (index == NULL) {
-        mg_http_reply(nc, 404, "", "");
+        mg_http_send_error(nc, 404, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

    const char *file;
-    switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) {
+    switch (atoi(hm->uri.p + 3 + UUID_STR_LEN)) {
        case 1:
            file = "treemap.csv";
            break;
@@ -91,41 +105,54 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
            file = "date_agg.csv";
            break;
        default:
+            nc->flags |= MG_F_SEND_AND_CLOSE;
            return;
    }

    char disposition[8192];
-    snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"\r\n", file);
+    snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"", file);

    char full_path[PATH_MAX];
    strcpy(full_path, index->path);
    strcat(full_path, file);

-    mg_http_serve_file(nc, hm, full_path, "text/csv", disposition);
+    mg_http_serve_file(nc, hm, full_path, mg_mk_str("text/csv"), mg_mk_str(disposition));
+    nc->flags |= MG_F_SEND_AND_CLOSE;
 }

 void javascript_lib(struct mg_connection *nc) {
    send_response_line(nc, 200, sizeof(bundle_js), "Content-Type: application/javascript");
    mg_send(nc, bundle_js, sizeof(bundle_js));
+    nc->flags |= MG_F_SEND_AND_CLOSE;
 }

 void javascript_search(struct mg_connection *nc) {
    send_response_line(nc, 200, sizeof(search_js), "Content-Type: application/javascript");
    mg_send(nc, search_js, sizeof(search_js));
+    nc->flags |= MG_F_SEND_AND_CLOSE;
 }

-int client_requested_dark_theme(struct mg_http_message *hm) {
-    struct mg_str *cookie_header = mg_http_get_header(hm, "cookie");
+int client_requested_dark_theme(struct http_message *hm) {
+    struct mg_str *cookie_header = mg_get_http_header(hm, "cookie");
    if (cookie_header == NULL) {
        return FALSE;
    }

-    struct mg_str sist_cookie = mg_http_get_header_var(*cookie_header, mg_str_n("sist", 4));
+    char buf[4096];
+    char *sist_cookie = buf;
+    if (mg_http_parse_header2(cookie_header, "sist", &sist_cookie, sizeof(buf)) == 0) {
+        return FALSE;
+    }

-    return mg_strcmp(sist_cookie, mg_str_n("dark", 4)) == 0;
+    int ret = strcmp(sist_cookie, "dark") == 0;
+    if (sist_cookie != buf) {
+        free(sist_cookie);
+    }
+
+    return ret;
 }

-void style(struct mg_connection *nc, struct mg_http_message *hm) {
+void style(struct mg_connection *nc, struct http_message *hm) {

    if (client_requested_dark_theme(hm)) {
        send_response_line(nc, 200, sizeof(bundle_dark_css), "Content-Type: text/css");
@@ -134,9 +161,11 @@ void style(struct mg_connection *nc, struct mg_http_message *hm) {
        send_response_line(nc, 200, sizeof(bundle_css), "Content-Type: text/css");
        mg_send(nc, bundle_css, sizeof(bundle_css));
    }
+
+    nc->flags |= MG_F_SEND_AND_CLOSE;
 }

-void img_sprite_skin_flat(struct mg_connection *nc, struct mg_http_message *hm) {
+void img_sprite_skin_flat(struct mg_connection *nc, struct http_message *hm) {
    if (client_requested_dark_theme(hm)) {
        send_response_line(nc, 200, sizeof(sprite_skin_flat_dark_png), "Content-Type: image/png");
        mg_send(nc, sprite_skin_flat_dark_png, sizeof(sprite_skin_flat_dark_png));
@@ -144,59 +173,71 @@ void img_sprite_skin_flat(struct mg_connection *nc, struct mg_http_message *hm)
        send_response_line(nc, 200, sizeof(sprite_skin_flat_png), "Content-Type: image/png");
        mg_send(nc, sprite_skin_flat_png, sizeof(sprite_skin_flat_png));
    }
+
+    nc->flags |= MG_F_SEND_AND_CLOSE;
 }

-void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
+void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {

-    if (hm->uri.len != 68) {
-        LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
-        mg_http_reply(nc, 404, "", "Not found");
+    if (path->len != UUID_STR_LEN * 2 + 2) {
+        LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p)
+        mg_http_send_error(nc, 404, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

-    char arg_file_md5[MD5_STR_LENGTH];
-    char arg_index[MD5_STR_LENGTH];
+    char arg_uuid[UUID_STR_LEN];
+    char arg_index[UUID_STR_LEN];

-    memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH);
-    *(arg_index + MD5_STR_LENGTH - 1) = '\0';
-    memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
-    *(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
+    memcpy(arg_index, hm->uri.p + 3, UUID_STR_LEN);
+    *(arg_index + UUID_STR_LEN - 1) = '\0';
+    memcpy(arg_uuid, hm->uri.p + 3 + UUID_STR_LEN, UUID_STR_LEN);
+    *(arg_uuid + UUID_STR_LEN - 1) = '\0';

-    unsigned char md5_buf[MD5_DIGEST_LENGTH];
-    hex2buf(arg_file_md5, MD5_STR_LENGTH - 1, md5_buf);
+    uuid_t uuid;
+    int ret = uuid_parse(arg_uuid, uuid);
+    if (ret != 0) {
+        LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid)
+        mg_http_send_error(nc, 404, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
+        return;
+    }

    store_t *store = get_store(arg_index);
    if (store == NULL) {
        LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
-        mg_http_reply(nc, 404, "", "Not found");
+        mg_http_send_error(nc, 404, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

    size_t data_len = 0;
-    char *data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
+    char *data = store_read(store, (char *) uuid, sizeof(uuid_t), &data_len);
    if (data_len != 0) {
        send_response_line(nc, 200, data_len, "Content-Type: image/jpeg");
        mg_send(nc, data, data_len);
        free(data);
    }
+    nc->flags |= MG_F_SEND_AND_CLOSE;
 }

-void search(struct mg_connection *nc, struct mg_http_message *hm) {
+void search(struct mg_connection *nc, struct http_message *hm) {

    if (hm->body.len == 0) {
        LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
-        mg_http_reply(nc, 500, "", "Invalid request");
+        mg_http_send_error(nc, 500, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

    char *body = malloc(hm->body.len + 1);
-    memcpy(body, hm->body.ptr, hm->body.len);
+    memcpy(body, hm->body.p, hm->body.len);
    *(body + hm->body.len) = '\0';

    char url[4096];
    snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);

-    nc->fn_data = web_post_async(url, body);
+    nc->user_data = web_post_async(url, body);
 }

 void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
@@ -218,16 +259,16 @@ void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
             idx->desc.rewrite_url, path_unescaped, name_unescaped, strlen(ext) == 0 ? "" : ".", ext);

    dyn_buffer_t encoded = url_escape(url);
-    dyn_buffer_write_char(&encoded, '\0');
-
-    char location_header[8192];
-    snprintf(location_header, sizeof(location_header), "Location: %s\r\n", encoded.buf);
-
-    mg_http_reply(nc, 308, location_header, "");
+    mg_http_send_redirect(
+            nc, 308,
+            (struct mg_str) MG_MK_STR_N(encoded.buf, encoded.cur),
+            (struct mg_str) MG_NULL_STR
+    );
    dyn_buffer_destroy(&encoded);
+    nc->flags |= MG_F_SEND_AND_CLOSE;
 }

-void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct mg_http_message *hm) {
+void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct http_message *hm) {

    const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
    const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
@@ -248,10 +289,10 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
    LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path)

    char disposition[8192];
-    snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"\r\n",
+    snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"",
             name, strlen(ext) == 0 ? "" : ".", ext);

-    mg_http_serve_file(nc, hm, full_path, mime, disposition);
+    mg_http_serve_file(nc, hm, full_path, mg_mk_str(mime), mg_mk_str(disposition));
 }

 void index_info(struct mg_connection *nc) {
@@ -264,7 +305,7 @@ void index_info(struct mg_connection *nc) {
        cJSON *idx_json = cJSON_CreateObject();
        cJSON_AddStringToObject(idx_json, "name", idx->desc.name);
        cJSON_AddStringToObject(idx_json, "version", idx->desc.version);
-        cJSON_AddStringToObject(idx_json, "id", idx->desc.id);
+        cJSON_AddStringToObject(idx_json, "id", idx->desc.uuid);
        cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
        cJSON_AddItemToArray(arr, idx_json);
    }
@@ -275,35 +316,40 @@ void index_info(struct mg_connection *nc) {
    mg_send(nc, json_str, strlen(json_str));
    free(json_str);
    cJSON_Delete(json);
+
+    nc->flags |= MG_F_SEND_AND_CLOSE;
 }


-void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
+void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {

-    if (hm->uri.len != MD5_STR_LENGTH + 2) {
-        LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
-        mg_http_reply(nc, 404, "", "Not found");
+    if (path->len != UUID_STR_LEN + 2) {
+        LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p)
+        mg_http_send_error(nc, 404, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

-    char arg_md5[MD5_STR_LENGTH];
-    memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
-    *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
+    char arg_uuid[UUID_STR_LEN];
+    memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
+    *(arg_uuid + UUID_STR_LEN - 1) = '\0';

-    cJSON *doc = elastic_get_document(arg_md5);
+    cJSON *doc = elastic_get_document(arg_uuid);
    cJSON *source = cJSON_GetObjectItem(doc, "_source");

    cJSON *index_id = cJSON_GetObjectItem(source, "index");
    if (index_id == NULL) {
        cJSON_Delete(doc);
-        mg_http_reply(nc, 404, "", "Not found");
+        mg_http_send_error(nc, 404, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

    index_t *idx = get_index_by_id(index_id->valuestring);
    if (idx == NULL) {
        cJSON_Delete(doc);
-        mg_http_reply(nc, 404, "", "Not found");
+        mg_http_send_error(nc, 404, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

@@ -312,21 +358,24 @@ void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
    mg_send(nc, json_str, (int) strlen(json_str));
    free(json_str);
    cJSON_Delete(doc);
+
+    nc->flags |= MG_F_SEND_AND_CLOSE;
 }

-void file(struct mg_connection *nc, struct mg_http_message *hm) {
+void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {

-    if (hm->uri.len != MD5_STR_LENGTH + 2) {
-        LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
-        mg_http_reply(nc, 404, "", "Not found");
+    if (path->len != UUID_STR_LEN + 2) {
+        LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p)
+        mg_http_send_error(nc, 404, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

-    char arg_md5[MD5_STR_LENGTH];
-    memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
-    *(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
+    char arg_uuid[UUID_STR_LEN];
+    memcpy(arg_uuid, hm->uri.p + 3, UUID_STR_LEN);
+    *(arg_uuid + UUID_STR_LEN - 1) = '\0';

-    const char *next = arg_md5;
+    const char *next = arg_uuid;
    cJSON *doc = NULL;
    cJSON *index_id = NULL;
    cJSON *source = NULL;
@@ -337,7 +386,8 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
        index_id = cJSON_GetObjectItem(source, "index");
        if (index_id == NULL) {
            cJSON_Delete(doc);
-            mg_http_reply(nc, 404, "", "Not found");
+            mg_http_send_error(nc, 404, NULL);
+            nc->flags |= MG_F_SEND_AND_CLOSE;
            return;
        }
        cJSON *parent = cJSON_GetObjectItem(source, "parent");
@@ -351,7 +401,8 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {

    if (idx == NULL) {
        cJSON_Delete(doc);
-        mg_http_reply(nc, 404, "", "Not found");
+        nc->flags |= MG_F_SEND_AND_CLOSE;
+        mg_http_send_error(nc, 404, NULL);
        return;
    }

@@ -372,12 +423,14 @@ void status(struct mg_connection *nc) {
    }

    free(status);
+
+    nc->flags |= MG_F_SEND_AND_CLOSE;
 }

 typedef struct {
    char *name;
    int delete;
-    char *path_md5_str;
+    char *relpath;
    char *doc_id;
 } tag_req_t;

@@ -397,9 +450,8 @@ tag_req_t *parse_tag_request(cJSON *json) {
        return NULL;
    }

-    cJSON *arg_path_md5 = cJSON_GetObjectItem(json, "path_md5");
-    if (arg_path_md5 == NULL || !cJSON_IsString(arg_path_md5) ||
-        strlen(arg_path_md5->valuestring) != MD5_STR_LENGTH - 1) {
+    cJSON *arg_relpath = cJSON_GetObjectItem(json, "relpath");
+    if (arg_relpath == NULL || !cJSON_IsString(arg_relpath)) {
        return NULL;
    }

@@ -411,38 +463,41 @@ tag_req_t *parse_tag_request(cJSON *json) {
    tag_req_t *req = malloc(sizeof(tag_req_t));
    req->delete = arg_delete->valueint;
    req->name = arg_name->valuestring;
-    req->path_md5_str = arg_path_md5->valuestring;
+    req->relpath = arg_relpath->valuestring;
    req->doc_id = arg_doc_id->valuestring;

    return req;
 }

-void tag(struct mg_connection *nc, struct mg_http_message *hm) {
-    if (hm->uri.len != MD5_STR_LENGTH + 4) {
-        LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
-        mg_http_reply(nc, 404, "", "Not found");
+void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
+    if (path->len != UUID_STR_LEN + 4) {
+        LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) path->len, path->p)
+        mg_http_send_error(nc, 404, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

-    char arg_index[MD5_STR_LENGTH];
-    memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH);
-    *(arg_index + MD5_STR_LENGTH - 1) = '\0';
+    char arg_index[UUID_STR_LEN];
+    memcpy(arg_index, hm->uri.p + 5, UUID_STR_LEN);
+    *(arg_index + UUID_STR_LEN - 1) = '\0';

    if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
        LOG_DEBUG("serve.c", "Invalid tag request")
-        mg_http_reply(nc, 404, "", "Not found");
+        mg_http_send_error(nc, 400, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

    store_t *store = get_tag_store(arg_index);
    if (store == NULL) {
        LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
-        mg_http_reply(nc, 404, "", "Not found");
+        mg_http_send_error(nc, 404, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

    char *body = malloc(hm->body.len + 1);
-    memcpy(body, hm->body.ptr, hm->body.len);
+    memcpy(body, hm->body.p, hm->body.len);
    *(body + hm->body.len) = '\0';
    cJSON *json = cJSON_Parse(body);

@@ -451,14 +506,15 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
        LOG_DEBUGF("serve.c", "Could not parse tag request", arg_index)
        cJSON_Delete(json);
        free(body);
-        mg_http_reply(nc, 400, "", "Invalid request");
+        mg_http_send_error(nc, 400, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }

    cJSON *arr = NULL;

    size_t data_len = 0;
-    const char *data = store_read(store, arg_req->path_md5_str, MD5_STR_LENGTH, &data_len);
+    const char *data = store_read(store, arg_req->relpath, strlen(arg_req->relpath), &data_len);
    if (data_len == 0) {
        arr = cJSON_CreateArray();
    } else {
@@ -494,7 +550,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {

        char url[4096];
        snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
-        nc->fn_data = web_post_async(url, buf);
+        nc->user_data = web_post_async(url, buf);

    } else {
        cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
@@ -514,12 +570,11 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {

        char url[4096];
        snprintf(url, sizeof(url), "%s/%s/_update/%s", WebCtx.es_url, WebCtx.es_index, arg_req->doc_id);
-        nc->fn_data = web_post_async(url, buf);
+        nc->user_data = web_post_async(url, buf);
    }

    char *json_str = cJSON_PrintUnformatted(arr);
-    store_write(store, arg_req->path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
-    store_flush(store);
+    store_write(store, arg_req->relpath, strlen(arg_req->relpath) + 1, json_str, strlen(json_str) + 1);

    free(arg_req);
    free(json_str);
@@ -528,22 +583,39 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
    free(body);
 }

-int validate_auth(struct mg_connection *nc, struct mg_http_message *hm) {
+int validate_auth(struct mg_connection *nc, struct http_message *hm) {
    char user[256] = {0,};
    char pass[256] = {0,};

-    mg_http_creds(hm, user, sizeof(user), pass, sizeof(pass));
-    if (strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
-        mg_http_reply(nc, 401, "WWW-Authenticate: Basic realm=\"sist2\"\r\n", "");
+    int ret = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass));
+    if (ret == -1 || strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
+        mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n"
+                      "WWW-Authenticate: Basic realm=\"sist2\"\r\n"
+                      "Content-Length: 0\r\n\r\n");
+        nc->flags |= MG_F_SEND_AND_CLOSE;
        return FALSE;
    }
    return TRUE;
 }

-static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(void *fn_data)) {
+static void ev_router(struct mg_connection *nc, int ev, void *p) {
+    struct mg_str scheme;
+    struct mg_str user_info;
+    struct mg_str host;
+    unsigned int port;
+    struct mg_str path;
+    struct mg_str query;
+    struct mg_str fragment;
+
+    if (ev == MG_EV_HTTP_REQUEST) {
+        struct http_message *hm = (struct http_message *) p;
+
+        if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) {
+            mg_http_send_error(nc, 400, NULL);
+            nc->flags |= MG_F_SEND_AND_CLOSE;
+            return;
+        }

-    if (ev == MG_EV_HTTP_MSG) {
-        struct mg_http_message *hm = (struct mg_http_message *) ev_data;

        if (WebCtx.auth_enabled == TRUE) {
            if (!validate_auth(nc, hm)) {
@@ -551,48 +623,52 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
            }
        }

-        if (mg_http_match_uri(hm, "/")) {
+        if (is_equal(&path, &((struct mg_str) MG_MK_STR("/")))) {
            search_index(nc);
-        } else if (mg_http_match_uri(hm, "/css")) {
+        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/css")))) {
            style(nc, hm);
-        } else if (mg_http_match_uri(hm, "/stats")) {
+        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/stats")))) {
            stats(nc);
-        } else if (mg_http_match_uri(hm, "/jslib")) {
+        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jslib")))) {
            javascript_lib(nc);
-        } else if (mg_http_match_uri(hm, "/jssearch")) {
+        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/jssearch")))) {
            javascript_search(nc);
-        } else if (mg_http_match_uri(hm, "/img/sprite-skin-flat.png")) {
+        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/img/sprite-skin-flat.png")))) {
            img_sprite_skin_flat(nc, hm);
-        } else if (mg_http_match_uri(hm, "/es")) {
+        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/es")))) {
            search(nc, hm);
-        } else if (mg_http_match_uri(hm, "/i")) {
+        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/i")))) {
            index_info(nc);
-        } else if (mg_http_match_uri(hm, "/status")) {
+        } else if (is_equal(&path, &((struct mg_str) MG_MK_STR("/status")))) {
            status(nc);
-        } else if (mg_http_match_uri(hm, "/f/*")) {
-            file(nc, hm);
-        } else if (mg_http_match_uri(hm, "/t/*/*")) {
-            thumbnail(nc, hm);
-        } else if (mg_http_match_uri(hm, "/s/*/*")) {
-            stats_files(nc, hm);
-        } else if (mg_http_match_uri(hm, "/tag/*")) {
-            if (WebCtx.tag_auth_enabled == TRUE && !validate_auth(nc, hm)) {
+        } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/f/")))) {
+            file(nc, hm, &path);
+        } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/t/")))) {
+            thumbnail(nc, hm, &path);
+        } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/s/")))) {
+            stats_files(nc, hm, &path);
+        } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/tag/")))) {
+            if (WebCtx.tag_auth_enabled == TRUE) {
+                if (!validate_auth(nc, hm)) {
                    return;
                }
-            tag(nc, hm);
-        } else if (mg_http_match_uri(hm, "/d/*")) {
-            document_info(nc, hm);
+            }
+            tag(nc, hm, &path);
+        } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) {
+            document_info(nc, hm, &path);
        } else {
-            mg_http_reply(nc, 404, "", "Page not found");
+            mg_http_send_error(nc, 404, NULL);
+            nc->flags |= MG_F_SEND_AND_CLOSE;
        }

    } else if (ev == MG_EV_POLL) {
-        if (nc->fn_data != NULL) {
+        if (nc->user_data != NULL) {
            //Waiting for ES reply
-            subreq_ctx_t *ctx = (subreq_ctx_t *) nc->fn_data;
+            subreq_ctx_t *ctx = (subreq_ctx_t *) nc->user_data;
            web_post_async_poll(ctx);

            if (ctx->done == TRUE) {
+
                response_t *r = ctx->response;

                if (r->status_code == 200) {
@@ -612,14 +688,14 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
                        free(json_str);
                        free(tmp);
                    }
-
-                    mg_http_reply(nc, 500, "", "");
+                    mg_http_send_error(nc, 500, NULL);
                }

                free_response(r);
                free(ctx->data);
                free(ctx);
-                nc->fn_data = NULL;
+                nc->flags |= MG_F_SEND_AND_CLOSE;
+                nc->user_data = NULL;
            }
        }
    }
@@ -630,18 +706,15 @@ void serve(const char *listen_address) {
    printf("Starting web server @ http://%s\n", listen_address);

    struct mg_mgr mgr;
-    mg_mgr_init(&mgr);
+    mg_mgr_init(&mgr, NULL);

-    int ok = 1;
-
-    struct mg_connection *nc = mg_http_listen(&mgr, listen_address, ev_router, NULL);
+    struct mg_connection *nc = mg_bind(&mgr, listen_address, ev_router);
    if (nc == NULL) {
        LOG_FATALF("serve.c", "Couldn't bind web server on address %s", listen_address)
    }
+    mg_set_protocol_http_websocket(nc);

-    while (ok) {
+    for (;;) {
        mg_mgr_poll(&mgr, 10);
    }
-    mg_mgr_free(&mgr);
-    LOG_INFO("serve.c", "Finished web event loop")
 }
--- a/src/web/static_generated.c
+++ b/src/web/static_generated.c
--- a/tests/test_scan.py
+++ b/tests/test_scan.py
@@ -1,77 +0,0 @@
-import unittest
-import subprocess
-import shutil
-import json
-import os
-
-TEST_FILES = "third-party/libscan/libscan-test-files/test_files"
-
-
-def copy_files(files):
-    base = os.path.basename(files)
-    new_path = os.path.join("/tmp/sist2_test/", base)
-
-    shutil.rmtree(new_path, ignore_errors=True)
-    shutil.copytree(files, new_path)
-    return new_path
-
-
-def sist2(*args):
-    print("./sist2 " + " ".join(args))
-
-    return subprocess.check_output(
-        args=["./sist2", *args],
-    )
-
-
-def sist2_index(files, *args):
-    path = copy_files(files)
-
-    shutil.rmtree("test_i", ignore_errors=True)
-    sist2("scan", path, "-o", "test_i", *args)
-    return iter(sist2_index_to_dict("test_i"))
-
-
-def sist2_incremental_index(files, func=None, *args):
-    path = copy_files(files)
-
-    if func:
-        func(path)
-
-    shutil.rmtree("test_i_inc", ignore_errors=True)
-    sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
-    return iter(sist2_index_to_dict("test_i_inc"))
-
-
-def sist2_index_to_dict(index):
-    res = subprocess.check_output(
-        args=["./sist2", "index", "--print", index],
-    )
-
-    for line in res.splitlines():
-        if line:
-            yield json.loads(line)
-
-
-class ScanTest(unittest.TestCase):
-
-    def test_incremental1(self):
-        def remove_files(path):
-            os.remove(os.path.join(path, "msdoc/test1.doc"))
-            os.remove(os.path.join(path, "msdoc/test2.doc"))
-
-        def add_files(path):
-            with open(os.path.join(path, "newfile1"), "w"):
-                pass
-            with open(os.path.join(path, "newfile2"), "w"):
-                pass
-            with open(os.path.join(path, "newfile3"), "w"):
-                pass
-
-        file_count = sum(1 for _ in sist2_index(TEST_FILES))
-        self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
-        self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/third-party/argparse
+++ b/third-party/argparse
--- a/third-party/libscan
+++ b/third-party/libscan
Author	SHA1	Message	Date
simon987	2766f2419f	Better support for .doc files	2020-12-16 20:06:06 -05:00
simon987	7a88a2c956	Update README.md	2020-12-08 18:16:19 -05:00
simon987	c229a0bd05	Update README.md	2020-11-17 12:34:54 -05:00
simon987	7fce56da03	Add .doc support	2020-11-15 21:18:02 -05:00
acc557	50a1ed43fe	Use relative path for loading csv in stats	2020-11-13 08:30:43 -05:00
acc557	4117b88cbb	Update search.html Fix relative stats URL	2020-11-13 08:30:22 -05:00
simon987	c6bfe6ec83	Fix relative image URL #122	2020-11-12 08:06:57 -05:00
simon987	3ae6e9b604	update build instructions	2020-11-10 21:52:45 -05:00
simon987	c82b1658fe	Update libmupdf	2020-10-24 15:38:31 -04:00
simon987	9386f75d78	Optionally ES schema from file #117	2020-10-24 15:38:31 -04:00
simon987	31cc2c0c39	sidecar files #114 , version bump	2020-10-24 15:38:31 -04:00
simon987	a468cab46d	Fix typo	2020-10-24 15:38:31 -04:00
simon987	2aea57b985	Fix #110	2020-10-24 15:38:31 -04:00
				`@@ -1 +0,0 @@`
				!function(n){"use strict";function d(n,t){var r=(65535&n)+(65535&t);return(n>>16)+(t>>16)+(r>>16)<<16\|65535&r}function f(n,t,r,e,o,u){return d((c=d(d(t,n),d(e,u)))<<(f=o)\|c>>>32-f,r);var c,f}function l(n,t,r,e,o,u,c){return f(t&r\|~t&e,n,t,o,u,c)}function v(n,t,r,e,o,u,c){return f(t&e\|r&~e,n,t,o,u,c)}function g(n,t,r,e,o,u,c){return f(t^r^e,n,t,o,u,c)}function m(n,t,r,e,o,u,c){return f(r^(t\|~e),n,t,o,u,c)}function i(n,t){var r,e,o,u;n[t>>5]\|=128<<t%32,n[14+(t+64>>>9<<4)]=t;for(var c=1732584193,f=-271733879,i=-1732584194,a=271733878,h=0;h<n.length;h+=16)c=l(r=c,e=f,o=i,u=a,n[h],7,-680876936),a=l(a,c,f,i,n[h+1],12,-389564586),i=l(i,a,c,f,n[h+2],17,606105819),f=l(f,i,a,c,n[h+3],22,-1044525330),c=l(c,f,i,a,n[h+4],7,-176418897),a=l(a,c,f,i,n[h+5],12,1200080426),i=l(i,a,c,f,n[h+6],17,-1473231341),f=l(f,i,a,c,n[h+7],22,-45705983),c=l(c,f,i,a,n[h+8],7,1770035416),a=l(a,c,f,i,n[h+9],12,-1958414417),i=l(i,a,c,f,n[h+10],17,-42063),f=l(f,i,a,c,n[h+11],22,-1990404162),c=l(c,f,i,a,n[h+12],7,1804603682),a=l(a,c,f,i,n[h+13],12,-40341101),i=l(i,a,c,f,n[h+14],17,-1502002290),c=v(c,f=l(f,i,a,c,n[h+15],22,1236535329),i,a,n[h+1],5,-165796510),a=v(a,c,f,i,n[h+6],9,-1069501632),i=v(i,a,c,f,n[h+11],14,643717713),f=v(f,i,a,c,n[h],20,-373897302),c=v(c,f,i,a,n[h+5],5,-701558691),a=v(a,c,f,i,n[h+10],9,38016083),i=v(i,a,c,f,n[h+15],14,-660478335),f=v(f,i,a,c,n[h+4],20,-405537848),c=v(c,f,i,a,n[h+9],5,568446438),a=v(a,c,f,i,n[h+14],9,-1019803690),i=v(i,a,c,f,n[h+3],14,-187363961),f=v(f,i,a,c,n[h+8],20,1163531501),c=v(c,f,i,a,n[h+13],5,-1444681467),a=v(a,c,f,i,n[h+2],9,-51403784),i=v(i,a,c,f,n[h+7],14,1735328473),c=g(c,f=v(f,i,a,c,n[h+12],20,-1926607734),i,a,n[h+5],4,-378558),a=g(a,c,f,i,n[h+8],11,-2022574463),i=g(i,a,c,f,n[h+11],16,1839030562),f=g(f,i,a,c,n[h+14],23,-35309556),c=g(c,f,i,a,n[h+1],4,-1530992060),a=g(a,c,f,i,n[h+4],11,1272893353),i=g(i,a,c,f,n[h+7],16,-155497632),f=g(f,i,a,c,n[h+10],23,-1094730640),c=g(c,f,i,a,n[h+13],4,681279174),a=g(a,c,f,i,n[h],11,-358537222),i=g(i,a,c,
f,n[h+3],16,-722521979),f=g(f,i,a,c,n[h+6],23,76029189),c=g(c,f,i,a,n[h+9],4,-640364487),a=g(a,c,f,i,n[h+12],11,-421815835),i=g(i,a,c,f,n[h+15],16,530742520),c=m(c,f=g(f,i,a,c,n[h+2],23,-995338651),i,a,n[h],6,-198630844),a=m(a,c,f,i,n[h+7],10,1126891415),i=m(i,a,c,f,n[h+14],15,-1416354905),f=m(f,i,a,c,n[h+5],21,-57434055),c=m(c,f,i,a,n[h+12],6,1700485571),a=m(a,c,f,i,n[h+3],10,-1894986606),i=m(i,a,c,f,n[h+10],15,-1051523),f=m(f,i,a,c,n[h+1],21,-2054922799),c=m(c,f,i,a,n[h+8],6,1873313359),a=m(a,c,f,i,n[h+15],10,-30611744),i=m(i,a,c,f,n[h+6],15,-1560198380),f=m(f,i,a,c,n[h+13],21,1309151649),c=m(c,f,i,a,n[h+4],6,-145523070),a=m(a,c,f,i,n[h+11],10,-1120210379),i=m(i,a,c,f,n[h+2],15,718787259),f=m(f,i,a,c,n[h+9],21,-343485551),c=d(c,r),f=d(f,e),i=d(i,o),a=d(a,u);return[c,f,i,a]}function a(n){for(var t="",r=32n.length,e=0;e<r;e+=8)t+=String.fromCharCode(n[e>>5]>>>e%32&255);return t}function h(n){var t=[];for(t[(n.length>>2)-1]=void 0,e=0;e<t.length;e+=1)t[e]=0;for(var r=8n.length,e=0;e<r;e+=8)t[e>>5]\|=(255&n.charCodeAt(e/8))<<e%32;return t}function e(n){for(var t,r="0123456789abcdef",e="",o=0;o<n.length;o+=1)t=n.charCodeAt(o),e+=r.charAt(t>>>4&15)+r.charAt(15&t);return e}function r(n){return unescape(encodeURIComponent(n))}function o(n){return a(i(h(t=r(n)),8t.length));var t}function u(n,t){return function(n,t){var r,e,o=h(n),u=[],c=[];for(u[15]=c[15]=void 0,16<o.length&&(o=i(o,8n.length)),r=0;r<16;r+=1)u[r]=909522486^o[r],c[r]=1549556828^o[r];return e=i(u.concat(h(t)),512+8*t.length),a(i(c.concat(e),640))}(r(n),r(t))}function t(n,t,r){return t?r?u(t,n):e(u(t,n)):r?o(n):e(o(n))}"function"==typeof define&&define.amd?define(function(){return t}):"object"==typeof module&&module.exports?module.exports=t:n.md5=t}(this);