Fix tag delete

Update tags tab automatically
Update binary names (again)
2025-12-12 15:08:53 +00:00 · 2020-12-31 12:55:37 -05:00 · 2020-12-31 12:45:23 -05:00 · 2020-12-31 11:03:25 -05:00 · 2020-12-31 10:55:34 -05:00 · 2020-12-31 10:54:30 -05:00
159 changed files with 7525 additions and 8358 deletions
--- a/.drone.yml
+++ b/.drone.yml
@@ -0,0 +1,56 @@
+kind: pipeline
+type: docker
+name: amd64
+
+platform:
+  os: linux
+  arch: amd64
+
+steps:
+  - name: build
+    image: simon987/ubuntu_ci
+    commands:
+      - ./ci/build.sh
+  - name: scp files
+    image: appleboy/drone-scp
+    settings:
+      host:
+        from_secret: SSH_HOST
+      port:
+        from_secret: SSH_PORT
+      user:
+        from_secret: SSH_USER
+      key:
+        from_secret: SSH_KEY
+      target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
+      source:
+        - ./sist2-x64-linux
+        - ./sist2-x64-linux-debug.tar.gz
+
+---
+kind: pipeline
+type: docker
+name: arm64
+
+platform:
+  arch: arm64
+
+steps:
+  - name: build
+    image: simon987/ubuntu_ci_arm
+    commands:
+      - ./ci/build_arm64.sh
+  - name: scp files
+    image: appleboy/drone-scp
+    settings:
+      host:
+        from_secret: SSH_HOST
+      port:
+        from_secret: SSH_PORT
+      user:
+        from_secret: SSH_USER
+      key:
+        from_secret: SSH_KEY
+      target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
+      source:
+        - ./sist2-arm64-linux
--- a/.github/ISSUE_TEMPLATE/issue-template.md
+++ b/.github/ISSUE_TEMPLATE/issue-template.md
@@ -0,0 +1,18 @@
+---
+name: Issue template
+about: General
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+sist2 version:
+
+Platform (Linux or Docker, x86-64 or arm64):
+
+Elasticsearch version:
+
+Command with arguments:  `ex: scan ~/Documents -o ./i2 --threads 3 -q 1.0`
+
+If the issue is related to the `scan` module, please attach the files necessary to reproduce the error or email them to me[at]simon987.net.
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,5 @@
 .idea
 thumbs
-test
 *.cbp
 CMakeCache.txt
 CMakeFiles
@@ -16,3 +15,4 @@ bundle.js
 *.a
 vgcore.*
 build/
+third-party/
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,42 +1,6 @@
-[submodule "argparse"]
-	path = argparse
+[submodule "third-party/libscan"]
+	path = third-party/libscan
+	url = https://github.com/simon987/libscan
+[submodule "third-party/argparse"]
+	path = third-party/argparse
 	url = https://github.com/cofyc/argparse
-[submodule "cJSON"]
-	path = cJSON
-	url = https://github.com/DaveGamble/cJSON
-[submodule "lmdb"]
-	path = lmdb
-	url = https://github.com/LMDB/lmdb
-[submodule "utf8.h"]
-	path = utf8.h
-	url = https://github.com/sheredom/utf8.h
-[submodule "lib/bzip2-1.0.6"]
-	path = lib/bzip2-1.0.6
-	url = https://github.com/enthought/bzip2-1.0.6
-[submodule "lib/libmagic"]
-	path = lib/libmagic
-	url = https://github.com/threatstack/libmagic
-[submodule "lib/harfbuzz"]
-	path = lib/harfbuzz
-	url = https://github.com/harfbuzz/harfbuzz
-[submodule "lib/openjpeg"]
-	path = lib/openjpeg
-	url = https://github.com/uclouvain/openjpeg
-[submodule "lib/ffmpeg"]
-	path = lib/ffmpeg
-	url = https://git.ffmpeg.org/ffmpeg.git
-[submodule "lib/onion"]
-	path = lib/onion
-	url = https://github.com/davidmoreno/onion
-[submodule "lib/mupdf"]
-	path = lib/mupdf
-	url = git://git.ghostscript.com/mupdf.git
-[submodule "lib/tesseract"]
-	path = lib/tesseract
-	url = https://github.com/tesseract-ocr/tesseract
-[submodule "lib/leptonica"]
-	path = lib/leptonica
-	url = https://github.com/danbloomberg/leptonica
-[submodule "lib/libtiff"]
-	path = lib/libtiff
-	url = https://gitlab.com/libtiff/libtiff
--- a/.teamcity/settings.kts
+++ b/.teamcity/settings.kts
@@ -1,69 +0,0 @@
-import jetbrains.buildServer.configs.kotlin.v2019_2.*
-import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.ExecBuildStep
-import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.exec
-import jetbrains.buildServer.configs.kotlin.v2019_2.triggers.vcs
-import jetbrains.buildServer.configs.kotlin.v2019_2.vcs.GitVcsRoot
-
-/*
-The settings script is an entry point for defining a TeamCity
-project hierarchy. The script should contain a single call to the
-project() function with a Project instance or an init function as
-an argument.
-
-VcsRoots, BuildTypes, Templates, and subprojects can be
-registered inside the project using the vcsRoot(), buildType(),
-template(), and subProject() methods respectively.
-
-To debug settings scripts in command-line, run the
-
-    mvnDebug org.jetbrains.teamcity:teamcity-configs-maven-plugin:generate
-
-command and attach your debugger to the port 8000.
-
-To debug in IntelliJ Idea, open the 'Maven Projects' tool window (View
-> Tool Windows -> Maven Projects), find the generate task node
-(Plugins -> teamcity-configs -> teamcity-configs:generate), the
-'Debug' option is available in the context menu for the task.
-*/
-
-version = "2019.2"
-
-project {
-
-    vcsRoot(HttpsGithubComSimon987sist2refsHeadsMaster)
-
-    buildType(Build)
-}
-
-object Build : BuildType({
-    name = "Build"
-
-    artifactRules = """
-        sist2
-        sist2_scan
-    """.trimIndent()
-
-    vcs {
-        root(HttpsGithubComSimon987sist2refsHeadsMaster)
-    }
-
-    steps {
-        exec {
-            name = "Build"
-            path = "./ci/build.sh"
-            dockerImage = "simon987/general_ci"
-            dockerImagePlatform = ExecBuildStep.ImagePlatform.Linux
-            dockerPull = true
-        }
-    }
-
-    triggers {
-        vcs {
-        }
-    }
-})
-
-object HttpsGithubComSimon987sist2refsHeadsMaster : GitVcsRoot({
-    name = "https://github.com/simon987/sist2#refs/heads/master"
-    url = "https://github.com/simon987/sist2"
-})
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,134 +2,117 @@ cmake_minimum_required(VERSION 3.7)
 set(CMAKE_C_STANDARD 11)

 project(sist2 C)
-list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")

-add_executable(
-        sist2
+option(SIST_DEBUG "Build a debug executable" on)
+
+set(BUILD_TESTS off)
+add_subdirectory(third-party/libscan)
+set(ARGPARSE_SHARED off)
+add_subdirectory(third-party/argparse)
+
+add_executable(sist2
+
+        # argparse
+        third-party/argparse/argparse.h third-party/argparse/argparse.c
+
        src/main.c
        src/sist.h
        src/io/walk.h src/io/walk.c
-        src/parsing/media.h src/parsing/media.c
-        src/parsing/pdf.h src/parsing/pdf.c
        src/io/store.h src/io/store.c
        src/tpool.h src/tpool.c
        src/parsing/parse.h src/parsing/parse.c
        src/io/serialize.h src/io/serialize.c
        src/parsing/mime.h src/parsing/mime.c src/parsing/mime_generated.c
-        src/parsing/text.h src/parsing/text.c
        src/index/web.c src/index/web.h
        src/web/serve.c src/web/serve.h
-        src/web/auth_basic.h src/web/auth_basic.c
        src/index/elastic.c src/index/elastic.h
        src/util.c src/util.h
-        src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
-        src/parsing/arc.c src/parsing/arc.h
-        src/parsing/doc.c src/parsing/doc.h
+        src/ctx.h src/types.h
        src/log.c src/log.h
-
-        # argparse
-        argparse/argparse.h argparse/argparse.c
-
-        # cJSON
-        cJSON/cJSON.h cJSON/cJSON.c
-
-        # LMDB
-        lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
-        lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
        src/cli.c src/cli.h
+        src/stats.c src/stats.h src/ctx.c
+        src/parsing/sidecar.c src/parsing/sidecar.h)

-        # utf8.h
-        utf8.h/utf8.h
-)
+target_link_directories(sist2 BEFORE PRIVATE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/) # BEFORE must precede the scope keyword, otherwise it is parsed as a directory named "BEFORE"
+set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)

-find_package(PkgConfig REQUIRED)
-set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig/")
+find_package(lmdb CONFIG REQUIRED)
+find_package(cJSON CONFIG REQUIRED)
+find_package(unofficial-glib CONFIG REQUIRED)
+find_package(unofficial-mongoose CONFIG REQUIRED)
+find_package(CURL CONFIG REQUIRED)

 #find_package(OpenSSL REQUIRED)
-find_package(Freetype REQUIRED)

-pkg_check_modules(GLIB REQUIRED glib-2.0)
-pkg_check_modules(GOBJECT REQUIRED gobject-2.0)
-pkg_check_modules(UUID REQUIRED uuid)
-
-add_definitions(${UUID_CFLAGS_OTHER})
-add_definitions(${GLIB_CFLAGS_OTHER})
-add_definitions(${GOBJECT_CFLAGS_OTHER})
-add_definitions(${FREETYPE_CFLAGS_OTHER})
-
-list(REMOVE_ITEM GLIB_LIBRARIES pcre)
-list(REMOVE_ITEM GOBJECT_LIBRARIES pcre)
-list(REMOVE_ITEM UUID_LIBRARIES pcre)

 target_include_directories(
        sist2 PUBLIC
-        ${GOBJECT_INCLUDE_DIRS}
-        ${GLIB_INCLUDE_DIRS}
-        ${PROJECT_SOURCE_DIR}/lib/ffmpeg/
-        ${FREETYPE_INCLUDE_DIRS}
-        ${UUID_INCLUDE_DIRS}
-        ${PROJECT_SOURCE_DIR}/
-        ${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
-        ${PROJECT_SOURCE_DIR}/lib/onion/src/
-        ${PROJECT_SOURCE_DIR}/lib/mupdf/include/
-        ${PROJECT_SOURCE_DIR}/include/
-        /usr/include/libxml2/
-        ${PROJECT_SOURCE_DIR}/lib/tesseract/include/
-)
-target_link_directories(
-        sist2 PUBLIC
-        ${UUID_LIBRARY_DIRS}
+        ${CMAKE_SOURCE_DIR}/third-party/onion/src/
+        ${CMAKE_SOURCE_DIR}/third-party/utf8.h/
+        ${CMAKE_SOURCE_DIR}/third-party/libscan/
+        ${CMAKE_SOURCE_DIR}/
 )

-target_compile_options(sist2
+target_compile_options(
+        sist2
+        PRIVATE
+        -fPIC
+)
+
+if (SIST_DEBUG)
+    target_compile_options(
+            sist2
+            PRIVATE
+            -g
+            -fstack-protector
+            -fno-omit-frame-pointer
+            -fsanitize=address
+            -fno-inline
+#            -O2
+    )
+    target_link_options(
+            sist2
+            PRIVATE
+            -fsanitize=address
+    )
+    set_target_properties(
+            sist2
+            PROPERTIES
+            OUTPUT_NAME sist2_debug
+    )
+else ()
+    target_compile_options(
+            sist2
            PRIVATE
            -Ofast
-        # -march=native
            -fno-stack-protector
            -fomit-frame-pointer
    )
+endif ()

-TARGET_LINK_LIBRARIES(
+add_dependencies(
+        sist2
+        scan
+        argparse
+)
+
+target_link_libraries(
        sist2

-        ${GLIB_LIBRARIES}
-        ${GOBJECT_LIBRARIES}
-        ${UUID_LIBRARIES}
-
-        # ffmpeg
-        ${PROJECT_SOURCE_DIR}/lib/libavcodec.a
-        ${PROJECT_SOURCE_DIR}/lib/libavformat.a
-        ${PROJECT_SOURCE_DIR}/lib/libavutil.a
-        ${PROJECT_SOURCE_DIR}/lib/libswscale.a
-        ${PROJECT_SOURCE_DIR}/lib/libswresample.a
-
-        # mupdf
-        ${PROJECT_SOURCE_DIR}/lib/libmupdf.a
-        ${PROJECT_SOURCE_DIR}/lib/libmupdf-third.a
-
-        # onion
-        ${PROJECT_SOURCE_DIR}/lib/libonion_static.a
+        z
+        lmdb
+        cjson
+        argparse
+        unofficial::glib::glib
+        unofficial::mongoose::mongoose
+        CURL::libcurl

        pthread
-        curl
-        m
-        bz2
-        ${PROJECT_SOURCE_DIR}/lib/libmagic.a
-        ${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
-        ${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
-        freetype
-        archive
+        magic

-        xml2
-        ${PROJECT_SOURCE_DIR}/lib/libopc/libmce.a
-        ${PROJECT_SOURCE_DIR}/lib/libopc/libopc.a
-        ${PROJECT_SOURCE_DIR}/lib/libopc/libplib.a
+        c

-        ${PROJECT_SOURCE_DIR}/lib/libtesseract.a
-        ${PROJECT_SOURCE_DIR}/lib/liblept.a
-        ${PROJECT_SOURCE_DIR}/lib/libtiff.a
-        png
-        stdc++
+        scan
 )

 add_custom_target(
--- a/CMakeModules/FindFFmpeg.cmake
+++ b/CMakeModules/FindFFmpeg.cmake
@@ -1,80 +0,0 @@
-# - Try to find ffmpeg libraries (libavcodec, libavformat and libavutil)
-# Once done this will define
-#
-# FFMPEG_FOUND - system has ffmpeg or libav
-# FFMPEG_INCLUDE_DIR - the ffmpeg include directory
-# FFMPEG_LIBRARIES - Link these to use ffmpeg
-# FFMPEG_LIBAVCODEC
-# FFMPEG_LIBAVFORMAT
-# FFMPEG_LIBAVUTIL
-#
-# Copyright (c) 2008 Andreas Schneider <mail@cynapses.org>
-# Modified for other libraries by Lasse Kärkkäinen <tronic>
-# Modified for Hedgewars by Stepik777
-#
-# Redistribution and use is allowed according to the terms of the New
-# BSD license.
-#
-
-if (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
-# in cache already
-set(FFMPEG_FOUND TRUE)
-else (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
-# use pkg-config to get the directories and then use these values
-# in the FIND_PATH() and FIND_LIBRARY() calls
-find_package(PkgConfig)
-if (PKG_CONFIG_FOUND)
-pkg_check_modules(_FFMPEG_AVCODEC libavcodec)
-pkg_check_modules(_FFMPEG_AVFORMAT libavformat)
-pkg_check_modules(_FFMPEG_AVUTIL libavutil)
-endif (PKG_CONFIG_FOUND)
-
-find_path(FFMPEG_AVCODEC_INCLUDE_DIR
-NAMES libavcodec/avcodec.h
-PATHS ${_FFMPEG_AVCODEC_INCLUDE_DIRS} /usr/include /usr/local/include /opt/local/include /sw/include
-PATH_SUFFIXES ffmpeg libav
-)
-
-find_library(FFMPEG_LIBAVCODEC
-NAMES avcodec
-PATHS ${_FFMPEG_AVCODEC_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
-)
-
-find_library(FFMPEG_LIBAVFORMAT
-NAMES avformat
-PATHS ${_FFMPEG_AVFORMAT_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
-)
-
-find_library(FFMPEG_LIBAVUTIL
-NAMES avutil
-PATHS ${_FFMPEG_AVUTIL_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
-)
-
-if (FFMPEG_LIBAVCODEC AND FFMPEG_LIBAVFORMAT)
-set(FFMPEG_FOUND TRUE)
-endif()
-
-if (FFMPEG_FOUND)
-set(FFMPEG_INCLUDE_DIR ${FFMPEG_AVCODEC_INCLUDE_DIR})
-
-set(FFMPEG_LIBRARIES
-${FFMPEG_LIBAVCODEC}
-${FFMPEG_LIBAVFORMAT}
-${FFMPEG_LIBAVUTIL}
-)
-
-endif (FFMPEG_FOUND)
-
-if (FFMPEG_FOUND)
-if (NOT FFMPEG_FIND_QUIETLY)
-message(STATUS "Found FFMPEG or Libav: ${FFMPEG_LIBRARIES}, ${FFMPEG_INCLUDE_DIR}")
-endif (NOT FFMPEG_FIND_QUIETLY)
-else (FFMPEG_FOUND)
-if (FFMPEG_FIND_REQUIRED)
-message(FATAL_ERROR "Could not find libavcodec or libavformat or libavutil")
-endif (FFMPEG_FIND_REQUIRED)
-endif (FFMPEG_FOUND)
-
-endif (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
-
-
--- a/CMakeModules/FindLibMagic.cmake
+++ b/CMakeModules/FindLibMagic.cmake
@@ -1,100 +0,0 @@
-
-#-------------------------------------------------------------------------------
-# Copyright (c) 2013-2013, Lars Baehren <lbaehren@gmail.com>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without modification,
-# are permitted provided that the following conditions are met:
-#
-#  * Redistributions of source code must retain the above copyright notice, this
-#    list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright notice,
-#    this list of conditions and the following disclaimer in the documentation
-#    and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#-------------------------------------------------------------------------------
-
-# - Check for the presence of LIBMAGIC
-#
-# The following variables are set when LIBMAGIC is found:
-#  LIBMAGIC_FOUND      = Set to true, if all components of LIBMAGIC have been
-#                        found.
-#  LIBMAGIC_INCLUDES   = Include path for the header files of LIBMAGIC
-#  LIBMAGIC_LIBRARIES  = Link these to use LIBMAGIC
-#  LIBMAGIC_LFLAGS     = Linker flags (optional)
-
-if (NOT LIBMAGIC_FOUND)
-
-  if (NOT LIBMAGIC_ROOT_DIR)
-    set (LIBMAGIC_ROOT_DIR ${CMAKE_INSTALL_PREFIX})
-  endif (NOT LIBMAGIC_ROOT_DIR)
-
-  ##____________________________________________________________________________
-  ## Check for the header files
-
-  find_path (LIBMAGIC_FILE_H
-    NAMES file/file.h
-    HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
-    PATH_SUFFIXES include
-    )
-  if (LIBMAGIC_FILE_H)
-    list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_FILE_H})
-  endif (LIBMAGIC_FILE_H)
-
-  find_path (LIBMAGIC_MAGIC_H
-    NAMES magic.h
-    HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
-    PATH_SUFFIXES include include/linux
-    )
-  if (LIBMAGIC_MAGIC_H)
-    list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_MAGIC_H})
-  endif (LIBMAGIC_MAGIC_H)
-
-  list (REMOVE_DUPLICATES LIBMAGIC_INCLUDES)
-
-  ##____________________________________________________________________________
-  ## Check for the library
-
-  find_library (LIBMAGIC_LIBRARIES magic
-    HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
-    PATH_SUFFIXES lib
-    )
-
-  ##____________________________________________________________________________
-  ## Actions taken when all components have been found
-
-  #find_package_handle_standard_args (LIBMAGIC DEFAULT_MSG LIBMAGIC_LIBRARIES LIBMAGIC_INCLUDES)
-
-  if (LIBMAGIC_FOUND)
-    if (NOT LIBMAGIC_FIND_QUIETLY)
-      message (STATUS "Found components for LIBMAGIC")
-      message (STATUS "LIBMAGIC_ROOT_DIR  = ${LIBMAGIC_ROOT_DIR}")
-      message (STATUS "LIBMAGIC_INCLUDES  = ${LIBMAGIC_INCLUDES}")
-      message (STATUS "LIBMAGIC_LIBRARIES = ${LIBMAGIC_LIBRARIES}")
-    endif (NOT LIBMAGIC_FIND_QUIETLY)
-  else (LIBMAGIC_FOUND)
-    if (LIBMAGIC_FIND_REQUIRED)
-      message (FATAL_ERROR "Could not find LIBMAGIC!")
-    endif (LIBMAGIC_FIND_REQUIRED)
-  endif (LIBMAGIC_FOUND)
-
-  ##____________________________________________________________________________
-  ## Mark advanced variables
-
-  mark_as_advanced (
-    LIBMAGIC_ROOT_DIR
-    LIBMAGIC_INCLUDES
-    LIBMAGIC_LIBRARIES
-    )
-
-endif (NOT LIBMAGIC_FOUND)
--- a/CMakeModules/FindOpenSSL.cmake
+++ b/CMakeModules/FindOpenSSL.cmake
@@ -1,478 +0,0 @@
-# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
-# file Copyright.txt or https://cmake.org/licensing for details.
-
-macro(_OpenSSL_test_and_find_dependencies ssl_library crypto_library)
-  if((CMAKE_SYSTEM_NAME STREQUAL "Linux") AND
-     (("${ssl_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") OR
-      ("${crypto_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$")))
-    set(_OpenSSL_has_dependencies TRUE)
-    find_package(Threads)
-  else()
-    set(_OpenSSL_has_dependencies FALSE)
-  endif()
-endmacro()
-
-function(_OpenSSL_add_dependencies libraries_var library)
-  if(CMAKE_THREAD_LIBS_INIT)
-    list(APPEND ${libraries_var} ${CMAKE_THREAD_LIBS_INIT})
-  endif()
-  list(APPEND ${libraries_var} ${CMAKE_DL_LIBS})
-  set(${libraries_var} ${${libraries_var}} PARENT_SCOPE)
-endfunction()
-
-function(_OpenSSL_target_add_dependencies target)
-  if(_OpenSSL_has_dependencies)
-    set_property( TARGET ${target} APPEND PROPERTY INTERFACE_LINK_LIBRARIES Threads::Threads )
-    set_property( TARGET ${target} APPEND PROPERTY INTERFACE_LINK_LIBRARIES ${CMAKE_DL_LIBS} )
-  endif()
-endfunction()
-
-if (UNIX)
-  find_package(PkgConfig QUIET)
-  pkg_check_modules(_OPENSSL QUIET openssl)
-endif ()
-
-# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES
-if(OPENSSL_USE_STATIC_LIBS)
-  set(_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
-  if(WIN32)
-    set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
-  else()
-    set(CMAKE_FIND_LIBRARY_SUFFIXES .a )
-  endif()
-endif()
-
-if (WIN32)
-  # http://www.slproweb.com/products/Win32OpenSSL.html
-  set(_OPENSSL_ROOT_HINTS
-    ${OPENSSL_ROOT_DIR}
-    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (32-bit)_is1;Inno Setup: App Path]"
-    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (64-bit)_is1;Inno Setup: App Path]"
-    ENV OPENSSL_ROOT_DIR
-    )
-  file(TO_CMAKE_PATH "$ENV{PROGRAMFILES}" _programfiles)
-  set(_OPENSSL_ROOT_PATHS
-    "${_programfiles}/OpenSSL"
-    "${_programfiles}/OpenSSL-Win32"
-    "${_programfiles}/OpenSSL-Win64"
-    "C:/OpenSSL/"
-    "C:/OpenSSL-Win32/"
-    "C:/OpenSSL-Win64/"
-    )
-  unset(_programfiles)
-else ()
-  set(_OPENSSL_ROOT_HINTS
-    ${OPENSSL_ROOT_DIR}
-    ENV OPENSSL_ROOT_DIR
-    )
-endif ()
-
-set(_OPENSSL_ROOT_HINTS_AND_PATHS
-    HINTS ${_OPENSSL_ROOT_HINTS}
-    PATHS ${_OPENSSL_ROOT_PATHS}
-    )
-
-find_path(OPENSSL_INCLUDE_DIR
-  NAMES
-    openssl/ssl.h
-  ${_OPENSSL_ROOT_HINTS_AND_PATHS}
-  HINTS
-    ${_OPENSSL_INCLUDEDIR}
-  PATH_SUFFIXES
-    include
-)
-
-if(WIN32 AND NOT CYGWIN)
-  if(MSVC)
-    # /MD and /MDd are the standard values - if someone wants to use
-    # others, the libnames have to change here too
-    # use also ssl and ssleay32 in debug as fallback for openssl < 0.9.8b
-    # enable OPENSSL_MSVC_STATIC_RT to get the libs build /MT (Multithreaded no-DLL)
-    # In Visual C++ naming convention each of these four kinds of Windows libraries has it's standard suffix:
-    #   * MD for dynamic-release
-    #   * MDd for dynamic-debug
-    #   * MT for static-release
-    #   * MTd for static-debug
-
-    # Implementation details:
-    # We are using the libraries located in the VC subdir instead of the parent directory even though :
-    # libeay32MD.lib is identical to ../libeay32.lib, and
-    # ssleay32MD.lib is identical to ../ssleay32.lib
-    # enable OPENSSL_USE_STATIC_LIBS to use the static libs located in lib/VC/static
-
-    if (OPENSSL_MSVC_STATIC_RT)
-      set(_OPENSSL_MSVC_RT_MODE "MT")
-    else ()
-      set(_OPENSSL_MSVC_RT_MODE "MD")
-    endif ()
-
-    # Since OpenSSL 1.1, lib names are like libcrypto32MTd.lib and libssl32MTd.lib
-    if( "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8" )
-        set(_OPENSSL_MSVC_ARCH_SUFFIX "64")
-    else()
-        set(_OPENSSL_MSVC_ARCH_SUFFIX "32")
-    endif()
-
-    if(OPENSSL_USE_STATIC_LIBS)
-      set(_OPENSSL_PATH_SUFFIXES
-        "lib/VC/static"
-        "VC/static"
-        "lib"
-        )
-    else()
-      set(_OPENSSL_PATH_SUFFIXES
-        "lib/VC"
-        "VC"
-        "lib"
-        )
-    endif ()
-
-    find_library(LIB_EAY_DEBUG
-      NAMES
-        libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
-        libcrypto${_OPENSSL_MSVC_RT_MODE}d
-        libcryptod
-        libeay32${_OPENSSL_MSVC_RT_MODE}d
-        libeay32d
-        cryptod
-      NAMES_PER_DIR
-      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
-      PATH_SUFFIXES
-        ${_OPENSSL_PATH_SUFFIXES}
-    )
-
-    find_library(LIB_EAY_RELEASE
-      NAMES
-        libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
-        libcrypto${_OPENSSL_MSVC_RT_MODE}
-        libcrypto
-        libeay32${_OPENSSL_MSVC_RT_MODE}
-        libeay32
-        crypto
-      NAMES_PER_DIR
-      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
-      PATH_SUFFIXES
-        ${_OPENSSL_PATH_SUFFIXES}
-    )
-
-    find_library(SSL_EAY_DEBUG
-      NAMES
-        libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
-        libssl${_OPENSSL_MSVC_RT_MODE}d
-        libssld
-        ssleay32${_OPENSSL_MSVC_RT_MODE}d
-        ssleay32d
-        ssld
-      NAMES_PER_DIR
-      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
-      PATH_SUFFIXES
-        ${_OPENSSL_PATH_SUFFIXES}
-    )
-
-    find_library(SSL_EAY_RELEASE
-      NAMES
-        libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
-        libssl${_OPENSSL_MSVC_RT_MODE}
-        libssl
-        ssleay32${_OPENSSL_MSVC_RT_MODE}
-        ssleay32
-        ssl
-      NAMES_PER_DIR
-      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
-      PATH_SUFFIXES
-        ${_OPENSSL_PATH_SUFFIXES}
-    )
-
-    set(LIB_EAY_LIBRARY_DEBUG "${LIB_EAY_DEBUG}")
-    set(LIB_EAY_LIBRARY_RELEASE "${LIB_EAY_RELEASE}")
-    set(SSL_EAY_LIBRARY_DEBUG "${SSL_EAY_DEBUG}")
-    set(SSL_EAY_LIBRARY_RELEASE "${SSL_EAY_RELEASE}")
-
-    include(${CMAKE_CURRENT_LIST_DIR}/SelectLibraryConfigurations.cmake)
-    select_library_configurations(LIB_EAY)
-    select_library_configurations(SSL_EAY)
-
-    mark_as_advanced(LIB_EAY_LIBRARY_DEBUG LIB_EAY_LIBRARY_RELEASE
-                     SSL_EAY_LIBRARY_DEBUG SSL_EAY_LIBRARY_RELEASE)
-    set(OPENSSL_SSL_LIBRARY ${SSL_EAY_LIBRARY} )
-    set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY_LIBRARY} )
-  elseif(MINGW)
-    # same player, for MinGW
-    set(LIB_EAY_NAMES crypto libeay32)
-    set(SSL_EAY_NAMES ssl ssleay32)
-    find_library(LIB_EAY
-      NAMES
-        ${LIB_EAY_NAMES}
-      NAMES_PER_DIR
-      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
-      PATH_SUFFIXES
-        "lib/MinGW"
-        "lib"
-    )
-
-    find_library(SSL_EAY
-      NAMES
-        ${SSL_EAY_NAMES}
-      NAMES_PER_DIR
-      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
-      PATH_SUFFIXES
-        "lib/MinGW"
-        "lib"
-    )
-
-    mark_as_advanced(SSL_EAY LIB_EAY)
-    set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
-    set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
-    unset(LIB_EAY_NAMES)
-    unset(SSL_EAY_NAMES)
-  else()
-    # Not sure what to pick for -say- intel, let's use the toplevel ones and hope someone report issues:
-    find_library(LIB_EAY
-      NAMES
-        libcrypto
-        libeay32
-      NAMES_PER_DIR
-      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
-      HINTS
-        ${_OPENSSL_LIBDIR}
-      PATH_SUFFIXES
-        lib
-    )
-
-    find_library(SSL_EAY
-      NAMES
-        libssl
-        ssleay32
-      NAMES_PER_DIR
-      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
-      HINTS
-        ${_OPENSSL_LIBDIR}
-      PATH_SUFFIXES
-        lib
-    )
-
-    mark_as_advanced(SSL_EAY LIB_EAY)
-    set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
-    set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
-  endif()
-else()
-
-  find_library(OPENSSL_SSL_LIBRARY
-    NAMES
-      ssl
-      ssleay32
-      ssleay32MD
-    NAMES_PER_DIR
-    ${_OPENSSL_ROOT_HINTS_AND_PATHS}
-    HINTS
-      ${_OPENSSL_LIBDIR}
-    PATH_SUFFIXES
-      lib
-  )
-
-  find_library(OPENSSL_CRYPTO_LIBRARY
-    NAMES
-      crypto
-    NAMES_PER_DIR
-    ${_OPENSSL_ROOT_HINTS_AND_PATHS}
-    HINTS
-      ${_OPENSSL_LIBDIR}
-    PATH_SUFFIXES
-      lib
-  )
-
-  mark_as_advanced(OPENSSL_CRYPTO_LIBRARY OPENSSL_SSL_LIBRARY)
-
-endif()
-
-# compat defines
-set(OPENSSL_SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY})
-set(OPENSSL_CRYPTO_LIBRARIES ${OPENSSL_CRYPTO_LIBRARY})
-_OpenSSL_test_and_find_dependencies("${OPENSSL_SSL_LIBRARY}" "${OPENSSL_CRYPTO_LIBRARY}")
-if(_OpenSSL_has_dependencies)
-  _OpenSSL_add_dependencies( OPENSSL_SSL_LIBRARIES "${OPENSSL_SSL_LIBRARY}" )
-  _OpenSSL_add_dependencies( OPENSSL_CRYPTO_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}" )
-endif()
-
-function(from_hex HEX DEC)
-  string(TOUPPER "${HEX}" HEX)
-  set(_res 0)
-  string(LENGTH "${HEX}" _strlen)
-
-  while (_strlen GREATER 0)
-    math(EXPR _res "${_res} * 16")
-    string(SUBSTRING "${HEX}" 0 1 NIBBLE)
-    string(SUBSTRING "${HEX}" 1 -1 HEX)
-    if (NIBBLE STREQUAL "A")
-      math(EXPR _res "${_res} + 10")
-    elseif (NIBBLE STREQUAL "B")
-      math(EXPR _res "${_res} + 11")
-    elseif (NIBBLE STREQUAL "C")
-      math(EXPR _res "${_res} + 12")
-    elseif (NIBBLE STREQUAL "D")
-      math(EXPR _res "${_res} + 13")
-    elseif (NIBBLE STREQUAL "E")
-      math(EXPR _res "${_res} + 14")
-    elseif (NIBBLE STREQUAL "F")
-      math(EXPR _res "${_res} + 15")
-    else()
-      math(EXPR _res "${_res} + ${NIBBLE}")
-    endif()
-
-    string(LENGTH "${HEX}" _strlen)
-  endwhile()
-
-  set(${DEC} ${_res} PARENT_SCOPE)
-endfunction()
-
-if(OPENSSL_INCLUDE_DIR AND EXISTS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h")
-  file(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" openssl_version_str
-       REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")
-
-  if(openssl_version_str)
-    # The version number is encoded as 0xMNNFFPPS: major minor fix patch status
-    # The status gives if this is a developer or prerelease and is ignored here.
-    # Major, minor, and fix directly translate into the version numbers shown in
-    # the string. The patch field translates to the single character suffix that
-    # indicates the bug fix state, which 00 -> nothing, 01 -> a, 02 -> b and so
-    # on.
-
-    string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
-           "\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
-    list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
-    list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
-    from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
-    list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
-    from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
-    list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)
-
-    if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
-      from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
-      # 96 is the ASCII code of 'a' minus 1
-      math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
-      unset(_tmp)
-      # Once anyone knows how OpenSSL would call the patch versions beyond 'z'
-      # this should be updated to handle that, too. This has not happened yet
-      # so it is simply ignored here for now.
-      string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
-    endif ()
-
-    set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
-  endif ()
-endif ()
-
-set(OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARIES} ${OPENSSL_CRYPTO_LIBRARIES} )
-list(REMOVE_DUPLICATES OPENSSL_LIBRARIES)
-
-foreach(_comp IN LISTS OpenSSL_FIND_COMPONENTS)
-  if(_comp STREQUAL "Crypto")
-    if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
-        (EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
-        EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
-        EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
-    )
-      set(OpenSSL_${_comp}_FOUND TRUE)
-    else()
-      set(OpenSSL_${_comp}_FOUND FALSE)
-    endif()
-  elseif(_comp STREQUAL "SSL")
-    if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
-        (EXISTS "${OPENSSL_SSL_LIBRARY}" OR
-        EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
-        EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
-    )
-      set(OpenSSL_${_comp}_FOUND TRUE)
-    else()
-      set(OpenSSL_${_comp}_FOUND FALSE)
-    endif()
-  else()
-    message(WARNING "${_comp} is not a valid OpenSSL component")
-    set(OpenSSL_${_comp}_FOUND FALSE)
-  endif()
-endforeach()
-unset(_comp)
-
-include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
-find_package_handle_standard_args(OpenSSL
-  REQUIRED_VARS
-    OPENSSL_CRYPTO_LIBRARY
-    OPENSSL_INCLUDE_DIR
-  VERSION_VAR
-    OPENSSL_VERSION
-  HANDLE_COMPONENTS
-  FAIL_MESSAGE
-    "Could NOT find OpenSSL, try to set the path to OpenSSL root folder in the system variable OPENSSL_ROOT_DIR"
-)
-
-mark_as_advanced(OPENSSL_INCLUDE_DIR OPENSSL_LIBRARIES)
-
-if(OPENSSL_FOUND)
-  if(NOT TARGET OpenSSL::Crypto AND
-      (EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
-        EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
-        EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
-      )
-    add_library(OpenSSL::Crypto UNKNOWN IMPORTED)
-    set_target_properties(OpenSSL::Crypto PROPERTIES
-      INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
-    if(EXISTS "${OPENSSL_CRYPTO_LIBRARY}")
-      set_target_properties(OpenSSL::Crypto PROPERTIES
-        IMPORTED_LINK_INTERFACE_LANGUAGES "C"
-        IMPORTED_LOCATION "${OPENSSL_CRYPTO_LIBRARY}")
-    endif()
-    if(EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
-      set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
-        IMPORTED_CONFIGURATIONS RELEASE)
-      set_target_properties(OpenSSL::Crypto PROPERTIES
-        IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
-        IMPORTED_LOCATION_RELEASE "${LIB_EAY_LIBRARY_RELEASE}")
-    endif()
-    if(EXISTS "${LIB_EAY_LIBRARY_DEBUG}")
-      set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
-        IMPORTED_CONFIGURATIONS DEBUG)
-      set_target_properties(OpenSSL::Crypto PROPERTIES
-        IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
-        IMPORTED_LOCATION_DEBUG "${LIB_EAY_LIBRARY_DEBUG}")
-    endif()
-    _OpenSSL_target_add_dependencies(OpenSSL::Crypto)
-  endif()
-
-  if(NOT TARGET OpenSSL::SSL AND
-      (EXISTS "${OPENSSL_SSL_LIBRARY}" OR
-        EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
-        EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
-      )
-    add_library(OpenSSL::SSL UNKNOWN IMPORTED)
-    set_target_properties(OpenSSL::SSL PROPERTIES
-      INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
-    if(EXISTS "${OPENSSL_SSL_LIBRARY}")
-      set_target_properties(OpenSSL::SSL PROPERTIES
-        IMPORTED_LINK_INTERFACE_LANGUAGES "C"
-        IMPORTED_LOCATION "${OPENSSL_SSL_LIBRARY}")
-    endif()
-    if(EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
-      set_property(TARGET OpenSSL::SSL APPEND PROPERTY
-        IMPORTED_CONFIGURATIONS RELEASE)
-      set_target_properties(OpenSSL::SSL PROPERTIES
-        IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
-        IMPORTED_LOCATION_RELEASE "${SSL_EAY_LIBRARY_RELEASE}")
-    endif()
-    if(EXISTS "${SSL_EAY_LIBRARY_DEBUG}")
-      set_property(TARGET OpenSSL::SSL APPEND PROPERTY
-        IMPORTED_CONFIGURATIONS DEBUG)
-      set_target_properties(OpenSSL::SSL PROPERTIES
-        IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
-        IMPORTED_LOCATION_DEBUG "${SSL_EAY_LIBRARY_DEBUG}")
-    endif()
-    if(TARGET OpenSSL::Crypto)
-      set_target_properties(OpenSSL::SSL PROPERTIES
-        INTERFACE_LINK_LIBRARIES OpenSSL::Crypto)
-    endif()
-    _OpenSSL_target_add_dependencies(OpenSSL::SSL)
-  endif()
-endif()
-
-# Restore the original find library ordering
-if(OPENSSL_USE_STATIC_LIBS)
-  set(CMAKE_FIND_LIBRARY_SUFFIXES ${_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
-endif()
--- a/CMakeModules/FindPackageHandleStandardArgs.cmake
+++ b/CMakeModules/FindPackageHandleStandardArgs.cmake
@@ -1,268 +0,0 @@
-# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
-# file Copyright.txt or https://cmake.org/licensing for details.
-
-include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)
-
-# internal helper macro
-macro(_FPHSA_FAILURE_MESSAGE _msg)
-  set (__msg "${_msg}")
-  if (FPHSA_REASON_FAILURE_MESSAGE)
-    string(APPEND __msg "\n    Reason given by package: ${FPHSA_REASON_FAILURE_MESSAGE}\n")
-  endif()
-  if (${_NAME}_FIND_REQUIRED)
-    message(FATAL_ERROR "${__msg}")
-  else ()
-    if (NOT ${_NAME}_FIND_QUIETLY)
-      message(STATUS "${__msg}")
-    endif ()
-  endif ()
-endmacro()
-
-
-# internal helper macro to generate the failure message when used in CONFIG_MODE:
-macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)
-  # <PackageName>_CONFIG is set, but FOUND is false, this means that some other of the REQUIRED_VARS was not found:
-  if(${_NAME}_CONFIG)
-    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})")
-  else()
-    # If _CONSIDERED_CONFIGS is set, the config-file has been found, but no suitable version.
-    # List them all in the error message:
-    if(${_NAME}_CONSIDERED_CONFIGS)
-      set(configsText "")
-      list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount)
-      math(EXPR configsCount "${configsCount} - 1")
-      foreach(currentConfigIndex RANGE ${configsCount})
-        list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename)
-        list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version)
-        string(APPEND configsText "\n    ${filename} (version ${version})")
-      endforeach()
-      if (${_NAME}_NOT_FOUND_MESSAGE)
-        if (FPHSA_REASON_FAILURE_MESSAGE)
-          string(PREPEND FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}\n    ")
-        else()
-          set(FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}")
-        endif()
-      else()
-        string(APPEND configsText "\n")
-      endif()
-      _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:${configsText}")
-
-    else()
-      # Simple case: No Config-file was found at all:
-      _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}")
-    endif()
-  endif()
-endmacro()
-
-
-function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)
-
-# Set up the arguments for `cmake_parse_arguments`.
-  set(options  CONFIG_MODE  HANDLE_COMPONENTS)
-  set(oneValueArgs  FAIL_MESSAGE  REASON_FAILURE_MESSAGE VERSION_VAR  FOUND_VAR)
-  set(multiValueArgs REQUIRED_VARS)
-
-# Check whether we are in 'simple' or 'extended' mode:
-  set(_KEYWORDS_FOR_EXTENDED_MODE  ${options} ${oneValueArgs} ${multiValueArgs} )
-  list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX)
-
-  if(${INDEX} EQUAL -1)
-    set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})
-    set(FPHSA_REQUIRED_VARS ${ARGN})
-    set(FPHSA_VERSION_VAR)
-  else()
-    cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}"  ${_FIRST_ARG} ${ARGN})
-
-    if(FPHSA_UNPARSED_ARGUMENTS)
-      message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"")
-    endif()
-
-    if(NOT FPHSA_FAIL_MESSAGE)
-      set(FPHSA_FAIL_MESSAGE  "DEFAULT_MSG")
-    endif()
-
-    # In config-mode, we rely on the variable <PackageName>_CONFIG, which is set by find_package()
-    # when it successfully found the config-file, including version checking:
-    if(FPHSA_CONFIG_MODE)
-      list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)
-      list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)
-      set(FPHSA_VERSION_VAR ${_NAME}_VERSION)
-    endif()
-
-    if(NOT FPHSA_REQUIRED_VARS)
-      message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()")
-    endif()
-  endif()
-
-# now that we collected all arguments, process them
-
-  if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG")
-    set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}")
-  endif()
-
-  list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)
-
-  string(TOUPPER ${_NAME} _NAME_UPPER)
-  string(TOLOWER ${_NAME} _NAME_LOWER)
-
-  if(FPHSA_FOUND_VAR)
-    if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$"  OR  FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$")
-      set(_FOUND_VAR ${FPHSA_FOUND_VAR})
-    else()
-      message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.")
-    endif()
-  else()
-    set(_FOUND_VAR ${_NAME_UPPER}_FOUND)
-  endif()
-
-  # collect all variables which were not found, so they can be printed, so the
-  # user knows better what went wrong (#6375)
-  set(MISSING_VARS "")
-  set(DETAILS "")
-  # check if all passed variables are valid
-  set(FPHSA_FOUND_${_NAME} TRUE)
-  foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS})
-    if(NOT ${_CURRENT_VAR})
-      set(FPHSA_FOUND_${_NAME} FALSE)
-      string(APPEND MISSING_VARS " ${_CURRENT_VAR}")
-    else()
-      string(APPEND DETAILS "[${${_CURRENT_VAR}}]")
-    endif()
-  endforeach()
-  if(FPHSA_FOUND_${_NAME})
-    set(${_NAME}_FOUND TRUE)
-    set(${_NAME_UPPER}_FOUND TRUE)
-  else()
-    set(${_NAME}_FOUND FALSE)
-    set(${_NAME_UPPER}_FOUND FALSE)
-  endif()
-
-  # component handling
-  unset(FOUND_COMPONENTS_MSG)
-  unset(MISSING_COMPONENTS_MSG)
-
-  if(FPHSA_HANDLE_COMPONENTS)
-    foreach(comp ${${_NAME}_FIND_COMPONENTS})
-      if(${_NAME}_${comp}_FOUND)
-
-        if(NOT DEFINED FOUND_COMPONENTS_MSG)
-          set(FOUND_COMPONENTS_MSG "found components:")
-        endif()
-        string(APPEND FOUND_COMPONENTS_MSG " ${comp}")
-
-      else()
-
-        if(NOT DEFINED MISSING_COMPONENTS_MSG)
-          set(MISSING_COMPONENTS_MSG "missing components:")
-        endif()
-        string(APPEND MISSING_COMPONENTS_MSG " ${comp}")
-
-        if(${_NAME}_FIND_REQUIRED_${comp})
-          set(${_NAME}_FOUND FALSE)
-          string(APPEND MISSING_VARS " ${comp}")
-        endif()
-
-      endif()
-    endforeach()
-    set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}")
-    string(APPEND DETAILS "[c${COMPONENT_MSG}]")
-  endif()
-
-  # version handling:
-  set(VERSION_MSG "")
-  set(VERSION_OK TRUE)
-
-  # check with DEFINED here as the requested or found version may be "0"
-  if (DEFINED ${_NAME}_FIND_VERSION)
-    if(DEFINED ${FPHSA_VERSION_VAR})
-      set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})
-
-      if(${_NAME}_FIND_VERSION_EXACT)       # exact version required
-        # count the dots in the version string
-        string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}")
-        # add one dot because there is one dot more than there are components
-        string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS)
-        if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)
-          # Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT
-          # is at most 4 here. Therefore a simple lookup table is used.
-          if (${_NAME}_FIND_VERSION_COUNT EQUAL 1)
-            set(_VERSION_REGEX "[^.]*")
-          elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2)
-            set(_VERSION_REGEX "[^.]*\\.[^.]*")
-          elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3)
-            set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*")
-          else ()
-            set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*")
-          endif ()
-          string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}")
-          unset(_VERSION_REGEX)
-          if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)
-            set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
-            set(VERSION_OK FALSE)
-          else ()
-            set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
-          endif ()
-          unset(_VERSION_HEAD)
-        else ()
-          if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)
-            set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
-            set(VERSION_OK FALSE)
-          else ()
-            set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
-          endif ()
-        endif ()
-        unset(_VERSION_DOTS)
-
-      else()     # minimum version specified:
-        if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)
-          set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"")
-          set(VERSION_OK FALSE)
-        else ()
-          set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")")
-        endif ()
-      endif()
-
-    else()
-
-      # if the package was not found, but a version was given, add that to the output:
-      if(${_NAME}_FIND_VERSION_EXACT)
-         set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")")
-      else()
-         set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")")
-      endif()
-
-    endif()
-  else ()
-    # Check with DEFINED as the found version may be 0.
-    if(DEFINED ${FPHSA_VERSION_VAR})
-      set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
-    endif()
-  endif ()
-
-  if(VERSION_OK)
-    string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
-  else()
-    set(${_NAME}_FOUND FALSE)
-  endif()
-
-
-  # print the result:
-  if (${_NAME}_FOUND)
-    FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
-  else ()
-
-    if(FPHSA_CONFIG_MODE)
-      _FPHSA_HANDLE_FAILURE_CONFIG_MODE()
-    else()
-      if(NOT VERSION_OK)
-        _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})")
-      else()
-        _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
-      endif()
-    endif()
-
-  endif ()
-
-  set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
-  set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
-endfunction()
--- a/CMakeModules/FindPackageMessage.cmake
+++ b/CMakeModules/FindPackageMessage.cmake
@@ -1,48 +0,0 @@
-# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
-# file Copyright.txt or https://cmake.org/licensing for details.
-
-#[=======================================================================[.rst:
-FindPackageMessage
------------------
-
-.. code-block:: cmake
-
-  find_package_message(<name> "message for user" "find result details")
-
-This function is intended to be used in FindXXX.cmake modules files.
-It will print a message once for each unique find result.  This is
-useful for telling the user where a package was found.  The first
-argument specifies the name (XXX) of the package.  The second argument
-specifies the message to display.  The third argument lists details
-about the find result so that if they change the message will be
-displayed again.  The macro also obeys the QUIET argument to the
-find_package command.
-
-Example:
-
-.. code-block:: cmake
-
-  if(X11_FOUND)
-    find_package_message(X11 "Found X11: ${X11_X11_LIB}"
-      "[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
-  else()
-   ...
-  endif()
-#]=======================================================================]
-
-function(find_package_message pkg msg details)
-  # Avoid printing a message repeatedly for the same find result.
-  if(NOT ${pkg}_FIND_QUIETLY)
-    string(REPLACE "\n" "" details "${details}")
-    set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg})
-    if(NOT "${details}" STREQUAL "${${DETAILS_VAR}}")
-      # The message has not yet been printed.
-      message(STATUS "${msg}")
-
-      # Save the find details in the cache to avoid printing the same
-      # message again.
-      set("${DETAILS_VAR}" "${details}"
-        CACHE INTERNAL "Details about finding ${pkg}")
-    endif()
-  endif()
-endfunction()
--- a/Docker/Dockerfile
+++ b/Docker/Dockerfile
@@ -3,7 +3,7 @@ MAINTAINER simon987 <me@simon987.net>

 RUN apt update
 RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
- curl libtiff5 libpng16-16
+ curl libtiff5 libpng16-16 libpcre3

 RUN mkdir -p /usr/share/tessdata && \
    cd /usr/share/tessdata/ && \
@@ -16,4 +16,7 @@ RUN mkdir -p /usr/share/tessdata && \

 ADD sist2 /root/sist2

+ENV LANG C.UTF-8
+ENV LC_ALL C.UTF-8
+
 ENTRYPOINT ["/root/sist2"]
--- a/Docker/build.sh
+++ b/Docker/build.sh
@@ -1,10 +1,14 @@
-rm ./sist2
-cp ../sist2 .
+rm ./sist2 sist2_debug
+cp ../sist2.gz .
+gzip -d sist2.gz
 strip sist2

 version=$(./sist2 --version)

 echo "Version ${version}"
 docker build . -t simon987/sist2:${version} -t simon987/sist2:latest
+
 docker push simon987/sist2:${version}
 docker push simon987/sist2:latest
+
+docker run --rm simon987/sist2 -v
--- a/DockerArm64/Dockerfile
+++ b/DockerArm64/Dockerfile
@@ -0,0 +1,22 @@
+FROM ubuntu:19.10
+MAINTAINER simon987 <me@simon987.net>
+
+RUN apt update
+RUN apt install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
+ curl libtiff5 libpng16-16 libpcre3
+
+RUN mkdir -p /usr/share/tessdata && \
+    cd /usr/share/tessdata/ && \
+    curl -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
+    curl -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
+    curl -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
+    curl -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
+    curl -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
+    curl -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
+
+ADD sist2_arm64 /root/sist2
+
+ENV LANG C.UTF-8
+ENV LC_ALL C.UTF-8
+
+ENTRYPOINT ["/root/sist2"]
--- a/DockerArm64/build.sh
+++ b/DockerArm64/build.sh
@@ -0,0 +1,13 @@
+rm ./sist2_arm64
+cp ../sist2_arm64.gz .
+gzip -d sist2_arm64.gz
+
+version=$(./sist2_arm64 --version)
+
+echo "Version ${version}"
+docker build . -t simon987/sist2-arm64:"${version}" -t simon987/sist2-arm64:latest
+
+docker push simon987/sist2-arm64:"${version}"
+docker push simon987/sist2-arm64:latest
+
+docker run --rm simon987/sist2-arm64 -v
--- a/README.md
+++ b/README.md
@@ -1,6 +1,8 @@
 ![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
 [![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
-[![Development snapshots](https://ci.simon987.net/app/rest/builds/buildType(Sist2_Build)/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)
+[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/sist2/simon987_sist2/)
+
+**Demo**: [sist2.simon987.net](https://sist2.simon987.net/?i=Demo%20files)

 # sist2

@@ -8,95 +10,83 @@ sist2 (Simple incremental search tool)

 *Warning: sist2 is in early development*

+![sist2.png](docs/sist2.png)
+
 ## Features

 * Fast, low memory usage, multi-threaded
+* Mobile-friendly Web interface
 * Portable (all its features are packaged in a single executable)
-* Extracts text from common file types \*
+* Extracts text and metadata from common file types \*
 * Generates thumbnails \*
 * Incremental scanning
-* Automatic tagging from file attributes via [user scripts](scripting/README.md)
+* Manual tagging from the UI and automatic tagging based on file attributes via [user scripts](docs/scripting.md)
 * Recursive scan inside archive files \*\*
 * OCR support with tesseract \*\*\*
+* Stats page & disk utilisation visualization


 \* See [format support](#format-support)    
 \*\* See [Archive files](#archive-files)    
 \*\*\* See [OCR](#ocr)    

+![stats](docs/stats.png)
+
+
 ## Getting Started

-1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
-1. 
+1. Have an Elasticsearch (>= 6.X.X) instance running
+    1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
+    1. *(or)* Run using docker:
+        ```bash
+       docker run -d --name es1 --net sist2_net -p 9200:9200 \
+            -e "discovery.type=single-node" elasticsearch:7.5.2
+        ```
+    1. *(or)* Run using docker-compose:
+        ```yaml
+          elasticsearch:
+            image: docker.elastic.co/elasticsearch/elasticsearch:7.5.2
+            environment:
+              - discovery.type=single-node
+              - "ES_JAVA_OPTS=-Xms1G -Xmx2G"
+        ```
+1. Download sist2 executable
    1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
-    1. *(or)* Download an [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
+    1. *(or)* Download a [development snapshot](https://files.simon987.net/sist2/simon987_sist2/) *(Not recommended!)*
    1. *(or)* `docker pull simon987/sist2:latest`

+1. See [Usage guide](docs/USAGE.md)
+   

 \* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)    
-\* *Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)


 ## Example usage

-See help page `sist2 --help` for more details.
+See [Usage guide](docs/USAGE.md) for more details

-**Scan a directory**
-```bash
-sist2 scan ~/Documents -o ./orig_idx/
-sist2 scan --threads 4 --content-size 16384 /mnt/Pictures
-sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
-```
-
-**Push index to Elasticsearch or file**
-```bash
-sist2 index --force-reset ./my_idx
-sist2 index --print ./my_idx > raw_documents.ndjson
-```
-
-**Start web interface**
-```bash
-sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
-```
-
-### Use sist2 with docker
-
-**scan**
-```bash
-docker run -it \
-    -v /path/to/files/:/files \
-    -v $PWD/out/:/out \
-    simon987/sist2 scan -t 4 /files -o /out/my_idx1
-```
-**index**
-```bash
-docker run -it --network host\
-    -v $PWD/out/:/out \
-    simon987/sist2 index /out/my_idx1
-```
-
-**web**
-```bash
-docker run --rm --network host -d --name sist2\
-    -v $PWD/out/my_idx:/idx \
-    -v $PWD/my/files:/files
-    simon987/sist2 web --bind 0.0.0.0 /idx
-docker stop sist2
-```
+1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
+1. Push index to Elasticsearch: `sist2 index ./docs_idx`
+1. Start web interface: `sist2 web ./docs_idx`


 ## Format support

 File type | Library | Content | Thumbnail | Metadata
 :---|:---|:---|:---|:---
-pdf,xps,cbz,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
-`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
-`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
-`image/*` | ffmpeg | - | yes, `jpeg` | `EXIF:Artist`, `EXIF:ImageDescription` |
+pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
+cbz,cbr | *(none)* | - | yes | - |
+`audio/*` | ffmpeg | - | yes | ID3 tags |
+`video/*` | ffmpeg | - | yes | title, comment, artist |
+`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
+raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, x3f, arw, sr2, srf, erf  | LibRaw | - | yes | Common EXIF tags |
 ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
 `text/plain` | *(none)* | yes | no | - |
+html, xml | *(none)* | yes | no | - |
 tar, zip, rar, 7z, ar ...  | Libarchive | yes\* | - | no |
-docx, xlsx, pptx | libOPC | yes | no | no |
+docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
+doc (MS Word 97-2003) | antiword | yes | yes | author, title |
+mobi, azw, azw3 | libmobi | yes | no | author, title |

 \* *See [Archive files](#archive-files)*
 
@@ -106,22 +96,20 @@ they were directly in the file system. Recursive (archives inside archives)
 scan is also supported.

 **Limitations**:
-* Parsing media files with formats that require
-*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
+* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) 
+  is limited (see `--mem-buffer` option)
 * Archive files are scanned sequentially, by a single thread. On systems where
 **sist2** is not I/O bound, scans might be faster when larger archives are split
 into smaller parts.
 
-To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
- 
 
 ### OCR

-You can enable OCR support for pdf,xps,cbz,fb2,epub file types with the
+You can enable OCR support for pdf,xps,fb2,epub file types with the
 `--ocr <lang>` option. Download the language data files with your
 package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).

-The `simon987/sist2` github image comes with common languages 
+The `simon987/sist2` image comes with common languages 
 (hin, jpn, eng, fra, rus, spa) pre-installed.

 Examples
@@ -134,22 +122,17 @@ sist2 scan --ocr eng ~/Books/Textbooks/
 ## Build from source

 You can compile **sist2** by yourself if you don't want to use the pre-compiled
-binaries.
+binaries (GCC 7+ required).

 1. Install compile-time dependencies

-    *(Debian)*
   ```bash
-    apt install git cmake pkg-config libglib2.0-dev \
-        libssl-dev uuid-dev python3 libmagic-dev libfreetype6-dev \
-        libcurl-dev libbz2-dev yasm libharfbuzz-dev ragel \
-        libarchive-dev libtiff5 libpng16-16 libpango1.0-dev
+   vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libmagic libraw curl[core,ssl] jbig2dec brotli libmupdf
   ```

 2. Build
    ```bash
-    git clone --recurse-submodules https://github.com/simon987/sist2
-    ./scripts/get_static_libs.sh
-    cmake .
+    git clone --recursive https://github.com/simon987/sist2/
+    cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
    make
    ```
--- a/1
+++ b/1
--- a/1
+++ b/1
--- a/ci/build.sh
+++ b/ci/build.sh
@@ -1,8 +1,20 @@
 #!/usr/bin/env bash

-./scripts/get_static_libs.sh
+VCPKG_ROOT="/vcpkg"

-cmake .
-make
+rm *.gz
+
+git submodule update --init --recursive
+
+rm -rf CMakeFiles CMakeCache.txt
+cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
+make -j 33
 strip sist2
-strip sist2_scan
+mv sist2 sist2-x64-linux
+
+rm -rf CMakeFiles CMakeCache.txt
+cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
+make -j 33
+cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
+mv sist2_debug sist2-x64-linux-debug
+tar -czf sist2-x64-linux-debug.tar.gz sist2-x64-linux-debug libasan.so.2
--- a/ci/build_arm64.sh
+++ b/ci/build_arm64.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+VCPKG_ROOT="/vcpkg"
+
+rm *.gz
+
+git submodule update --init --recursive
+
+rm -rf CMakeFiles CMakeCache.txt
+cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
+make -j 4
+strip sist2
+mv sist2 sist2-arm64-linux
--- a/demo.gif
+++ b/demo.gif
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -0,0 +1,403 @@
+# Usage
+
+*More examples (specifically with docker/compose) are in progress*
+
+* [scan](#scan)
+    * [options](#scan-options)
+    * [examples](#scan-examples)
+    * [index format](#index-format)
+* [index](#index)
+    * [options](#index-options)
+    * [examples](#index-examples)
+* [web](#web)
+    * [options](#web-options)
+    * [examples](#web-examples)
+    * [rewrite_url](#rewrite_url)
+    * [link to specific indices](#link-to-specific-indices)
+* [exec-script](#exec-script)
+* [tagging](#tagging)
+* [sidecar files](#sidecar-files)
+
+```
+Usage: sist2 scan [OPTION]... PATH
+   or: sist2 index [OPTION]... INDEX
+   or: sist2 web [OPTION]... INDEX...
+   or: sist2 exec-script [OPTION]... INDEX
+Lightning-fast file system indexer and search tool.
+
+    -h, --help                    show this help message and exit
+    -v, --version                 Show version and exit
+    --verbose                     Turn on logging
+    --very-verbose                Turn on debug messages
+
+Scan options
+    -t, --threads=<int>           Number of threads. DEFAULT=1
+    -q, --quality=<flt>           Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
+    --size=<int>                  Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
+    --content-size=<int>          Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
+    --incremental=<str>           Reuse an existing index and only scan modified files.
+    -o, --output=<str>            Output directory. DEFAULT=index.sist2/
+    --rewrite-url=<str>           Serve files from this url instead of from disk.
+    --name=<str>                  Index display name. DEFAULT: (name of the directory)
+    --depth=<int>                 Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
+    --archive=<str>               Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
+    --ocr=<str>                   Tesseract language (use tesseract --list-langs to see which are installed on your machine)
+    -e, --exclude=<str>           Files that match this regex will not be scanned
+    --fast                        Only index file names & mime type
+    --treemap-threshold=<str>     Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
+    --mem-buffer=<int>            Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
+
+Index options
+    -t, --threads=<int>           Number of threads. DEFAULT=1
+    --es-url=<str>                Elasticsearch url with port. DEFAULT=http://localhost:9200
+    --es-index=<str>              Elasticsearch index name. DEFAULT=sist2
+    -p, --print                   Just print JSON documents to stdout.
+    --script-file=<str>           Path to user script.
+    --mappings-file=<str>         Path to Elasticsearch mappings.
+    --settings-file=<str>         Path to Elasticsearch settings.
+    --async-script                Execute user script asynchronously.
+    --batch-size=<int>            Index batch size. DEFAULT: 100
+    -f, --force-reset             Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
+
+Web options
+    --es-url=<str>                Elasticsearch url. DEFAULT=http://localhost:9200
+    --es-index=<str>              Elasticsearch index name. DEFAULT=sist2
+    --bind=<str>                  Listen on this address. DEFAULT=localhost:4090
+    --auth=<str>                  Basic auth in user:password format
+    --tag-auth=<str>              Basic auth in user:password format for tagging
+
+Exec-script options
+    --es-url=<str>                Elasticsearch url. DEFAULT=http://localhost:9200
+    --es-index=<str>              Elasticsearch index name. DEFAULT=sist2
+    --script-file=<str>           Path to user script.
+    --async-script                Execute user script asynchronously.
+Made by simon987 <me@simon987.net>. Released under GPL-3.0
+```
+
+## Scan
+
+### Scan options
+
+* `-t, --threads` 
+      Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-WmiObject Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
+* `-q, --quality` 
+    Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnail quality*
+* `--size` 
+    Thumbnail size in pixels.
+* `--content-size` 
+    Number of bytes of text to be extracted from the content of files (plain text and PDFs).
+    Repeated whitespace and special characters do not count toward this limit.
+* `--incremental`
+    Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
+    will be copied to the new index and will not be parsed again.
+* `-o, --output` Output directory. 
+* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url)) 
+* `--name` Set the `name` option for the web module
+* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, set to -1 for infinite depth
+* `--archive` Archive file mode.
+    * skip: Don't parse
+    * list: Only get file names as text
+    * shallow: Don't parse archives inside archives.
+    * recurse: Scan archives recursively (default)
+* `--ocr` See [OCR](../README.md#OCR)
+* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any 
+    part of the full absolute path.
+    
+    Examples: 
+    * `-e ".*\.ttf"`: Ignore ttf files
+    * `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
+    * `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
+    * `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directories
+    * `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
+     `/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
+* `--fast` Only index file names and mime type
+* `--treemap-threshold` Directories smaller than (`treemap-threshold` * `<total size of the index>`)
+    will not be considered for the disk utilisation visualization; their size will be added to
+    the parent directory. If the parent directory is still smaller than the threshold, it will also be "merged upwards"
+    and so on.
+    
+    In effect, smaller `treemap-threshold` values will yield a more detailed 
+    (but also a more cluttered and harder to read) visualization. 
+    
+* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files 
+    larger than this number will be read sequentially and no *seek* operations will be supported.
+
+    To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
+
+### Scan examples
+
+Simple scan
+```bash
+sist2 scan ~/Documents
+
+sist2 scan \
+    --threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
+    --name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
+    ~/Documents -o ./documents.idx/
+```
+
+Incremental scan
+```
+sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
+```
+
+### Index format
+
+A typical `binary` type index structure looks like this:
+```
+documents.idx/
+├── descriptor.json
+├── _index_139965416830720
+├── _index_139965425223424
+├── _index_139965433616128
+├── _index_139965442008832
+├── _index_139965450401536
+├── treemap.csv
+├── agg_mime.csv
+├── agg_date.csv
+├── agg_size.csv
+├── thumbs/
+|   ├── data.mdb
+|   └── lock.mdb
+├── tags/
+|   ├── data.mdb
+|   └── lock.mdb
+└── meta/
+    ├── data.mdb
+    └── lock.mdb
+```
+
+The `_index_*` files contain the raw binary index data and are not meant to be
+read by other applications. The format is generally compatible across different 
+sist2 versions.
+
+The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
+database containing the thumbnails.
+
+The `descriptor.json` file contains general information about the index. The 
+following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
+
+The `.csv` files are pre-computed aggregations necessary for the stats page.
+
+
+*Advanced usage*
+
+Instead of using the `scan` module, you can also import an index generated
+by a third party application. The 'external' index must have the following format:
+
+```
+my_index/
+├── descriptor.json
+├── _index_0
+├── thumbs/
+|   ├── data.mdb
+|   └── lock.mdb
+└── meta/
+    └── <empty>
+```
+
+*descriptor.json*:
+```json
+{
+    "uuid": "<valid UUID4>",
+    "version": "_external_v1",
+    "root": "(optional)",
+    "name": "<name>",
+    "rewrite_url": "(optional)",
+    "type": "json",
+    "timestamp": 1578971024
+}
+```
+
+*_index_0*: NDJSON format (One json object per line)
+
+```json
+{
+  "_id": "unique uuid for the file",
+  "index": "index uuid4 (same one as descriptor.json!)",
+  "mime": "application/x-cbz",
+  "size": 14341204,
+  "mtime": 1578882996,
+  "extension": "cbz",
+  "name": "my_book",
+  "path": "path/to/books",
+  "content": "text contents of the book",
+  "title": "Title of the book",
+  "tag": ["genre.fiction", "author.someguy", "etc..."],
+  "_keyword": [
+    {"k": "ISBN", "v": "ABCD34789231"}
+  ],
+  "_text": [
+    {"k": "other", "v": "This will be indexed as text"}
+  ]
+}
+```
+
+You can find the full list of supported fields [here](../src/io/serialize.c#L90)
+
+The `_keyword.*` items will be indexed and searchable as **keyword** fields (only full matches allowed).
+The `_text.*` items will be indexed and searchable as **text** fields (fuzzy searching allowed)
+
+
+*thumbs/*:
+
+LMDB key-value store. Keys are **binary** 16-byte md5 hash* (`_id` field)
+and values are raw image bytes.
+
+*\* Hash is calculated from the full path of the file, including the extension, relative to the index root*
+
+Importing an external `binary` type index is technically possible but
+it is currently unsupported and has no guarantees of backward/forward compatibility.
+
+
+## Index
+### Index options
+ * `--es-url` 
+ Elasticsearch url and port. If you are using docker, make sure that both containers are on the
+ same network.
+ * `--es-index` 
+    Elasticsearch index name. DEFAULT=sist2
+ * `-p, --print` 
+    Print index in JSON format to stdout.
+ * `--script-file` 
+    Path to user script. See [Scripting](scripting.md).
+ * `--mappings-file`
+    Path to custom Elasticsearch mappings. If none is specified, [the bundled mappings](https://github.com/simon987/sist2/tree/master/schema) will be used.
+ * `--settings-file`
+    Path to custom Elasticsearch settings. *(See above)*
+ * `--async-script` 
+    Use `wait_for_completion=false` elasticsearch option while executing user script.
+     (See [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/tasks.html))
+ * `--batch-size=<int>` 
+    Index batch size. Indexing is generally faster with larger batches, but payloads that
+    are too large will fail and additional overhead for retrying with smaller sizes may slow
+    down the process.
+ * `-f, --force-reset` 
+    Reset Elasticsearch mappings and settings.
+    
+### Index examples
+
+**Push to elasticsearch**
+```bash
+sist2 index --force-reset --batch-size 1000 --es-url http://localhost:9200 ./my_index/
+sist2 index ./my_index/
+```
+
+**Save index in JSON format**
+```bash
+sist2 index --print ./my_index/ > my_index.ndjson
+```
+
+**Inspect contents of an index**
+```bash
+sist2 index --print ./my_index/ | jq | less
+```
+
+## Web
+
+### Web options
+ * `--es-url=<str>` Elasticsearch url.
+ * `--es-index` 
+    Elasticsearch index name. DEFAULT=sist2
+ * `--bind=<str>` Listen on this address.
+ * `--auth=<str>` Basic auth in user:password format
+ * `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the 
+    `--auth` argument, but authentication is only applied to the `/tag/` endpoint.
+ 
+### Web examples
+
+**Single index**
+```bash
+sist2 web --auth admin:hunter2 --bind 0.0.0.0:8888 my_index
+```
+
+**Multiple indices**
+```bash
+# Indices will be displayed in this order in the web interface
+sist2 web index1 index2 index3 index4
+```
+
+### rewrite_url
+
+When the `rewrite_url` field is not empty, the web module ignores the `root`
+field and will return an HTTP redirect to `<rewrite_url><path>/<name><extension>`
+instead of serving the file from disk. 
+Both the `root` and `rewrite_url` fields are safe to manually modify from the 
+`descriptor.json` file.
+
+### Link to specific indices
+
+To link to specific indices, you can add a list of comma-separated index names to 
+the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
+not displayed.
+
+## exec-script
+
+The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
+
+
+# Tagging
+
+### Manual tagging
+
+You can modify tags of individual documents directly from the 
+ `web` interface. Note that you can set up authentication for this feature
+ with the `--tag-auth` option (See [web options](#web-options))
+
+![manual_tag](manual_tag.png)
+
+Tags that are manually added are saved both in the 
+ index folder (in `/tags/`) and in Elasticsearch*. When re-`index`ing, 
+ they are read from the index and automatically applied.
+ 
+You can safely copy the `/tags/` database to another index.
+
+See [Automatic tagging](#automatic-tagging) for information about tag 
+ hierarchies and tag colors.
+
+\* *It can take a few seconds to take effect in new search queries, and the page needs 
+    to be reloaded for the tags tab to update*
+
+
+### Automatic tagging
+
+See [scripting](scripting.md) documentation.
+
+# Sidecar files
+
+When scanning, sist2 will read metadata from `.s2meta` JSON files and overwrite the 
+original document's metadata. Sidecar metadata files will also work inside archives.
+Sidecar files themselves are not saved in the index.
+
+This feature is useful to leverage third-party applications such as speech-to-text or
+OCR to add additional metadata to a file.
+
+**Example**
+
+```
+~/Documents/
+├── Video.mp4
+└── Video.mp4.s2meta
+```
+
+The sidecar file must have exactly the same file path and the `.s2meta` suffix.
+
+`Video.mp4.s2meta`:
+```json
+{
+  "content": "This sidecar file will overwrite some metadata fields of Video.mp4",
+  "author": "Some author",
+  "duration": 12345,
+  "bitrate": 67890,
+  "some_arbitrary_field": [1,2,3]
+}
+```
+
+```
+sist2 scan ~/Documents -o ./docs.idx
+sist2 index ./docs.idx
+```
+
+*NOTE*: It is technically possible to overwrite the `tag` value using sidecar files, however,
+it is not currently possible to restore both manual tags and sidecar tags without user scripts
+while reindexing.
--- a/scripting/genre_example.png
+++ b/scripting/genre_example.png
--- a/docs/manual_tag.png
+++ b/docs/manual_tag.png
--- a/scripting/README.md
+++ b/scripting/README.md
@@ -39,7 +39,7 @@ it adds the `genre.<genre>` tag.
 ArrayList tags = ctx._source.tag = new ArrayList();

 if (ctx._source?.genre != null) {
-    tags.add("genre." + ctx._source.genre.toLowerCase())
+    tags.add("genre." + ctx._source.genre.toLowerCase());
 }
 ```

@@ -54,6 +54,11 @@ script.painless.regex.enabled: true
 ```
 Or, if you're using docker add `-e "script.painless.regex.enabled=true"`

+**Tag color**
+
+You can specify the color for an individual tag by appending a 
+hexadecimal color code (`#RRGGBBAA`) to the tag name.
+
 ### Examples

 If `(20XX)` is in the file name, add the `year.<year>` tag:
@@ -62,7 +67,7 @@ ArrayList tags = ctx._source.tag = new ArrayList();

 Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
 if (m.find()) {
-    tags.add("year." + m.group(1))
+    tags.add("year." + m.group(1));
 }
 ```

@@ -106,12 +111,32 @@ if (ctx._source.path != "") {
 }
 ```

-Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
+Parse `EXIF:F Number` tag
 ```Java
-ArrayList tags = ctx._source.tag = new ArrayList();
-
-if (ctx._source.path != "") {
-    String[] names = ctx._source.path.splitOnToken('/');
-    tags.add("studio." + names[names.length-1]);
+if (ctx._source?.exif_fnumber != null) {
+    String[] values = ctx._source.exif_fnumber.splitOnToken(' ');
+    String aperture = String.valueOf(Float.parseFloat(values[0]) / Float.parseFloat(values[1]));
+    if (aperture == "NaN") {
+        aperture = "0,0";
+    }
+    tags.add("Aperture.f/" + aperture.replace(".", ","));
 }
 ```
+
+Display year and month from the `EXIF:DateTime` tag
+```Java
+if (ctx._source?.exif_datetime != null) {
+    SimpleDateFormat parser = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss");
+    Date date = parser.parse(ctx._source.exif_datetime);
+
+    SimpleDateFormat yp = new SimpleDateFormat("yyyy");
+    SimpleDateFormat mp = new SimpleDateFormat("MMMMMMMMM");
+
+    String year = yp.format(date);
+    String month = mp.format(date);
+
+    tags.add("Month." + month);
+    tags.add("Year." + year);
+}
+
+```
--- a/docs/sist2.png
+++ b/docs/sist2.png
--- a/docs/stats.png
+++ b/docs/stats.png
--- a/include/mce/config.h
+++ b/include/mce/config.h
@@ -1,53 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
- notice, this list of conditions and the following disclaimer in 
- the documentation and/or other materials provided with the 
- distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
- may be used to endorse or promote products derived from this 
- software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/**@file config/mce/config.h
- */
-#ifndef MCE_CONFIG_H
-#define MCE_CONFIG_H
-
-#include <libxml/xmlstring.h>
-#include <stdio.h>
-#include <plib/plib.h>
-#include <assert.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MCE_NAMESPACE_SUBSUMPTION_ENABLED 0
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif  
-
-#endif /* MCE_CONFIG_H */
--- a/include/mce/helper.h
+++ b/include/mce/helper.h
@@ -1,189 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file mce/helper.h
-Helper functions needed by mce/textreader.h and mce/textwriter.h to implement MCE:
- mceQNameLevelAdd(), mceQNameLevelLookup() and mceQNameLevelCleanup() maintain a set of mceQNameLevel_t tuples.
- mceQNameLevelPush() and mceQNameLevelPopIfMatch() maintain a stack of mceQNameLevel_t tuples.
- mceCtxInit(), mceCtxCleanup() and mceCtxUnderstandsNamespace() manage a context which holds all information needed to do MCE proprocessing.
- */
-#include <mce/config.h>
-
-#ifndef MCE_HELPER_H
-#define MCE_HELPER_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-    /**
-      Tiple (ns, ln, level).
-    */
-    typedef struct MCE_QNAME_LEVEL {
-        xmlChar *ns;
-        xmlChar *ln;
-        puint32_t level;
-        puint32_t flag; // used by mceTextWriter
-    } mceQNameLevel_t;
-
-    /**
-     */
-    typedef enum MCE_SKIP_STATE_ENUM {
-        MCE_SKIP_STATE_IGNORE,
-        MCE_SKIP_STATE_ALTERNATE_CONTENT,
-        MCE_SKIP_STATE_CHOICE_MATCHED
-    } mceSkipState_t;
-
-    /**
-     Represents an intervall of levels which are "skipped" i.e. ignored.
-     */
-    typedef struct MCE_SKIP_ITEM {
-        puint32_t level_start;
-        puint32_t level_end;
-        mceSkipState_t state;
-    } mceSkipItem_t;
-
-    /**
-      Either represents a set of (ns, ln, level) triples.
-    */
-    typedef struct MCE_QNAME_LEVEL_SET {
-        mceQNameLevel_t *list_array;
-        puint32_t list_items;
-        puint32_t max_level;
-    } mceQNameLevelSet_t;
-
-    /**
-     The skip stack.
-     */
-    typedef struct MCE_SKIP_STACK {
-        mceSkipItem_t *stack_array;
-        puint32_t stack_items;
-    } mceSkipStack_t;
-
-
-    typedef enum MCE_ERROR_ENUM {
-        MCE_ERROR_NONE,
-        MCE_ERROR_XML,
-        MCE_ERROR_MUST_UNDERSTAND,
-        MCE_ERROR_VALIDATION,
-        MCE_ERROR_MEMORY
-    } mceError_t;
-
-    /**
-      Holds all information to do MCE preprocessing.
-    */
-    typedef struct MCE_CONTEXT {
-        mceQNameLevelSet_t ignorable_set;
-        mceQNameLevelSet_t understands_set;
-        mceQNameLevelSet_t processcontent_set;
-        mceQNameLevelSet_t suspended_set;
-#if (MCE_NAMESPACE_SUBSUMPTION_ENABLED)
-        mceQNameLevelSet_t subsume_namespace_set;
-        mceQNameLevelSet_t subsume_exclude_set;
-        mceQNameLevelSet_t subsume_prefix_set;
-#endif
-        mceSkipStack_t skip_stack;
-        mceError_t error;
-        pbool_t mce_disabled;        
-        puint32_t suspended_level;
-    } mceCtx_t;
-
-    /**
-      Add a new tiple (ns, ln, level) to the triple set \c qname_level_set.
-      The \c ns_sub string is optional and will not be touched.
-    */
-    pbool_t mceQNameLevelAdd(mceQNameLevelSet_t *qname_level_set, const xmlChar *ns, const xmlChar *ln, puint32_t level);
-
-    /**
-      Lookup a tiple (ns, ln, level) via \c ns and \c ln. If \c ignore_ln is PTRUE then the first tiple matching \c ns will be returned.
-    */
-    mceQNameLevel_t* mceQNameLevelLookup(mceQNameLevelSet_t *qname_level_set, const xmlChar *ns, const xmlChar *ln, pbool_t ignore_ln);
-
-    /**
-      Remove all triples (ns, ln, level) where the level greater or equal to \c level.
-    */
-    pbool_t mceQNameLevelCleanup(mceQNameLevelSet_t *qname_level_set, puint32_t level);
-
-    /**
-      Push a new skip intervall (level_start, level_end, state) on the stack \c skip_stack.
-    */
-    pbool_t mceSkipStackPush(mceSkipStack_t *skip_stack, puint32_t level_start, puint32_t level_end, mceSkipState_t state);
-
-    /**
-      Pop the intervall (ns, ln, level) from the stack \c qname_level_array.
-    */
-    void mceSkipStackPop(mceSkipStack_t *skip_stack);
-
-    /**
-     Returns top item or NULL.
-     */
-    mceSkipItem_t *mceSkipStackTop(mceSkipStack_t *skip_stack);
-
-    /**
-     Returns TRUE, if the \c level is in the top skip intervall.
-     */
-    pbool_t mceSkipStackSkip(mceSkipStack_t *skip_stack, puint32_t level);
-
-    /**
-      Initialize the mceCtx_t \c ctx.
-    */
-    pbool_t mceCtxInit(mceCtx_t *ctx);
-
-    /**
-      Cleanup, i.e. release all resourced from the mceCtx_t \c ctx.
-    */
-    pbool_t mceCtxCleanup(mceCtx_t *ctx);
-
-    /**
-      Register the namespace \ns in \c ctx.
-    */
-    pbool_t mceCtxUnderstandsNamespace(mceCtx_t *ctx, const xmlChar *ns);
-
-    /**
-     Register the namespace \ns in \c ctx.
-     */
-    pbool_t mceCtxSuspendProcessing(mceCtx_t *ctx, const xmlChar *ns, const xmlChar *ln);
-    
-
-
-#if (MCE_NAMESPACE_SUBSUMPTION_ENABLED)
-    /**
-    Subsume namespace \c ns_new with \c ns_old.
-     */
-    pbool_t mceCtxSubsumeNamespace(mceCtx_t *ctx, const xmlChar *prefix_new, const xmlChar *ns_new, const xmlChar *ns_old);
-#endif
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* MCE_HELPER_H */
--- a/include/mce/textreader.h
+++ b/include/mce/textreader.h
@@ -1,464 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file mce/textreader.h
- 
- */
-#ifndef MCE_TEXTREADER_H
-#define MCE_TEXTREADER_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-    /**
-      A handle to an MCE-aware libxml2 xmlTextReader.
-    */
-    typedef struct MCE_TEXTREADER mceTextReader_t;
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-
-#include <mce/config.h>
-#include <opc/opc.h>
-#include <mce/helper.h>
-#include <libxml/xmlwriter.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-    struct MCE_TEXTREADER {
-        xmlTextReaderPtr reader;
-        mceCtx_t mceCtx;
-    };
-
-    /**
-      Wrapper around a libxml2 xmlTextReaderRead function.
-      \see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderRead
-    */
-    int mceTextReaderRead(mceTextReader_t *mceTextReader);
-
-    /**
-      Wrapper around a libxml2 xmlTextReaderNext function.
-      \see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderNext
-    */
-    int mceTextReaderNext(mceTextReader_t *mceTextReader);
-
-    /** 
-      Creates an mceTextReader from an xmlTextReader.
-      \code
-      mceTextReader_t reader;
-      mceTextReaderInit(&reader, xmlNewTextReaderFilename("sample.xml"));
-      // reader is ready to use.
-      mceTextReaderCleanup(&reader);
-      \endcode
-      \see http://xmlsoft.org/html/libxml-xmlreader.html#xmlNewTextReaderFilename
-    */
-    int mceTextReaderInit(mceTextReader_t *mceTextReader, xmlTextReaderPtr reader);
-
-    /**
-      Cleanup MCE reader, i.e. free all resources. Also calls xmlTextReaderClose and xmlFreeTextReader.
-      \see http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderClose
-      \see http://xmlsoft.org/html/libxml-xmlreader.html#xmlFreeTextReader
-    */
-    int mceTextReaderCleanup(mceTextReader_t *mceTextReader);
-
-    /** 
-      Reads all events from \c mceTextReader and pipes them to \c writer.
-      \code
-      mceTextReader_t reader;
-      mceTextReaderInit(&reader, xmlNewTextReaderFilename("sample.xml"));
-      mceTextReaderUnderstandsNamespace(&reader, _X("http://myextension"));
-      xmlTextWriterPtr writer=xmlNewTextWriterFilename("out.xml", 0);
-      mceTextReaderDump(&reader, writer, P_FALSE);
-      xmlFreeTextWriter(writer);
-      mceTextReaderCleanup(&reader);
-      \endcode
-      */
-    int mceTextReaderDump(mceTextReader_t *mceTextReader, xmlTextWriter *writer, pbool_t fragment);
-
-    /**
-      Registers an MCE namespace.
-      \see mceTextReaderDump()
-      */
-    int mceTextReaderUnderstandsNamespace(mceTextReader_t *mceTextReader, const xmlChar *ns);
-
-    /**
-     Disable MCE processing.
-     \return Returns old value.
-     */
-    pbool_t mceTextReaderDisableMCE(mceTextReader_t *mceTextReader, pbool_t flag);
-
-
-    /**
-     Signal an error to the MCE processor.
-     */
-    void mceRaiseError(xmlTextReader *reader, mceCtx_t *ctx, mceError_t error, const xmlChar *str, ...);
-
-    /**
-        Internal function which does the MCE postprocessing. E.g. mceTextReaderRead() is implemented as
-        \code
-        mceTextReaderPostprocess(mceTextReader->reader, &mceTextReader->mceCtx, xmlTextReaderRead(mceTextReader->reader))
-        \endcode
-        This function is exposed to make an existing libxml2 xmlTextReader MCE-aware.
-    */
-    int mceTextReaderPostprocess(xmlTextReader *reader, mceCtx_t *ctx, int ret);
-
-    /**
-     Get the error code.
-     */
-    mceError_t mceTextReaderGetError(mceTextReader_t *mceTextReader);
-
-/**
- Helper macro to declare a start/end document block in a declarative way:
- \code
-  mce_start_document(reader) {
-  } mce_end_document(reader);
-  \endcode
-  \hideinitializer
-*/
-#define mce_start_document(_reader_) \
-    if (NULL!=(_reader_)) {            \
-        mceTextReaderRead(_reader_); \
-        if (0)                     
-
-/**
-  \see mce_start_document.
-  \hideinitializer
-*/
-#define mce_end_document(_reader_)   \
-    } /* if (NULL!=reader) */        \
-
-
-/**
-  Container for mce_start_element and mce_start_attribute declarations.
-  \see mce_match_element
-  \see mce_match_attribute
-  \hideinitializer
-  */
-#define mce_start_choice(_reader_)  \
-    if (0)                          
-
-/**
-  \see mce_start_choice
-  \hideinitializer
-  */
-#define mce_end_choice(_reader_) 
-
-
-/**
-  Skips the attributes. 
-  \see mce_match_element.
-  \hideinitializer
-*/
-#define mce_skip_attributes(_reader_) \
-    mce_start_attributes(_reader_) {  \
-    } mce_end_attributes(_reader_);   
-
-
-/**
-  Skips the children.
-  \see mce_match_attribute.
-  \hideinitializer
-*/
-#define mce_skip_children(_reader_) \
-    mce_start_children(_reader_) {  \
-    } mce_end_children(_reader_);   
-
-/**
-  \see mce_start_element.
-  \hideinitializer
-*/
-#define mce_start_children(_reader_)                  \
-if (!xmlTextReaderIsEmptyElement((_reader_)->reader)) { \
-    mceTextReaderRead(_reader_); do {                 \
-        if (0)                                        
-
-/**
-  \see mce_start_element.
-  \hideinitializer
-*/
-#define mce_end_children(_reader_)                                                      \
-        else {                                                                          \
-            if (XML_READER_TYPE_END_ELEMENT!=xmlTextReaderNodeType((_reader_)->reader)) { \
-                mceTextReaderNext(_reader_); /*skip unhandled element */                \
-            }                                                                           \
-        }                                                                               \
-    } while(XML_READER_TYPE_END_ELEMENT!=xmlTextReaderNodeType((_reader_)->reader) &&     \
-            XML_READER_TYPE_NONE!=xmlTextReaderNodeType((_reader_)->reader));             \
-} /* if (!xmlTextReaderIsEmptyElement(reader->reader)) */                               
-
-
-/**
-  Helper macro to match an element. Useful for calling code in a separate function:
-
-  \code
-  void handleElement(reader) {
-    mce_start_choice(reader) {
-        mce_start_element(reader, _X("ns"), _X("element")) {
-            
-        } mce_end_element(reader);
-    } mce_end_choice(reader);
-  }
-
-  void parse(reader) {
-    mce_start_document(reader) {
-      mce_start_element(reader, _X("ns"), _X("ln")) {
-        mce_skip_attributes(reader);
-        mce_start_children(reader) {
-           mce_match_element(reader, _X("ns"), _X("element")) {
-             handleElement(reader);
-           }
-        } mce_end_children(reader);
-      } mce_end_element(reader);
-    } mce_end_document(reader);
-  }
-  \endcode
-  \hideinitializer
-*/
-#define mce_match_element(_reader_, ns, ln)                                                       \
-    } else if (XML_READER_TYPE_ELEMENT==xmlTextReaderNodeType((_reader_)->reader)                 \
-            && (NULL==ns || 0==xmlStrcmp(ns, xmlTextReaderConstNamespaceUri((_reader_)->reader))) \
-            && (NULL==ln || 0==xmlStrcmp(ln, xmlTextReaderConstLocalName((_reader_)->reader)))) { 
-
-
-/**
- Helper macro to declare an element block in a declarative way:
- \code
-  mce_start_element(reader, _X("ns"), _X("ln")) {
-    mce_start_attributes(reader) {
-      mce_start_attribute(reader, _X("ns"), _X("lnA")) {
-         // code for handling lnA.
-      } mce_end_attribute(reader);
-      mce_start_attribute(reader, _X("ns"), _X("lnB")) {
-         // code for handling lnB.
-      } mce_end_attribute(reader);
-    } mce_end_attributes(reader);
-    mce_start_children(reader) {
-        mce_start_element(reader, _X("ns"), _X("lnA")) {
-         // code for handling lnA.
-        } mce_end_element(reader);
-        mce_start_element(reader, _X("ns"), _X("lnB")) {
-         // code for handling lnB.
-        } mce_end_element(reader);
-        mce_start_text(reader) {
-         // code for handling text.
-        } mce_end_text(reader);
-    } mce_end_children(reader);
-  } mce_end_element(reader);
-  \endcode
-  \hideinitializer
-*/
-#define mce_start_element(_reader_, ns, ln) \
-    mce_match_element(_reader_, ns, ln)     
-
-/**
-  \see mce_start_element.
-  \hideinitializer
-*/
-#define mce_end_element(_reader_) \
-    mceTextReaderNext(_reader_)   
-
-/**
-  Matches #TEXT without consuming it.
-  \hideinitializer
-*/
-#define mce_match_text(_reader_)                                                                   \
-    } else if (XML_READER_TYPE_TEXT==xmlTextReaderNodeType((_reader_)->reader)                     \
-            || XML_READER_TYPE_SIGNIFICANT_WHITESPACE==xmlTextReaderNodeType((_reader_)->reader)) {
-
-
-/**
-  \see mce_start_element.
-  \hideinitializer
-*/
-#define mce_start_text(_reader_) \
-    mce_match_text(_reader_)      
-
-/**
-  \see mce_start_element.
-  \hideinitializer
-*/
-#define mce_end_text(_reader_) \
-    mceTextReaderNext(_reader_)
-
-/**
-  \see mce_start_element.
-  \hideinitializer
-*/
-#define mce_start_attributes(_reader_)                            \
-    if (1==xmlTextReaderMoveToFirstAttribute((_reader_)->reader)) { \
-        do {                                                      \
-            if (0)                                                
-
-/**
-  \see mce_start_element.
-  \hideinitializer
-*/
-#define mce_end_attributes(_reader_)                                    \
-            else { /* skipped attribute */ }                            \
-        } while(1==xmlTextReaderMoveToNextAttribute((_reader_)->reader)); \
-    xmlTextReaderMoveToElement((_reader_)->reader); }                     
-
-/**
-  Helper macro to match an attribute. Useful for calling code in a separate function:
-
-  \code
-  void handleA(reader) {
-    mce_start_choice(reader) {
-        mce_start_attribute(reader, _X("ns"), _X("attr")) {
-
-        } mce_end_attribute(reader);
-    } mce_end_choice(reader);
-  }
-
-  void parse(reader) {
-    mce_start_document(reader) {
-      mce_start_element(reader, _X("ns"), _X("ln")) {
-        mce_start_attributes(reader) {
-           mce_match_attribute(reader, _X("ns"), _X("attr")) {
-             handleA(reader);
-           }
-        } mce_end_attributes(reader);
-        mce_skip_children(reader);
-      } mce_end_element(reader);
-    } mce_end_document(reader);
-  }
-  \endcode
-  \hideinitializer
-*/
-#define mce_match_attribute(_reader_, ns, ln)                                                   \
-    } else if ((NULL==ns || 0==xmlStrcmp(ns, xmlTextReaderConstNamespaceUri((_reader_)->reader))) \
-            && (NULL==ln || 0==xmlStrcmp(ln, xmlTextReaderConstLocalName((_reader_)->reader)))) { 
-
-/**
-  \see mce_start_element.
-  \hideinitializer
-*/
-#define mce_start_attribute(_reader_, ns, ln) \
-    mce_match_attribute(_reader_, ns, ln) 
-
-/**
-  \see mce_start_element.
-  \hideinitializer
-*/
-#define mce_end_attribute(_reader_)
-
-
-/**
-  Error handling for MCE parsers.
-  \code
-   mce_start_element(&reader, NULL, _X("Default")) {
-       const xmlChar *ext=NULL;
-       const xmlChar *type=NULL;
-       mce_start_attributes(&reader) {
-           mce_start_attribute(&reader, NULL, _X("Extension")) {
-               ext=xmlTextReaderConstValue(reader.reader);
-           } mce_end_attribute(&reader);
-           mce_start_attribute(&reader, NULL, _X("ContentType")) {
-               type=xmlTextReaderConstValue(reader.reader);
-           } mce_end_attribute(&reader);
-       } mce_end_attributes(&reader);
-       mce_error_guard_start(&reader) {
-           mce_error(&reader, NULL==ext || ext[0]==0, MCE_ERROR_VALIDATION, "Missing @Extension attribute!");
-           mce_error(&reader, NULL==type || type[0]==0, MCE_ERROR_VALIDATION, "Missing @ContentType attribute!");
-           opcContainerType *ct=insertType(c, type, OPC_TRUE);
-           mce_error(&reader, NULL==ct, MCE_ERROR_MEMORY, NULL);
-           opcContainerExtension *ce=opcContainerInsertExtension(c, ext, OPC_TRUE);
-           mce_error(&reader, NULL==ce, MCE_ERROR_MEMORY, NULL);
-           mce_errorf(&reader, NULL!=ce->type && 0!=xmlStrcmp(ce->type, type), MCE_ERROR_VALIDATION, "Extension \"%s\" is mapped to type \"%s\" as well as \"%s\"", ext, type, ce->type);
-           ce->type=ct->type;
-       } mce_error_guard_end(&reader);
-       mce_skip_children(&reader);
-   } mce_end_element(&reader);
-  \endcode
-  \hideinitializer
-*/
-#define mce_error_guard_start(_reader_) if (MCE_ERROR_NONE==(_reader_)->mceCtx.error) do {
-
-/**
-  \see mce_error_guard_start
-  \hideinitializer
-*/
-#define mce_error_guard_end(_reader_)  } while(0)
-
-/**
-  Signal an error if \c guard is true.
-  \hideinitializer
-*/
-#define mce_error(_reader_, guard, err, msg) if (guard) { (_reader_)->mceCtx.error=(err); fprintf(stderr, (NULL!=msg?msg:#err));  continue; }
-
-/**
-  Signal an error with a formatted message if \c guard is true.
-  \hideinitializer
-*/
-#if defined(__GNUC__)
-#define mce_errorf(_reader_, guard, err, msg, ...) if (guard) { mceRaiseError((_reader_)->reader, &(_reader_)->mceCtx, err, _X((NULL!=msg?msg:#err)), ##__VA_ARGS__ );  continue; }
-#else
-#define mce_errorf(_reader_, guard, err, msg, ...) if (guard) { mceRaiseError((_reader_)->reader, &(_reader_)->mceCtx, err, _X((NULL!=msg?msg:#err)), __VA_ARGS__ );  continue; }
-#endif
-
-/**
-  Only issues the error when in "strict mode".
-  \hideinitializer
-*/
-#define mce_error_strict mce_error
-
-/**
-  \see mce_error_strict
-  \hideinitializer
-*/
-#define mce_error_strictf mce_errorf
-
-
-/**
-  Marker for an MCE definition.
-  \hideinitializer
-*/
-#define mce_def
-
-/**
-  Marker for an MCE reference.
-  \hideinitializer
-*/
-#define mce_ref(r) (r)
-
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* MCE_TEXTREADER_H */
--- a/include/mce/textwriter.h
+++ b/include/mce/textwriter.h
@@ -1,176 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file mce/textwriter.h
-
-*/
-#include <mce/config.h>
-#include <libxml/xmlwriter.h>
-#include <mce/helper.h>
-
-#ifndef MCE_TEXTWRITER_H
-#define MCE_TEXTWRITER_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif    
-
-/**
-  Default flags for an MCE namespace declaration.
-  */
-#define MCE_DEFAULT 0x0
-
-/**
-  Flags MCE namespace declaration "ignorable".
-  */
-#define MCE_IGNORABLE 0x1
-
-/**
-  Flags MCE namespace declaration "must understand".
-  */
-#define MCE_MUSTUNDERSTAND 0x2
-
-    /**
-      The MCE text writer context.
-      */
-    typedef struct MCE_TEXTWRITER_STRUCT mceTextWriter;
-
-    /**
-      Create a new MCE text writer.
-      \see http://xmlsoft.org/html/libxml-xmlIO.html#xmlOutputBufferCreateIO
-      \see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlNewTextWriter
-      */
-    mceTextWriter *mceTextWriterCreateIO(xmlOutputWriteCallback iowrite, xmlOutputCloseCallback  ioclose, void *ioctx, xmlCharEncodingHandlerPtr encoder);
-
-    /**
-      Helper which creates a new MCE text writer for a FILE handle.
-      */
-    mceTextWriter *mceNewTextWriterFile(FILE *file);
-
-    /**
-      Free all resources for \c w.
-      */
-    int mceTextWriterFree(mceTextWriter *w);
-
-    /**
-      \see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartDocument
-      */
-    int mceTextWriterStartDocument(mceTextWriter *w);
-
-    /**
-      \see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterEndDocument
-      */
-    int mceTextWriterEndDocument(mceTextWriter *w);
-
-    /**
-      Start a new XML element. If ns==NULL then there is no namespace and ""==ns means the default namespace.
-      \see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartElement
-      \see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterStartElementNS
-      */
-    int mceTextWriterStartElement(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
-
-    /**
-      \see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterEndElement
-      */
-    int mceTextWriterEndElement(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
-
-    /**
-      \see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterWriteString
-      */
-    int mceTextWriterWriteString(mceTextWriter *w, const xmlChar *content);
-
-    /**
-      Register a namespace. Must be called before mceTextWriterStartElement.
-      \see MCE_DEFAULT
-      \see MCE_IGNORABLE
-      \see MCE_MUSTUNDERSTAND
-      */
-    const xmlChar *mceTextWriterRegisterNamespace(mceTextWriter *w, const xmlChar *ns, const xmlChar *prefix, int flags);
-
-    /**
-      Register qname (ns, ln) as a "process content" element wrt. MCE. Must be called before mceTextWriterStartElement.
-      */
-    int mceTextWriterProcessContent(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln);
-
-    /**
-      Writes a formatted attribute.
-      \see http://xmlsoft.org/html/libxml-xmlwriter.html#xmlTextWriterWriteFormatAttribute
-      */
-    int mceTextWriterAttributeF(mceTextWriter *w, const xmlChar *ns, const xmlChar *ln, const char *value, ...);
-
-    /**
-      Starts an MCE alternate content section.
-      */
-    int mceTextWriterStartAlternateContent(mceTextWriter *w);
-
-    /**
-      Ends an MCE alternate content section.
-      */
-    int mceTextWriterEndAlternateContent(mceTextWriter *w);
-
-    /**
-      Start an MCE choice.
-      */
-    int mceTextWriterStartChoice(mceTextWriter *w, const xmlChar *ns);
-
-    /**
-      Ends an MCE choice.
-      */
-    int mceTextWriterEndChoice(mceTextWriter *w);
-
-    /**
-      Start an MCE fallback.
-      */
-    int mceTextWriterStartFallback(mceTextWriter *w);
-
-    /**
-      Ends an MCE fallback.
-      */
-    int mceTextWriterEndFallback(mceTextWriter *w);
-
-
-    /**
-      Returns the underlying xmlTextWriter.
-      */
-    xmlTextWriterPtr mceTextWriterIntern(mceTextWriter *w);
-
-    /**
-      Helper which creates a new xmlTextWriterPtr for a FILE handle.
-      */
-    xmlTextWriterPtr xmlNewTextWriterFile(FILE *file);
-
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif    
-
-#endif /* MCE_TEXTWRITER_H */
--- a/include/opc/config.h
+++ b/include/opc/config.h
@@ -1,189 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
- notice, this list of conditions and the following disclaimer in 
- the documentation and/or other materials provided with the 
- distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
- may be used to endorse or promote products derived from this 
- software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/**@file config/opc/config.h
- */
-#ifndef OPC_CONFIG_H
-#define OPC_CONFIG_H
-
-#include <libxml/xmlstring.h>
-#include <plib/plib.h>
-#include <assert.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif   
-
-
-/**
-  Assert expression e is true. Will be removed entirely in release mode.
-  \hideinitializer
- */
-#define OPC_ASSERT(e) assert(e)
-
-/**
-  Assert expression e is true. Expression will be executed in release mode too.
-  \hideinitializer
- */
-#ifdef NDEBUG
-#define OPC_ENSURE(e) (void)(e)
-#else
-#define OPC_ENSURE(e) assert(e)
-#endif
-
-
-/**
-  Constant for boolean true.
-  \hideinitializer
- */
-#define OPC_TRUE (0==0)
-
-/**
-  Constant for boolean false.
-  \hideinitializer
- */
-#define OPC_FALSE (0==1)
-
-    /** 
-      Boolean type.
-      \hideinitializer
-      */
-    typedef pbool_t opc_bool_t;
-
-    /** 
-      Type which represents an offset in e.g. a file.
-      \hideinitializer
-      */
-    typedef pofs_t opc_ofs_t;
-
-    /** 
-      8-bit unsigned integer.
-      \hideinitializer
-      */
-    typedef puint8_t opc_uint8_t;
-
-    /** 
-      16-bit unsigned integer.
-      \hideinitializer
-      */
-    typedef puint16_t opc_uint16_t;
-
-    /** 
-      32-bit unsigned integer.
-      \hideinitializer
-      */
-    typedef puint32_t opc_uint32_t;
-
-    /** 
-      64-bit unsigned integer.
-      \hideinitializer
-      */
-    typedef puint64_t opc_uint64_t;
-
-    /** 
-      8-bit signed integer.
-      \hideinitializer
-      */
-    typedef pint8_t opc_int8_t;
-
-    /** 
-      16-bit signed integer.
-      \hideinitializer
-      */
-    typedef pint16_t opc_int16_t;
-
-    /** 
-      32-bit signed integer.
-      \hideinitializer
-      */
-    typedef pint32_t opc_int32_t;
-
-    /** 
-      64-bit signed integer.
-      \hideinitializer
-      */
-    typedef pint64_t opc_int64_t;
-
-/**
-  Default size of the deflate buffer used by zlib.
-  */
-#define OPC_DEFLATE_BUFFER_SIZE 4096
-
-/**
-  Max system path len.
-  */
-#define OPC_MAX_PATH 512
-
-    /**
-      Error codes for the OPC module.
-      */
-    typedef enum OPC_ERROR_ENUM {
-        OPC_ERROR_NONE,
-        OPC_ERROR_STREAM,
-        OPC_ERROR_SEEK, // can't seek
-        OPC_ERROR_UNSUPPORTED_DATA_DESCRIPTOR,
-        OPC_ERROR_UNSUPPORTED_COMPRESSION,
-        OPC_ERROR_DEFLATE,
-        OPC_ERROR_HEADER,
-        OPC_ERROR_MEMORY,
-        OPC_ERROR_XML, 
-        OPC_ERROR_USER // user triggered an abort
-    } opc_error_t;
-    
-    /**
-      Compression options for OPC streams.
-      */
-    typedef enum OPC_COMPRESSIONOPTION_ENUM {
-        OPC_COMPRESSIONOPTION_NONE,
-        OPC_COMPRESSIONOPTION_NORMAL,
-        OPC_COMPRESSIONOPTION_MAXIMUM,
-        OPC_COMPRESSIONOPTION_FAST,
-        OPC_COMPRESSIONOPTION_SUPERFAST
-    } opcCompressionOption_t;
-
-
-/**
-  Helper for debug logs.
-  \hideinitializer
-  */
-#define opc_logf printf
-
-/**
-  Abstraction for memset(m, 0, s).
-  \hideinitializer
- */
-#define opc_bzero_mem(m,s) memset(m, 0, s)
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif  
-
-#endif /* OPC_CONFIG_H */
--- a/include/opc/container.h
+++ b/include/opc/container.h
@@ -1,300 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file opc/container.h
-
- The container.h module has the fundamental methods for dealing with ZIP-based OPC container. 
- 
- OPC container can be opened in READ-ONLY mode, WRITE-ONLY mode, READ/WRITE mode, TEMPLATE mode and TRANSITION mode. 
- The most notable mode is the READ/WRITE mode, which gives you concurrent stream-based READ and WRITE access to a 
- single ZIP-based OPC container. This is achieved without the use of temporary files by taking advantage of the 
- OPC specific “interleave” mode. \see http://standards.iso.org/ittf/PubliclyAvailableStandards/c051459_ISOIEC_29500-2_2008(E).zip
- 
- The TEMPLATE mode allows very fast customized "cloning" of ZIP-based OPC container by using "RAW access" to the ZIP streams. 
- The TRANSITION mode is a special version of the TEMPLATE mode, which allows transition-based READ/WRITE access to the 
- ZIP-based OPC container using a temporary file.
- 
- */
-#include <opc/config.h>
-#include <opc/file.h>
-
-#ifndef OPC_CONTAINER_H
-#define OPC_CONTAINER_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif    
-    /**
-     Handle to an OPC container created by \ref opcContainerOpen.
-     \see opcContainerOpen.
-     */
-    typedef struct OPC_CONTAINER_STRUCT opcContainer;
-    
-    /**
-     Modes for opcContainerOpen();
-     \see opcContainerOpen
-     */
-    typedef enum {
-        /**
-         Opens the OPC container denoted by \a fileName in READ-ONLY mode. The \a destName parameter must be \a NULL.
-         \hideinitializer
-         */
-        OPC_OPEN_READ_ONLY=0, 
-        /**
-         Opens the OPC container denoted by \a fileName in WRITE-ONLY mode. The \a destName parameter must be \a NULL.
-         \hideinitializer
-         */
-        OPC_OPEN_WRITE_ONLY=1,
-        /**
-         Opens the OPC container denoted by \a fileName in READ/WRITE mode. The \a destName parameter must be \a NULL.
-         \hideinitializer
-         */
-        OPC_OPEN_READ_WRITE=2,
-        /**
-         This mode will open the container denoted by \a fileName in READ-ONLY mode and the container denoted by 
-         \a destName in write-only mode. Any modifications will be written to the container denoted by \a destName 
-         and the unmodified streams from \a fileName will be written to \a destName on closing.
-         \warning Currently not implemented.
-         \hideinitializer
-         */
-        OPC_OPEN_TEMPLATE=3,
-        /**
-         Like the OPC_OPEN_TEMPLATE mode, but the \a destName will be renamed to the \a fileName on closing. If \a destName 
-         is \a NULL, then the name of the temporary file will be generated automatically.
-         \warning Currently not implemented.
-         \hideinitializer
-         */
-        OPC_OPEN_TRANSITION=4
-    } opcContainerOpenMode; 
-    
-    /** Modes for opcContainerClose.
-     \see opcContainerClose.
-     */
-    typedef enum {
-        /**
-         Close the OPC container without any further postprocessing.
-         \hideinitializer
-         */
-        OPC_CLOSE_NOW = 0,
-        /**
-         Close the OPC container and trim the file by removing unused fragments like e.g. 
-         deleted parts.
-         \hideinitializer
-         */
-        OPC_CLOSE_TRIM = 1,
-        /**
-         Close the OPC container like in \a OPC_CLOSE_TRIM mode, but additionally remove any 
-         "interleaved" parts by reordering them.
-         \warning Currently not implemented. Same semantic as OPC_CLOSE_TRIM.       
-         \hideinitializer
-         */
-        OPC_CLOSE_DEFRAG = 2
-    } opcContainerCloseMode;
-    
-    /**
-     Opens a ZIP-based OPC container.
-     @param[in] fileName. For more details see \ref opcContainerOpenMode.
-     @param[in] mode. For more details see \ref opcContainerOpenMode.
-     @param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
-     @param[in] destName. For more details see \ref opcContainerOpenMode.
-     @return \a NULL if failed. 
-     \see opcContainerOpenMode
-     \see opcContainerDump
-     */
-    opcContainer* opcContainerOpen(const xmlChar *fileName, 
-                                   opcContainerOpenMode mode, 
-                                   void *userContext, 
-                                   const xmlChar *destName);
-
-    /**
-     Opens a ZIP-based OPC container from memory.
-     @param[in] data. 
-     @param[in] data_len.
-     @param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
-     @param[in] mode. For more details see \ref opcContainerOpenMode.
-     @return \a NULL if failed. 
-     */
-    opcContainer* opcContainerOpenMem(const opc_uint8_t *data, opc_uint32_t data_len,
-                                      opcContainerOpenMode mode, 
-                                      void *userContext);
-
-    /**
-     Opens a ZIP-based OPC container using caller-supplied IO callbacks.
-     @param[in] ioread. 
-     @param[in] iowrite. 
-     @param[in] ioclose. 
-     @param[in] ioseek. 
-     @param[in] iotrim. 
-     @param[in] ioflush. 
-     @param[in] iocontext. 
-     @param[in] file_size. 
-     @param[in] userContext. Will not be modified by libopc. Can be used to e.g. store the "this" pointer for C++ bindings.
-     @param[in] mode. For more details see \ref opcContainerOpenMode.
-     @return \a NULL if failed. 
-     */
-    opcContainer* opcContainerOpenIO(opcFileReadCallback *ioread,
-                                     opcFileWriteCallback *iowrite,
-                                     opcFileCloseCallback *ioclose,
-                                     opcFileSeekCallback *ioseek,
-                                     opcFileTrimCallback *iotrim,
-                                     opcFileFlushCallback *ioflush,
-                                     void *iocontext,
-                                     pofs_t file_size,
-                                     opcContainerOpenMode mode, 
-                                     void *userContext);
-    
-    /**
-     Close an OPC container.
-     @param[in] c. \ref opcContainer opened by \ref opcContainerOpen.
-     @param[in] mode. For more information see \ref opcContainerCloseMode.
-     @return Non-zero if successful.
-     \see opcContainerOpen
-     \see opcContainerCloseMode
-     */
-    opc_error_t opcContainerClose(opcContainer *c, opcContainerCloseMode mode);
-    
-    /**
-     Returns the unmodified user context passed to \ref opcContainerOpen.
-     \see opcContainerOpen
-     */
-    void *opcContainerGetUserContext(opcContainer *c);
-    
-    /**
-     List all types, relations and parts of the container \a c to \a out.
-     \par Sample:
-     \include opc_dump.c
-     */
-    opc_error_t opcContainerDump(opcContainer *c, FILE *out);
-    
-    /**
-     Exports the OPC container to "Flat OPC" (http://blogs.msdn.com/b/ericwhite/archive/2008/09/29/the-flat-opc-format.aspx).
-     The flat versions of an OPC file are very important when dealing with e.g XSL(T)-based or Javascript-based transformations.
-     \see opcContainerFlatImport.
-     \todo Implementation needed.
-     */
-    int opcContainerFlatExport(opcContainer *c, const xmlChar *fileName);
-    
-    /**
-     Imports the flat version of an OPC container. 
-     \see opcContainerFlatExport.
-     \todo Implementation needed.
-     */
-    int opcContainerFlatImport(opcContainer *c, const xmlChar *fileName);
-    
-    /**
-     Iterate all types.
-     \code
-     for(xmlChar *type=opcContentTypeFirst(c);
-         NULL!=type;
-         type=opcContentTypeNext(c, type)) {
-        printf("%s\n", type);
-     }
-     \endcode
-    */
-    const xmlChar *opcContentTypeFirst(opcContainer *container);
-    
-    /**
-     \see opcContentTypeFirst()
-    */
-    const xmlChar *opcContentTypeNext(opcContainer *container, const xmlChar *type);
-
-    /**
-     Iterate extensions.
-     \code
-     for(const xmlChar *ext=opcExtensionFirst(c);
-         NULL!=ext;
-         ext=opcExtensionNext(c, ext)) {
-        printf("%s\n", ext);
-     }
-     \endcode
-    */
-    const xmlChar *opcExtensionFirst(opcContainer *container);
-    
-    /**
-     \see opcExtensionFirst()
-     */
-    const xmlChar *opcExtensionNext(opcContainer *container, const xmlChar *ext);
-    
-    /**
-     Get registered type for extension.
-     \see opcExtensionRegister()
-     */
-    const xmlChar *opcExtensionGetType(opcContainer *container, const xmlChar *ext);
-
-    /**
-     Register a mime-type and an extension.
-     \see opcExtensionGetType()
-     */
-    const xmlChar *opcExtensionRegister(opcContainer *container, const xmlChar *ext, const xmlChar *type);
-
-
-    /**
-     Iterate through all relation types of the container:
-     \code
-     for(xmlChar *type=opcRelationTypeFirst(c);
-         NULL!=type;
-         type=opcRelationTypeNext(c, type)) {
-        printf("%s\n", type);
-     }
-     \endcode
-     */
-    const xmlChar *opcRelationTypeFirst(opcContainer *container);
-
-    /**
-     \see opcRelationTypeFirst()
-    */
-    const xmlChar *opcRelationTypeNext(opcContainer *container, const xmlChar *type);
-
-
-    /**
-     Iterate through all external targets of the container:
-     \code
-     for(xmlChar *target=opcExternalTargetFirst(c);
-         NULL!=target;
-         target=opcExternalTargetNext(c, target)) {
-        printf("%s\n", target);
-     }
-     \endcode
-     */
-    const xmlChar *opcExternalTargetFirst(opcContainer *container);
-
-    /**
-     \see opcExternalTargetFirst()
-    */
-    const xmlChar *opcExternalTargetNext(opcContainer *container, const xmlChar *target);
-
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif    
-        
-#endif /* OPC_CONTAINER_H */
--- a/include/opc/file.h
+++ b/include/opc/file.h
@@ -1,200 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file opc/file.h
- The opc module contains the file library functions.
-*/
-#include <opc/config.h>
-
-#ifndef OPC_FILE_H
-#define OPC_FILE_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif    
-
-/**
-  Flag for READ access.
-  \hideinitializer
-*/
-#define OPC_FILE_READ  (1<<0)
-
-/**
-  Flag for WRITE access.
-  \hideinitializer
-*/
-#define OPC_FILE_WRITE (1<<1)
-
-/**
-  Flag indicates that file will be truncated when opened.
-  \hideinitializer
-*/
-#define OPC_FILE_TRUNC  (1<<2)
-
-
-    /** 
-      Abstraction for seek modes.
-      */
-    typedef enum OPC_FILESEEKMODE_ENUM {
-        opcFileSeekSet = SEEK_SET,
-        opcFileSeekCur = SEEK_CUR,
-        opcFileSeekEnd = SEEK_END
-    } opcFileSeekMode;
-
-     /**
-      Callback to read a file. E.g. for a FILE * context this can be implemented as
-      \code
-      static int opcFileRead(void *iocontext, char *buffer, int len) {
-          return fread(buffer, sizeof(char), len, (FILE*)iocontext);
-      }
-      \endcode
-      */
-    typedef int opcFileReadCallback(void *iocontext, char *buffer, int len);
-
-     /**
-      Callback to write a file. E.g. for a FILE * context this can be implemented as
-      \code
-      static int opcFileWrite(void *iocontext, const char *buffer, int len) {
-          return fwrite(buffer, sizeof(char), len, (FILE*)iocontext);
-      }
-      \endcode
-      */
-    typedef int opcFileWriteCallback(void *iocontext, const char *buffer, int len);
-
-     /**
-      Callback to close a file. E.g. for a FILE * context this can be implemented as
-      \code
-      static int opcFileClose(void *iocontext) {
-          return fclose((FILE*)iocontext);
-      }
-      \endcode
-      */
-    typedef int opcFileCloseCallback(void *iocontext);
-
-     /**
-      Callback to seek a file. E.g. for a FILE * context this can be implemented as
-      \code
-      static opc_ofs_t opcFileSeek(void *iocontext, opc_ofs_t ofs) {
-          int ret=fseek((FILE*)iocontext, ofs, SEEK_SET);
-          if (ret>=0) {
-              return ftell((FILE*)iocontext);
-          } else {
-              return ret;
-          }
-      }
-      \endcode
-      */
-    typedef opc_ofs_t opcFileSeekCallback(void *iocontext, opc_ofs_t ofs);
-
-     /**
-      Callback to trim a file. E.g. for a FILE * context this can be implemented as
-      \code
-      static int opcFileTrim(void *iocontext, opc_ofs_t new_size) {
-      #ifdef WIN32
-          return _chsize(fileno((FILE*)iocontext), new_size);
-      #else
-          return ftruncate(fileno((FILE*)iocontext), new_size);
-      #endif
-      }
-      \endcode
-      */
-    typedef int opcFileTrimCallback(void *iocontext, opc_ofs_t new_size);
-
-     /**
-      Callback to flush a file. E.g. for a FILE * context this can be implemented as
-      \code
-      static int opcFileFlush(void *iocontext) {
-          return fflush((FILE*)iocontext);
-      }
-      \endcode
-      */
-    typedef int opcFileFlushCallback(void *iocontext);
-
-    /**
-      Represents a state of a file, i.e. file position (buf_pos) and error status (err).
-      */
-    typedef struct OPC_FILERAWSTATE_STRUCT {
-        opc_error_t err;
-        opc_ofs_t   buf_pos; // current pos in file
-    } opcFileRawState;
-
-    /**
-     File IO context.
-     */
-    typedef struct OPC_IO_STRUCT {
-        opcFileReadCallback *_ioread;
-        opcFileWriteCallback *_iowrite;
-        opcFileCloseCallback *_ioclose;
-        opcFileSeekCallback *_ioseek;
-        opcFileTrimCallback *_iotrim;
-        opcFileFlushCallback *_ioflush;
-        void *iocontext;
-        int flags;
-        opcFileRawState state;
-        opc_ofs_t file_size;
-    } opcIO_t;
-
-    /**
-      Initialize an IO context.
-      */
-    opc_error_t opcFileInitIO(opcIO_t *io,
-                              opcFileReadCallback *ioread,
-                              opcFileWriteCallback *iowrite,
-                              opcFileCloseCallback *ioclose,
-                              opcFileSeekCallback *ioseek,
-                              opcFileTrimCallback *iotrim,
-                              opcFileFlushCallback *ioflush,
-                              void *iocontext,
-                              pofs_t file_size,
-                              int flags);
-
-    /**
-      Initialize an IO context for a file.
-      */
-    opc_error_t opcFileInitIOFile(opcIO_t *io, const xmlChar *filename, int flags);
-
-    /**
-      Initialize an IO for memory.
-      \warning Currently supports READ-ONLY file access.
-      */
-    opc_error_t opcFileInitIOMemory(opcIO_t *io, const opc_uint8_t *data, opc_uint32_t data_len, int flags);
-
-    /**
-      Cleanup an IO context, i.e. release all system resources.
-      */
-    opc_error_t opcFileCleanupIO(opcIO_t *io);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif    
-        
-#endif /* OPC_FILE_H */
--- a/include/opc/helper.h
+++ b/include/opc/helper.h
@@ -1,60 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file opc/helper.h
-  Contains helper functions for the opc module.
-*/
-#include <opc/config.h>
-
-#ifndef OPC_HELPER_H
-#define OPC_HELPER_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif    
-
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif    
-
-    /**
-      Constructs a segment name.
-      */
-    opc_uint16_t opcHelperAssembleSegmentName(char *out, opc_uint16_t out_size, const xmlChar *name, opc_uint32_t segment_number, opc_uint32_t next_segment_id, opc_bool_t rels_segment, opc_uint16_t *out_max);
-
-    /**
-      Splits a filename into the segment informations.
-      */
-    opc_error_t opcHelperSplitFilename(opc_uint8_t *filename, opc_uint32_t filename_length, opc_uint32_t *segment_number, opc_bool_t *last_segment, opc_bool_t *rel_segment);
-
-#endif /* OPC_HELPER_H */
--- a/include/opc/inputstream.h
+++ b/include/opc/inputstream.h
@@ -1,74 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file opc/inputstream.h
- 
- */
-#include <opc/config.h>
-
-#ifndef OPC_INPUTSTREAM_H
-#define OPC_INPUTSTREAM_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif    
-    /**
-      Internal type which represents a binary input stream.
-      */
-    typedef struct OPC_CONTAINER_INPUTSTREAM_STRUCT opcContainerInputStream;
-
-    /**
-      Opens the part \c name of the \c container for reading.
-      */
-    opcContainerInputStream* opcContainerOpenInputStream(opcContainer *container, const xmlChar *name);
-
-    /**
-     Reads maximal \c buffer_len bytes from the input \c stream to \c buffer. 
-     \return The number of bytes read or "0" in case of an error or end-of-stream.
-     */
-    opc_uint32_t opcContainerReadInputStream(opcContainerInputStream* stream, opc_uint8_t *buffer, opc_uint32_t buffer_len);
-
-    /**
-      Closes the input stream and releases all system resources.
-      */
-    opc_error_t opcContainerCloseInputStream(opcContainerInputStream* stream);
-
-    /**
-      Returns the type of compression used for the stream.
-      */
-    opcCompressionOption_t opcContainerGetInputStreamCompressionOption(opcContainerInputStream* stream);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif    
-        
-#endif /* OPC_INPUTSTREAM_H */
--- a/include/opc/opc.h
+++ b/include/opc/opc.h
@@ -1,73 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file opc/opc.h
- The opc module contains the basic library functions.
-*/
-#include <opc/config.h>
-#include <opc/container.h>
-#include <opc/part.h>
-#include <opc/relation.h>
-#include <opc/inputstream.h>
-#include <opc/outputstream.h>
-#include <opc/zip.h>
-#include <opc/xmlreader.h>
-#include <opc/xmlwriter.h>
-#include <opc/properties.h>
-
-#ifndef OPC_OPC_H
-#define OPC_OPC_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif    
-    
-    /**
-     * Initialize libopc.
-     * Sample:
-     * \include opc_helloworld.c
-     * @return Non-zero if successful.
-     */
-    opc_error_t opcInitLibrary();
-
-    /**
-     * Free libopc. Clean up all resources.
-     * @return Non-zero if successful.
-     * \see opcInitLibrary.
-     */
-    opc_error_t opcFreeLibrary();
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif    
-        
-#endif /* OPC_OPC_H */
--- a/include/opc/outputstream.h
+++ b/include/opc/outputstream.h
@@ -1,71 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file opc/outputstream.h
- 
- */
-#include <opc/config.h>
-
-#ifndef OPC_OUTPUTSTREAM_H
-#define OPC_OUTPUTSTREAM_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif    
-    /**
-      Internal type which represents a binary output stream.
-      */
-    typedef struct OPC_CONTAINER_OUTPUTSTREAM_STRUCT opcContainerOutputStream;
-
-    /** 
-      Open the part \c name for writing in \c container with compression \c compression_option.
-      \note Make sure the part exists! 
-      \see opcPartCreate.
-      */
-    opcContainerOutputStream* opcContainerCreateOutputStream(opcContainer *container, const xmlChar *name, opcCompressionOption_t compression_option);
-
-    /**
-      Write \c buffer_len bytes from \c buffer to \c stream. 
-      \return Returns the number of bytes written.
-      */
-    opc_uint32_t opcContainerWriteOutputStream(opcContainerOutputStream* stream, const opc_uint8_t *buffer, opc_uint32_t buffer_len);
-
-    /**
-      Close the \c stream and free all associated resources.
-      */
-    opc_error_t opcContainerCloseOutputStream(opcContainerOutputStream* stream);
-        
-#ifdef __cplusplus
-} /* extern "C" */
-#endif    
-        
-#endif /* OPC_OUTPUTSTREAM_H */
--- a/include/opc/part.h
+++ b/include/opc/part.h
@@ -1,118 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file opc/part.h
- 
- */
-#include <opc/config.h>
-
-#ifndef OPC_PART_H
-#define OPC_PART_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif    
-    /**
-     Handle to an OPC part created by \ref opcPartOpen.
-     \see opcPartOpen.
-     */
-    typedef xmlChar* opcPart;
-
-/**
-  Represents an invalid (resp. NULL) part.
-  In relations OPC_PART_INVALID also represents the root part.
-  \hideinitializer
-  */
-#define OPC_PART_INVALID NULL
-
-    /**
-     Find a part in a \c container by \c absolutePath and/or \c type.
-     Currently no flags are supported.
-     */
-    opcPart opcPartFind(opcContainer *container, 
-                        const xmlChar *absolutePath, 
-                        const xmlChar *type,
-                        int flags);
-
-    /**
-     Creates a part in a \c container with \c absolutePath and \c type.
-     Currently no flags are supported.
-     */
-    opcPart opcPartCreate(opcContainer *container, 
-                          const xmlChar *absolutePath, 
-                          const xmlChar *type,
-                          int flags);
-
-    /**
-      Returns the type of the \c part.
-      The string is interned and must not be freed.
-      */
-    const xmlChar *opcPartGetType(opcContainer *c, opcPart part);
-
-    /**
-      Returns the type of the \c part.
-      If \c override_only then the return value will be NULL for parts not having an override type.
-      The string is interned and must not be freed.
-      */
-    const xmlChar *opcPartGetTypeEx(opcContainer *c, opcPart part, opc_bool_t override_only);
-
-    /**
-     Deletes the part \c absolutePath in the \c container.
-     */
-    opc_error_t opcPartDelete(opcContainer *container, const xmlChar *absolutePath);
-
-    /**
-      Get the first part.
-      \code
-      for(opcPart part=opcPartGetFirst(c);OPC_PART_INVALID!=part;part=opcPartGetNext(c, part)) {
-        printf("%s; %s\n", part, opcPartGetType(c, part));
-      }
-      \endcode 
-      */
-    opcPart opcPartGetFirst(opcContainer *container);
-
-    /**
-     Get the next part.
-     \see opcPartGetFirst
-      */
-    opcPart opcPartGetNext(opcContainer *container, opcPart part);
-
-    /**
-      Returns the size in bytes of the \c part.
-      */
-    opc_ofs_t opcPartGetSize(opcContainer *c, opcPart part);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif    
-        
-#endif /* OPC_PART_H */
--- a/include/opc/properties.h
+++ b/include/opc/properties.h
@@ -1,121 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file opc/properties.h
- 
- */
-#include <opc/config.h>
-#include <opc/container.h>
-
-#ifndef OPC_PROPERTIES_H
-#define OPC_PROPERTIES_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif    
-
-    /** 
-      Represents a simple Dublin Core type.
-      */
-    typedef struct OPC_DC_SIMPLE_TYPE {
-        xmlChar *str;
-        xmlChar *lang;
-    } opcDCSimpleType_t;
-
-    /** 
-      Represents the core properties of an OPC container.
-      */
-    typedef struct OPC_PROPERTIES_STRUCT {
-        xmlChar *category;                /* xsd:string     */
-        xmlChar *contentStatus;           /* xsd:string     */
-        xmlChar *created;                 /* dc:date        */
-        opcDCSimpleType_t creator;        /* dc:any         */
-        opcDCSimpleType_t description;    /* dc:any         */
-        opcDCSimpleType_t identifier;     /* dc:any         */
-        opcDCSimpleType_t *keyword_array; /* cp:CT_Keywords */
-        opc_uint32_t keyword_items;
-        opcDCSimpleType_t language;       /* dc:any         */
-        xmlChar *lastModifiedBy;          /* xsd:string     */
-        xmlChar *lastPrinted;             /* xsd:dateTime   */
-        xmlChar *modified;                /* dc:date        */
-        xmlChar *revision;                /* xsd:string     */
-        opcDCSimpleType_t subject;        /* dc:any         */
-        opcDCSimpleType_t title;          /* dc:any         */
-        xmlChar *version;                 /* xsd:string     */
-    } opcProperties_t;
-
-    /**
-      Initialize the core properties \c cp.
-      \see opcCorePropertiesSetString
-      */
-    opc_error_t opcCorePropertiesInit(opcProperties_t *cp);
-
-    /**
-      Cleanup the core properties \c cp, i.e. release all resources.
-      \see opcCorePropertiesSetString
-      */
-    opc_error_t opcCorePropertiesCleanup(opcProperties_t *cp);
-
-    /**
-      Read the core properties \c cp from the container \c c.
-      */
-    opc_error_t opcCorePropertiesRead(opcProperties_t *cp, opcContainer *c);
-
-
-    /**
-      Write/Update the core properties \c cp in the container \c c.
-      */
-    opc_error_t opcCorePropertiesWrite(opcProperties_t *cp, opcContainer *c);
-
-    /**
-      Update a string in the core properties the right way.
-      \code
-      opcProperties_t cp;
-      opcCorePropertiesInit(&cp);
-      opcCorePropertiesSetString(&cp.revision, "1");
-      opcCorePropertiesSetStringLang(&cp.creator, "Florian Reuter", NULL);
-      opcCorePropertiesCleanup(&cp);
-      \endcode
-      */
-    opc_error_t opcCorePropertiesSetString(xmlChar **prop, const xmlChar *str);
-
-    /** 
-      Update a core property the right way.
-      \see opcCorePropertiesSetString
-      */
-    opc_error_t opcCorePropertiesSetStringLang(opcDCSimpleType_t *prop, const xmlChar *str, const xmlChar *lang);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif    
-        
-#endif /* OPC_PROPERTIES_H */
--- a/include/opc/relation.h
+++ b/include/opc/relation.h
@@ -1,140 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file opc/relation.h
- 
- */
-#include <opc/config.h>
-
-#ifndef OPC_RELATION_H
-#define OPC_RELATION_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif    
-
-    /**
-     Identifier for an OPC relation.
-     */
-    typedef opc_uint32_t opcRelation;
-
-/**
-  Constant which represents an invalid relation.
-*/
-#define OPC_RELATION_INVALID (-1)
-
-    /**
-      Find a relation originating from \c part in \c container with \c relationId and/or \c mimeType.
-      If \c part is OPC_PART_INVALID then part represents the root part.
-      @param[in] relationId The relationId (e.g. "rId1") or NULL.
-      @param[in] mimeType The mimeType or NULL.
-      */
-    opcRelation opcRelationFind(opcContainer *container, opcPart part, const xmlChar *relationId, const xmlChar *mimeType);
-
-    /**
-      Deletes the relation from the container.
-      \see opcRelationFind.
-      */
-    opc_error_t opcRelationDelete(opcContainer *container, opcPart part, const xmlChar *relationId, const xmlChar *mimeType);
-
-    /**
-      Returns the first relation.
-      The following code will dump all relations:
-      \code
-        for(opcPart part=opcPartGetFirst(c);OPC_PART_INVALID!=part;part=opcPartGetNext(c, part)) {
-           for(opcRelation rel=opcRelationFirst(c, part);
-               OPC_RELATION_INVALID!=rel;
-               rel=opcRelationNext(c, part, rel)) {
-               opcPart internal_target=opcRelationGetInternalTarget(c, part, rel);
-               const xmlChar *external_target=opcRelationGetExternalTarget(c, part, rel);
-               const xmlChar *target=(NULL!=internal_target?internal_target:external_target);
-               const xmlChar *prefix=NULL;
-               opc_uint32_t counter=-1;
-               const xmlChar *type=NULL;
-               opcRelationGetInformation(c, part, rel, &prefix, &counter, &type);        
-               if (-1==counter) { // no counter after prefix
-                  printf("%s;%s;%s;%s\n", part, prefix, target, type);
-               } else {
-                  printf("%s;%s%i;%s;%s\n", part, prefix, counter, target, type);
-               }
-           }
-        }
-      \endcode
-      */
-    opcRelation opcRelationFirst(opcContainer *container, opcPart part);
-
-    /**
-      \see opcRelationFirst
-      */
-    opcRelation opcRelationNext(opcContainer *container, opcPart part, opcRelation relation);
-    
-    /**
-      Returns the internal target.
-      \note To test for an external target use opcRelationGetExternalTarget.
-      \see opcRelationGetExternalTarget
-      */
-    opcPart opcRelationGetInternalTarget(opcContainer *container, opcPart part, opcRelation relation);
-
-    /**
-      Returns the external target or NULL if it is an internal target.
-      The string is interned. Must not be freed.
-      \see opcRelationGetInternalTarget
-      */
-    const xmlChar *opcRelationGetExternalTarget(opcContainer *container, opcPart part, opcRelation relation);
-
-    /**
-      Returns the relations type.
-      The string is interned. Must not be freed.
-      */
-    const xmlChar *opcRelationGetType(opcContainer *container, opcPart part, opcRelation relation);
-
-    /** 
-      Get information about a relation.
-      \see opcRelationFirst
-      */
-    void opcRelationGetInformation(opcContainer *container, opcPart part, opcRelation relation, const xmlChar **prefix, opc_uint32_t *counter, const xmlChar **type);
-
-    /** 
-      Add a relation to \c container from \c src part to \c dest part with id \c rid and type \c type.
-      */
-    opc_uint32_t opcRelationAdd(opcContainer *container, opcPart src, const xmlChar *rid, opcPart dest, const xmlChar *type);
-
-    /** 
-      Add an external relation to \c container from \c src part to \c target URL with id \c rid and type \c type.
-      */
-    opc_uint32_t opcRelationAddExternal(opcContainer *container, opcPart src, const xmlChar *rid, const xmlChar *target, const xmlChar *type);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif    
-
-#endif /* OPC_RELATION_H */
--- a/include/opc/xmlreader.h
+++ b/include/opc/xmlreader.h
@@ -1,69 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file opc/xmlreader.h
- 
- */
-
-#ifndef OPC_XMLREADER_H
-#define OPC_XMLREADER_H
-
-#include <opc/config.h>
-#include <libxml/xmlreader.h>
-#include <mce/textreader.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif    
-
-    /** 
-      Open an MCE reader for \c partName. Parameters \c URL, \c encoding and \c options will be passed unmodified to 
-      http://xmlsoft.org/html/libxml-xmlreader.html#xmlReaderForIO and they can be NULL, NULL, 0.
-      \note Make sure the part exists.
-      \see opcPartFind
-      */
-    opc_error_t opcXmlReaderOpen(opcContainer *container, mceTextReader_t *mceTextReader, const xmlChar *partName, const char * URL, const char * encoding, int options);
-
-    /**
-      Returns a libxml DOM document. Parameters \c URL, \c encoding and \c options will be passed unmodified to 
-      http://xmlsoft.org/html/libxml-parser.html#xmlReadIO and they can be NULL, NULL, 0.
-      \note Make sure the part exists.
-      \see opcPartFind
-      */
-    xmlDocPtr opcXmlReaderReadDoc(opcContainer *container, const xmlChar *partName, const char * URL, const char * encoding, int options);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif    
-        
-#endif /* OPC_XMLREADER_H */
--- a/include/opc/xmlwriter.h
+++ b/include/opc/xmlwriter.h
@@ -1,57 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file opc/xmlwriter.h
- 
- */
-#include <opc/config.h>
-#include <mce/textwriter.h>
-
-#ifndef OPC_XMLWRITER_H
-#define OPC_XMLWRITER_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif    
-
-    /**
-      Create an MCE text writer for \c part in \c container with compression \c compression_option.
-      \note Make sure the part exists.
-      \see opcPartFind
-      */
-    mceTextWriter *mceTextWriterOpen(opcContainer *c, opcPart part, opcCompressionOption_t compression_option);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif    
-        
-#endif /* OPC_XMLWRITER_H */
--- a/include/opc/zip.h
+++ b/include/opc/zip.h
@@ -1,255 +0,0 @@
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-/** @file opc/zip.h
-  The ZIP file backend of an OPC container.
- */
-#include <opc/config.h>
-#include <opc/file.h>
-#include <opc/container.h>
-
-#ifndef OPC_ZIP_H
-#define OPC_ZIP_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif    
-
-    /**
-     Default growth hint of an OPC stream.
-     */
-    #define OPC_DEFAULT_GROWTH_HINT 512
-
-    /**
-     Handle to a ZIP archive.
-     \see internal.h
-     */
-    typedef struct OPC_ZIP_STRUCT opcZip;
-
-    /**
-      Handle to a raw ZIP input stream.
-     \see internal.h
-      */
-    typedef struct OPC_ZIPINPUTSTREAM_STRUCT opcZipInputStream;
-
-    /**
-      Handle to a raw ZIP output stream.
-     \see internal.h
-      */
-    typedef struct OPC_ZIPOUTPUTSTREAM_STRUCT opcZipOutputStream;
-
-    /**
-     Holds all information of a ZIP segment.
-     */
-    typedef struct OPC_ZIP_SEGMENT_INFO_STRUCT {
-        xmlChar name[OPC_MAX_PATH]; 
-        opc_uint32_t name_len;
-        opc_uint32_t segment_number;
-        opc_bool_t   last_segment;
-        opc_bool_t   rels_segment;
-        opc_uint32_t header_size;
-        opc_uint32_t min_header_size;
-        opc_uint32_t trailing_bytes;
-        opc_uint32_t compressed_size;
-        opc_uint32_t uncompressed_size;
-        opc_uint16_t bit_flag;
-        opc_uint32_t data_crc;
-        opc_uint16_t compression_method;
-        opc_ofs_t    stream_ofs;
-        opc_uint16_t growth_hint;
-    } opcZipSegmentInfo_t;
-
-    /**
-      \see opcZipLoader
-      */
-    typedef int opcZipLoaderOpenCallback(void *iocontext);
-    /**
-      \see opcZipLoader
-      */
-    typedef int opcZipLoaderSkipCallback(void *iocontext);
-    /**
-      \see opcZipLoader
-      */
-    typedef int opcZipLoaderReadCallback(void *iocontext, char *buffer, int len);
-    /**
-      \see opcZipLoader
-      */
-    typedef int opcZipLoaderCloseCallback(void *iocontext);
-
-    /**
-      \see opcZipLoader
-      */
-    typedef opc_error_t (opcZipLoaderSegmentCallback_t)(void *iocontext, void *userctx, opcZipSegmentInfo_t *info, opcZipLoaderOpenCallback *open, opcZipLoaderReadCallback *read, opcZipLoaderCloseCallback *close, opcZipLoaderSkipCallback *skip);
-
-    /**
-      Walks every segment in a ZIP archive and calls the \c segmentCallback callback method.
-      The implementer \c segmentCallback method must then either use the passed \c open, \c read and \c close methods
-      to read the stream or the passed \c skip method to skip the stream.
-      This method can be used to e.g. read ZIP file in stream mode.
-      */
-    opc_error_t opcZipLoader(opcIO_t *io, void *userctx, opcZipLoaderSegmentCallback_t *segmentCallback);
-
-    /**
-      \see opcZipClose
-     */
-    typedef opc_error_t (opcZipSegmentReleaseCallback)(opcZip *zip, opc_uint32_t segment_id);
-
-    /** 
-     Closes the ZIP archive \c zip and will call \c releaseCallback for every segment to give the implementer a chance
-     to free user resources.
-     */
-    void opcZipClose(opcZip *zip, opcZipSegmentReleaseCallback* releaseCallback);
-
-    /**
-      Creates an empty ZIP archive with the given \c io.
-      */
-    opcZip *opcZipCreate(opcIO_t *io);
-
-    /**
-      Commits all buffers and writes the ZIP archives local header directories.
-      If \c trim is true then padding bytes will be removed, i.e. the ZIP file size will be minimized.
-     */
-    opc_error_t opcZipCommit(opcZip *zip, opc_bool_t trim);
-
-    /**
-      Garbage collection on the passed \c zip archive. This will e.g. make deleted files available as free space.
-      */
-    opc_error_t opcZipGC(opcZip *zip);
-
-    /**
-      Load segment information into \c info.
-      If \c rels_segment is -1 then load the info for part with name \c partName.
-      Otherwise load the segment information for the ".rels." segment of \c partName.
-      \return Returns the segment_id.
-      */
-    opc_uint32_t opcZipLoadSegment(opcZip *zip, const xmlChar *partName, opc_bool_t rels_segment, opcZipSegmentInfo_t *info);
-
-    /**
-      Create a segment with the given parameters.
-      \return Returns the segment_id.
-      */
-    opc_uint32_t opcZipCreateSegment(opcZip *zip, 
-                                     const xmlChar *partName, 
-                                     opc_bool_t relsSegment, 
-                                     opc_uint32_t segment_size, 
-                                     opc_uint32_t growth_hint,
-                                     opc_uint16_t compression_method,
-                                     opc_uint16_t bit_flag);
-
-    /**
-      Creates an input stream for the segment with \c segment_id.
-      \see opcZipLoadSegment
-      \see opcZipCreateSegment
-      */
-    opcZipInputStream *opcZipOpenInputStream(opcZip *zip, opc_uint32_t segment_id);
-
-    /**
-     Free all resources of the input stream.
-     */
-    opc_error_t opcZipCloseInputStream(opcZip *zip, opcZipInputStream *stream);
-
-    /**
-     Read at most \c buf_len bytes from the input stream into \c buf.
-     \return Returns the number of bytes read.
-     */
-    opc_uint32_t opcZipReadInputStream(opcZip *zip, opcZipInputStream *stream, opc_uint8_t *buf, opc_uint32_t buf_len);
-
-
-    /**
-      Creates an output stream for the segment with \c segment_id.
-      If \c *segment_id is -1 then a new segment will be created. 
-      Otherwise the segment with \c *segment_id will be overwritten.
-     */
-    opcZipOutputStream *opcZipCreateOutputStream(opcZip *zip, 
-                                             opc_uint32_t *segment_id, 
-                                             const xmlChar *partName, 
-                                             opc_bool_t relsSegment, 
-                                             opc_uint32_t segment_size, 
-                                             opc_uint32_t growth_hint,
-                                             opc_uint16_t compression_method,
-                                             opc_uint16_t bit_flag);
-
-    /**
-      Opens an existing output stream for reading.
-      The \c *segment_id will be set to -1 and reset on opcZipCloseOutputStream.
-      \see opcZipCloseOutputStream
-     */
-    opcZipOutputStream *opcZipOpenOutputStream(opcZip *zip, opc_uint32_t *segment_id);
-
-    /** 
-      Will close the stream and free all resources. Additionally the new segment id will be stored in \c *segment_id.
-      \see opcZipOpenOutputStream
-      */
-    opc_error_t opcZipCloseOutputStream(opcZip *zip, opcZipOutputStream *stream, opc_uint32_t *segment_id);
-
-    /**
-     Write \c buf_len bytes from \c buf to the output stream.
-     \return Returns the number of bytes written.
-     */
-    opc_uint32_t opcZipWriteOutputStream(opcZip *zip, opcZipOutputStream *stream, const opc_uint8_t *buf, opc_uint32_t buf_len);
-
-    /**
-     Returns the first segment id or -1.
-     Use the following code to iterate through all segments.
-     \code 
-     for(opc_uint32_t segment_id=opcZipGetFirstSegmentId(zip);
-         -1!=segment_id;
-         segment_id=opcZipGetNextSegmentId(zip, segment_id)) {
-        ...
-     }
-     \endcode
-     \see opcZipGetNextSegmentId
-     */
-    opc_uint32_t opcZipGetFirstSegmentId(opcZip *zip);
-
-    /**
-     Returns the next segment id or -1.
-     \see opcZipGetFirstSegmentId
-     */
-    opc_uint32_t opcZipGetNextSegmentId(opcZip *zip, opc_uint32_t segment_id);
-
-    /**
-     Returns info about the given segment id.
-     */
-    opc_error_t opcZipGetSegmentInfo(opcZip *zip, opc_uint32_t segment_id, const xmlChar **name, opc_bool_t *rels_segment, opc_uint32_t *crc);
-
-    /**
-     Marks the given segments as deleted.
-     \see opcZipGC
-     */
-    opc_bool_t opcZipSegmentDelete(opcZip *zip, opc_uint32_t *first_segment, opc_uint32_t *last_segment, opcZipSegmentReleaseCallback* releaseCallback);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif    
-        
-#endif /* OPC_ZIP_H */
--- a/include/plib/plib.h
+++ b/include/plib/plib.h
@@ -1,168 +0,0 @@
-/* include/plib/plib.h.  Generated from plib.h by configure.  */
-/*
- Copyright (c) 2010, Florian Reuter
- All rights reserved.
- 
- Redistribution and use in source and binary forms, with or without 
- modification, are permitted provided that the following conditions 
- are met:
- 
- * Redistributions of source code must retain the above copyright 
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright 
-   notice, this list of conditions and the following disclaimer in 
-   the documentation and/or other materials provided with the 
-   distribution.
- * Neither the name of Florian Reuter nor the names of its contributors 
-   may be used to endorse or promote products derived from this 
-   software without specific prior written permission.
- 
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
- COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
- OF THE POSSIBILITY OF SUCH DAMAGE.
- 
-*/
-#ifndef _PLIB_PLIB_H_
-#define _PLIB_PLIB_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define HAVE_STDINT_H 1
-#define HAVE_STDDEF_H 1
-#define HAVE_STDIO_H 1
-#define HAVE_STRING_H 1
-#define HAVE_LIMITS_H 1
-#define HAVE_STDLIB_H 1
-/* #undef HAVE_IO_H */
-#define HAVE_UNISTD_H 1
-#define HAVE_SYS_TYPES_H 1
-#define IS_CONFIGURED 1
-
-#if !defined(IS_CONFIGURED)
-#if defined(WIN32)
-#define HAVE_STRING_H 1
-#define HAVE_STDINT_H 1
-#define HAVE_LIMITS_H 1
-#define HAVE_STDDEF_H 1
-#define HAVE_STDIO_H 1
-#define HAVE_STDLIB_H 1
-#define HAVE_IO_H
-#define snprintf _snprintf
-#else
-#error "configure not executed and we are not on a win32 machine? please run configure or define WIN32 is you are on a WIN32 platform."
-#endif
-#endif
-
-#ifdef HAVE_STDDEF_H
-#include <stddef.h>
-typedef size_t pofs_t; // maximum file offset for eg. read write ops
-#else
-#error "system types can not be determined"
-#endif
-
-#ifdef HAVE_STDIO_H
-#include <stdio.h>
-#else
-#error "system io can not be determined"
-#endif
-
-#ifdef HAVE_STDINT_H
-#include <stdint.h>
-
-typedef int8_t pint8_t;
-typedef uint8_t puint8_t;
-
-typedef int16_t pint16_t;
-typedef uint16_t puint16_t;
-
-typedef int32_t pint32_t;
-typedef uint32_t puint32_t;
-
-typedef int64_t pint64_t;
-typedef uint64_t puint64_t;
-
-typedef int pbool_t;
-
-typedef size_t psize_t;
-
-// INTN_MAX, INTN_MIN, UINTN_MAX
-#else
-#error "system types can not be determined"
-#endif
-
-#ifdef HAVE_STRING_H
-#include <string.h>
-#endif
-
-#ifdef HAVE_LIMITS_H
-#include <limits.h>
-#define PUINT8_MAX UCHAR_MAX 
-#define PINT32_MAX INT_MAX 
-#define PINT32_MIN INT_MIN 
-#define PUINT32_MAX UINT_MAX 
-#define PUINT32_MIN 0 
-#define PUINT16_MAX	USHRT_MAX 
-#define PUINT16_MIN 0 
-#else
-#error "limits can not be determined"
-#endif
-
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-
-#ifdef HAVE_IO_H
-#include <io.h>
-#endif
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-#ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
-/**
- Converts an ASCII string to a xmlChar string. This only works for ASCII strings.
- */
-#ifndef _X
-#define _X(s) BAD_CAST(s) 
-#endif
-
-
-/**
- Converts an xmlChar string to an ASCII string. This only works for ASCII charsets.
- */
-#ifndef _X2C
-#define _X2C(s) ((char*)(s))
-#endif
-
-
-#define PASSERT(e) assert(e)
-#ifdef NDEBUG
-#define PENSURE(e) (void)(e)
-#else
-#define PENSURE(e) assert(e)
-#endif
-#define PTRUE (0==0)
-#define PFALSE (0==1)
-
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-
-#endif /* _PLIB_PLIB_H_ */
--- a/lib/bzip2-1.0.6
+++ b/lib/bzip2-1.0.6
--- a/lib/ffmpeg
+++ b/lib/ffmpeg
--- a/lib/harfbuzz
+++ b/lib/harfbuzz
--- a/lib/leptonica
+++ b/lib/leptonica
--- a/lib/libmagic
+++ b/lib/libmagic
--- a/lib/libopc/libmce.a
+++ b/lib/libopc/libmce.a
--- a/lib/libopc/libopc.a
+++ b/lib/libopc/libopc.a
--- a/lib/libopc/libplib.a
+++ b/lib/libopc/libplib.a
--- a/lib/libtiff
+++ b/lib/libtiff
--- a/lib/mupdf
+++ b/lib/mupdf
--- a/lib/onion
+++ b/lib/onion
--- a/lib/openjpeg
+++ b/lib/openjpeg
--- a/lib/tesseract
+++ b/lib/tesseract
--- a/1
+++ b/1
--- a/schema/mappings.json
+++ b/schema/mappings.json
@@ -1,17 +1,43 @@
 {
  "properties": {
+    "_tie": {
+      "type": "keyword",
+      "doc_values": true
+    },
+    "_depth": {
+      "type": "integer"
+    },
    "path": {
      "type": "text",
      "analyzer": "path_analyzer",
-      "copy_to": "suggest-path"
+      "copy_to": "suggest-path",
+      "fielddata": true,
+      "fields": {
+        "nGram": {
+          "type": "text",
+          "analyzer": "my_nGram"
+        },
+        "text": {
+          "type": "text",
+          "analyzer": "content_analyzer"
+        }
+      }
    },
    "suggest-path": {
      "type": "completion",
-      "analyzer": "keyword"
+      "analyzer": "case_insensitive_kw_analyzer"
    },
    "mime": {
      "type": "keyword"
    },
+    "parent": {
+      "type": "keyword",
+      "index": false
+    },
+    "thumbnail": {
+      "type": "keyword",
+      "index": false
+    },
    "videoc": {
      "type": "keyword",
      "index": false
@@ -32,6 +58,10 @@
      "type": "integer",
      "index": false
    },
+    "pages": {
+      "type": "integer",
+      "index": false
+    },
    "mtime": {
      "type": "integer"
    },
@@ -104,7 +134,42 @@
      }
    },
    "tag": {
+      "type": "keyword",
+      "copy_to": "suggest-tag"
+    },
+    "suggest-tag": {
+      "type": "completion",
+      "analyzer": "case_insensitive_kw_analyzer"
+    },
+    "exif_make": {
+      "type": "text"
+    },
+    "exif_model": {
+      "type": "text"
+    },
+    "exif:software": {
+      "type": "text"
+    },
+    "exif_exposure_time": {
      "type": "keyword"
+    },
+    "exif_fnumber": {
+      "type": "keyword"
+    },
+    "exif_iso_speed_ratings": {
+      "type": "keyword"
+    },
+    "exif_focal_length": {
+      "type": "keyword"
+    },
+    "exif_user_comment": {
+      "type": "text"
+    },
+    "author": {
+      "type": "text"
+    },
+    "modified_by": {
+      "type": "text"
    }
  }
 }
--- a/schema/pipeline.json
+++ b/schema/pipeline.json
@@ -0,0 +1,10 @@
+{
+  "description": "Copy _id to _tie, save path depth",
+  "processors": [
+    {
+      "script": {
+        "source": "ctx._tie = ctx._id; ctx._depth = ctx.path.length() == 0 ? 0 : 1 + ctx.path.length() - ctx.path.replace(\"/\", \"\").length();"
+      }
+    }
+  ]
+}
--- a/schema/settings.json
+++ b/schema/settings.json
@@ -1,7 +1,8 @@
 {
  "index": {
    "refresh_interval": "30s",
-    "codec": "best_compression"
+    "codec": "best_compression",
+    "number_of_replicas": 0
  },
  "analysis": {
    "tokenizer": {
@@ -21,6 +22,12 @@
          "lowercase"
        ]
      },
+      "case_insensitive_kw_analyzer": {
+        "tokenizer": "keyword",
+        "filter": [
+          "lowercase"
+        ]
+      },
      "my_nGram": {
        "tokenizer": "my_nGram_tokenizer",
        "filter": [
--- a/scripts/before_build.sh
+++ b/scripts/before_build.sh
@@ -2,15 +2,15 @@

 rm -rf index.sist2/

-rm web/js/bundle.js 2> /dev/null
-cat `ls web/js/*.min.js` > web/js/bundle.js
-cat web/js/{util,dom,search}.js >> web/js/bundle.js
+rm src/static/js/bundle.js 2> /dev/null
+cat `ls src/static/js/*.min.js` > src/static/js/bundle.js
+cat src/static/js/{util,dom}.js >> src/static/js/bundle.js

-rm web/css/bundle*.css 2> /dev/null
-cat web/css/*.min.css > web/css/bundle.css
-cat web/css/light.css >> web/css/bundle.css
-cat web/css/*.min.css > web/css/bundle_dark.css
-cat web/css/dark.css >> web/css/bundle_dark.css
+rm src/static/css/bundle*.css 2> /dev/null
+cat src/static/css/*.min.css > src/static/css/bundle.css
+cat src/static/css/light.css >> src/static/css/bundle.css
+cat src/static/css/*.min.css > src/static/css/bundle_dark.css
+cat src/static/css/dark.css >> src/static/css/bundle_dark.css

 python3 scripts/mime.py > src/parsing/mime_generated.c
 python3 scripts/serve_static.py > src/web/static_generated.c
--- a/scripts/get_static_libs.sh
+++ b/scripts/get_static_libs.sh
@@ -1,100 +0,0 @@
-#!/usr/bin/env bash
-
-THREADS=$(nproc)
-
-cd lib
-
-cd mupdf
-CFLAGS=-fPIC make USE_SYSTEM_HARFBUZZ=yes USE_SYSTEM_OPENJPEG=yes HAVE_X11=no HAVE_GLUT=no -j $THREADS
-cd ..
-
-mv mupdf/build/release/libmupdf.a .
-mv mupdf/build/release/libmupdf-third.a .
-
-# openjp2
-cd openjpeg
-#cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -march=native -DNDEBUG"
-cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3"
-make -j $THREADS
-cd ..
-mv openjpeg/bin/libopenjp2.a .
-
-# harfbuzz
-cd harfbuzz
-./autogen.sh
-./configure --disable-shared --enable-static
-make -j $THREADS
-cd ..
-mv harfbuzz/src/.libs/libharfbuzz.a .
-
-# ffmpeg
-cd ffmpeg
-./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
- --disable-ffprobe --disable-doc\
- --disable-manpages --disable-postproc --disable-avfilter \
- --disable-alsa --disable-lzma --disable-xlib --disable-debug\
- --disable-vdpau --disable-vaapi --disable-sdl2 --disable-network\
- --extra-cflags=-fPIC
-make -j $THREADS
-cd ..
-
-mv ffmpeg/libavcodec/libavcodec.a .
-mv ffmpeg/libavformat/libavformat.a .
-mv ffmpeg/libavutil/libavutil.a .
-mv ffmpeg/libswresample/libswresample.a .
-mv ffmpeg/libswscale/libswscale.a .
-
-# onion
-cd onion
-mkdir build 2> /dev/null
-cd build
-cmake -DONION_USE_SSL=false -DONION_USE_PAM=false -DONION_USE_PNG=false -DONION_USE_JPEG=false \
-DONION_USE_JPEG=false -DONION_USE_XML2=false -DONION_USE_SYSTEMD=false -DONION_USE_SQLITE3=false \
-DONION_USE_REDIS=false -DONION_USE_GC=false -DONION_USE_TESTS=false -DONION_EXAMPLES=false \
-DONION_USE_BINDINGS_CPP=false ..
-make -j $THREADS
-cd ../..
-
-mv onion/build/src/onion/libonion_static.a .
-
-#bzip2
-cd bzip2-1.0.6
-make -j $THREADS
-cd ..
-mv bzip2-1.0.6/libbz2.a .
-
-# magic
-cd libmagic
-./autogen.sh
-./configure --enable-static --disable-shared
-make -j $THREADS
-cd ..
-mv libmagic/src/.libs/libmagic.a .
-
-# tesseract
-cd tesseract
-mkdir build
-cd build
-cmake -DSTATIC=on -DBUILD_TRAINING_TOOLS=off ..
-make -j $THREADS
-cd ../..
-mv tesseract/build/libtesseract.a .
-
-# leptonica
-cd leptonica
-./autogen.sh
-./configure --without-zlib --without-jpeg --without-giflib \
-  --without-giflib --without-libwebp --without-libwebpmux --without-libopenjpeg \
-  --enable-static --disable-shared
-make -j $THREADS
-cd ..
-mv leptonica/src/.libs/liblept.a .
-
-# tiff
-cd libtiff
-./autogen.sh
-./configure --enable-static --disable-shared --disable-lzw --disable-jpeg --disable-webp \
-  --disable-lzma --disable-zstd --disable-jbig
-make -j $THREADS
-cd ..
-mv libtiff/libtiff/.libs/libtiff.a .
--- a/scripts/index_static.py
+++ b/scripts/index_static.py
@@ -1,6 +1,9 @@
+import json
+
 files = [
    "schema/mappings.json",
    "schema/settings.json",
+    "schema/pipeline.json",
 ]


@@ -9,6 +12,7 @@ def clean(filepath):


 for file in files:
-    with open(file, "rb") as f:
-        data = f.read()
+    with open(file, "r") as f:
+        data = json.dumps(json.load(f), separators=(",", ":")).encode()
+    data += b'\0'
    print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))
--- a/scripts/mime.csv
+++ b/scripts/mime.csv
@@ -2,14 +2,18 @@ application/arj, arj
 application/base64, mme
 application/binhex, hqx
 application/book, boo|book
+application/CDFV2-corrupt,
 application/CDFV2, sdv
 application/clariscad, ccad
 application/commonground, dp
+application/csv,
+application/dicom, dcm
 application/drafting, drw
+application/epub+zip, epub
 application/freeloader, frl
 application/futuresplash, spl
 application/groupwise, vew
-application/gzip, gz
+application/gzip, gz|tgz
 application/hta, hta
 application/i-deas, unv
 application/iges, iges|igs
@@ -17,7 +21,6 @@ application/inf, inf
 application/java-archive, jar
 application/java, class
 application/javascript,
-application/x-archive, a
 application/json, json
 application/marc, mrc
 application/mbedlet, mbd
@@ -27,7 +30,9 @@ application/msword, doc|dot|w6w|wiz|word
 application/netmc, mcp
 application/octet-stream, bin|dump|gpg
 application/oda, oda
+application/ogg, ogv
 application/pdf, pdf
+application/pgp-keys,
 application/pgp-signature, pgp
 application/pkcs7-signature, p7s
 application/pkix-cert, cer|crt
@@ -43,6 +48,10 @@ application/vda, vda
 application/vnd.fdf, fdf
 application/vnd.font-fontforge-sfd, sfd
 application/vnd.hp-hpgl, hgl|hpg|hpgl
+application/vnd.iccprofile, icm
+application/vnd.iccprofile, icm
+application/vnd.lotus-1-2-3,
+application/vnd.ms-cab-compressed, cab
 application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw
 application/vnd.ms-fontobject, eot
 application/vnd.ms-opentype, otf
@@ -54,45 +63,75 @@ application/vnd.ms-project, mpp
 application/vnd.oasis.opendocument.base, odb
 application/vnd.oasis.opendocument.formula, odf
 application/vnd.oasis.opendocument.graphics, odg
+application/vnd.oasis.opendocument.presentation, odp
+application/vnd.oasis.opendocument.spreadsheet, ods
 application/vnd.oasis.opendocument.text, odt
+application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
+application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
+application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
+application/vnd.symbian.install,
+application/vnd.tcpdump.pcap, pcap
 application/vnd.wap.wmlc, wmlc
 application/vnd.wap.wmlscriptc, wmlsc
 application/vnd.xara, web
 application/vocaltec-media-desc, vmd
 application/vocaltec-media-file, vmf
+application/warc, warc
+application/winhelp, hlp
 application/wordperfect6.0, w60
 application/wordperfect6.1, w61
 application/wordperfect, wp|wp5|wp6|wpd
 application/x-123, wk1
+application/x-7z-compressed, 7z
 application/x-aim, aim
+application/x-apple-diskimage,
+application/x-arc,
+application/x-archive, a
+application/x-atari-7800-rom, a78
 application/x-authorware-bin, aab
 application/x-authorware-map, aam
 application/x-authorware-seg, aas
+application/x-avira-qua,
 application/x-bcpio, bcpio
 application/x-bittorrent, torrent
 application/x-bsh, bsh
 application/x-bytecode.python, pyc
 application/x-bzip2, boz|bz2
 application/x-bzip, bz
+application/x-cbr, cbr
+application/x-cbz, cbz
 application/x-cdlink, vcd
 application/x-chat, cha|chat
+application/x-chrome-extension,
 application/x-cocoa, cco
 application/x-conference, nsc
+application/x-coredump,
 application/x-cpio, cpio
 application/x-dbf, dbf
 application/x-dbt,
+application/x-debian-package, deb
 application/x-deepv, deepv
-application/x-director, dcr|dir|dxr
+application/x-director, dir|dxr
+application/x-dmp, dmp
+application/x-dosdriver,
 application/x-dosexec, dll
 application/x-dvi, dvi
 application/x-elc, elc
+application/x-empty,
 application/x-envoy, env|evy
 application/x-esrehber, es
 application/x-excel, xla|xld|xlk|xlt|xlv
 application/x-executable, exe
+application/x-font-gdos,
+application/x-font-pf2, pf2
+application/x-font-pfm, pfm
 application/x-font-sfn,
 application/x-font-ttf, ttf|ttc
+application/x-fptapplication/x-dbt,
 application/x-freelance, pre
+application/x-gamecube-rom,
+application/x-gdbm,
+application/x-gettext-translation,
 application/x-git,
 application/x-gsp, gsp
 application/x-gss, gss
@@ -102,46 +141,68 @@ application/x-hdf, hdf
 application/x-helpfile, help
 application/x-httpd-imap, imap
 application/x-ima, ima
+application/x-innosetup,
 application/x-internett-signup, ins
 application/x-inventor, iv
 application/x-ip2, ip
 application/x-java-applet,
 application/x-java-commerce, jcm
 application/x-java-image,
+application/x-java-jmod, jmod
 application/x-java-keystore,
+application/x-kdelnk,
 application/x-koan, skd|skm|skp|skt
 application/x-latex, latex|ltx
 application/x-livescreen, ivy
 application/x-lotus, wq1
+application/x-lz4+json, jsonlz4
+application/x-lz4, lz4
+application/x-lz4, lz4
+application/x-lzh-compressed,
 application/x-lzh, lzh
+application/x-lzip, lz
+application/x-lzma, lzma
+application/x-lzop, lzo
 application/x-lzx, lzx
 application/x-mach-binary, jnilib|dylib
 application/x-mach-executable,
 application/x-magic-cap-package-1.0, mc$
 application/x-mathcad, mcd
+application/x-maxis-dbpf,
 application/x-meme, mm
 application/x-midi, midi
 application/x-mif, mif
 application/x-mix-transfer, nix
 application/xml, opf
+application/x-mobipocket-ebook, mobi
+application/vnd.amazon.mobi8-ebook, azw|azw3
+application/x-msaccess, accdb
+application/x-ms-compress-szdd, fon
 application/x-ms-pdb, pdb
+application/x-ms-reader, lit
+application/x-n64-rom, z64
 application/x-navi-animation, ani
 application/x-navidoc, nvd
 application/x-navimap, map
 application/x-navistyle, stl
+application/x-nes-rom, nes
 application/x-netcdf, cdf|nc
 application/x-newton-compatible-pkg, pkg
+application/x-nintendo-ds-rom,
 application/x-object, o
 application/x-omcdatamaker, omcd
 application/x-omc, omc
 application/x-omcregerator, omcr
 application/x-pagemaker, pm4|pm5
 application/x-pcl, pcl
+application/x-pgp-keyring,
 application/x-pixclscript, plx
 application/x-pkcs7-certreqresp, p7r
 application/x-pkcs7-signature, p7a
 application/x-project, mpc|mpt|mpv|mpx
 application/x-qpro, wb1
+application/x-rar, rar
+application/x-rpm, rpm
 application/x-sdp, sdp
 application/x-sea, sea
 application/x-seelogo, sl
@@ -149,12 +210,17 @@ application/x-setupscript,
 application/x-sharedlib, so
 application/x-shar, shar
 application/x-shockwave-flash, swf
+application/x-snappy-framed,
 application/x-sprite, spr|sprite
 application/x-sqlite3,
+application/x-stargallery-thm,
+application/x-stuffit, sit
 application/x-sv4cpio, sv4cpio
 application/x-sv4crc, sv4crc
 application/x-tar, tar
 application/x-tbook, sbk|tbk
+application/x-terminfo,
+application/x-terminfo2,
 application/x-texinfo, texi|texinfo
 application/x-tex-tfm, tfm
 application/x-ustar, ustar
@@ -163,16 +229,22 @@ application/x-vnd.audioexplosion.mzz, mzz
 application/x-vnd.ls-xpix, xpix
 application/x-vrml, vrml
 application/x-wais-source, src|wsrc
+application/x-wine-extension-ini,
 application/x-wintalk, wtk
 application/x-world, svr
 application/x-wri, wri
 application/x-x509-ca-cert, der
 application/x-xz, xz
+application/x-zip,
+application/x-zstd, zst
 application/zip, zip
+application/zlib, z
+!audio/basic, au
 audio/it, it
 audio/make, funk|my|pfunk
 audio/midi, kar
 audio/mid, rmi
+audio/mp4, m4b
 audio/mpeg, m2a|mpa
 audio/ogg, ogg
 audio/s3m, s3m
@@ -180,7 +252,10 @@ audio/tsp-audio, tsi
 audio/tsplayer, tsp
 audio/vnd.qcelp, qcp
 audio/voxware, vox
+audio/x-aiff, aiff|aif
+audio/x-flac, flac
 audio/x-gsm, gsd|gsm
+audio/x-hx-aac-adts,
 audio/x-jam, jam
 audio/x-liveaudio, lam
 audio/x-m4a, m4a
@@ -194,17 +269,24 @@ audio/x-nspaudio, lma
 audio/x-pn-realaudio, ram|rm|rmm|rmp
 audio/x-psid, sid
 audio/x-realaudio, ra
+audio/x-s3m,
 audio/x-twinvq-plugin, vqe|vql
 audio/x-twinvq, vqf
 audio/x-voc, voc
 audio/x-wav, wav
+!audio/x-xbox360-executable, xex
+!audio/x-xbox-executable, xbe
 font/otf,
 font/sfnt,
+font/woff2, woff2
+font/woff, woff
+image/bmp,
 image/cmu-raster, rast
 image/fif, fif
 image/florian, flo|turbot
 image/g3fax, g3
 image/gif, gif
+image/heic, heic
 image/ief, ief|iefs
 image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg
 image/jutvision, jut
@@ -213,6 +295,9 @@ image/pict, pic|pict
 image/png, png|x-png
 !image/svg, svg
 !image/svg+xml,
+image/tiff,
+!image/vnd.adobe.photoshop, psd
+!image/vnd.djvu, djvu
 image/vnd.fpx, fpx
 image/vnd.microsoft.icon,
 image/vnd.rn-realflash, rf
@@ -220,9 +305,15 @@ image/vnd.rn-realpix, rp
 image/vnd.wap.wbmp, wbmp
 image/vnd.xiff, xif
 image/webp, webp
+image/wmf,
+image/x-3ds, 3ds
+image/x-award-bioslogo,
 image/x-cmu-raster, ras
+image/x-cur, tga
 image/x-dwg, dwg|dxf|svf
 image/x-eps,
+image/x-exr, exr
+image/x-gem,
 image/x-icns,
 !image/x-icon, ico
 image/x-jg, art
@@ -236,32 +327,31 @@ image/x-portable-graymap, pgm
 image/x-portable-pixmap, ppm
 image/x-quicktime, qif|qti|qtif
 image/x-rgb, rgb
+image/x-tga,
 image/x-tiff, tif|tiff
-image/tiff,
+image/x-win-bitmap,
 !image/x-xcf, xcf
 !image/x-xpixmap, xpm
+image/x-xwindowdump, xwd
+message/news,
 message/rfc822, mht|mhtml|mime
 model/vnd.dwf, dwf
+model/vnd.gdl, gdl
+model/vnd.gs.gdl, gdsl
 model/vrml, wrz
 model/x-pov, pov
 text/asp, asp
 text/css, css
-text/x-sass, sass
-text/x-scss, scss
 text/html, acgi|htm|html|htmls|htx|shtml
 text/javascript, js
 text/mcf, mcf
 text/pascal, pas
-text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
+text/PGP,
+text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml|make|log|markdown|yaml
+application/vnd.coffeescript, coffee
 text/richtext, rt|rtf|rtx
 text/rtf,
 text/scriplet, wsc
-text/x-awk, awk
-!video/x-jng, jng
-video/x-mng, mng
-image/x-cur, tga
-image/x-xwindowdump, xwd
-!image/vnd.adobe.photoshop, psd
 text/tab-separated-values, tsv
 text/troff, man|me|ms|roff|t|tr
 text/uri-list, uji|unis|uri|uris
@@ -273,6 +363,7 @@ text/webviewhtml, htt
 text/x-Algol68,
 text/x-asm, asm|s
 text/x-audiosoft-intra, aip
+text/x-awk, awk
 text/x-bcpl,
 text/x-c, c|cc|h
 text/x-c++, cpp|cxx|c++
@@ -287,23 +378,31 @@ text/x-makefile, am|mak
 text/xml, xml|pom|iml|plist
 text/x-m, m
 text/x-msdos-batch, bat
+text/x-ms-regedit, reg
+text/x-objective-c,
 text/x-pascal, p
 text/x-perl, pl
 text/x-php, php
+text/x-po, po
 text/x-python, py
 text/x-ruby, rb
+text/x-sass, sass
+text/x-scss, scss
 text/x-server-parsed-html, ssi
 text/x-setext, etx
 text/x-sgml, sgm|sgml
 text/x-shellscript, sh
 text/x-speech, talk
+text/x-tcl,
 text/x-tex, tex
 text/x-uil, uil
 text/x-uuencode, uue
 text/x-vcalendar, vcs
+text/x-vcard, vcf
 video/animaflex, afl
 video/avi, avi
 video/avs-video, avs
+video/MP2T,
 video/mp4, mp4
 video/mpeg, m1v|m2v|mpe|mpeg|mpg
 video/quicktime, moov|mov|qt
@@ -318,101 +417,36 @@ video/x-atomic3d-feature, fmf
 video/x-dl, dl
 video/x-dv, dif|dv
 video/x-fli, fli
+video/x-flv, flv
 video/x-isvideo, isu
+!video/x-jng, jng
+video/x-m4v, m4v
+video/x-matroska, mkv
+video/x-mng, mng
 video/x-motion-jpeg, mjpg
-video/x-ms-asf, asf|asx
+video/x-ms-asf, asf|asx|wmv
+video/x-msvideo, divx
 video/x-qtc, qtc
 video/x-sgi-movie, movie|mv
-application/x-7z-compressed, 7z
-application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
-text/x-po, po
-application/x-rpm, rpm
-application/x-debian-package, deb
-application/vnd.iccprofile, icm
-application/dicom, dcm
-image/x-exr, exr
-application/vnd.iccprofile, icm
-video/x-matroska, mkv
-application/x-empty,
-model/vnd.gdl, gdl
-model/vnd.gs.gdl, gdsl
-font/woff, woff
-font/woff2, woff2
-application/epub+zip, epub
-application/x-mobipocket-ebook, mobi
-audio/x-flac, flac
-application/x-rar, rar
-video/x-msvideo, divx
-video/x-flv, flv
-application/x-kdelnk,
-text/x-tcl,
-application/ogg, ogv
-application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
-application/vnd.ms-cab-compressed, cab
-audio/mp4, m4b
-!image/vnd.djvu, djvu
-application/x-ms-reader, lit
-application/CDFV2-corrupt,
-text/x-vcard, vcf
-application/x-innosetup,
-application/winhelp, hlp
-image/x-tga,
-application/x-wine-extension-ini,
-application/x-cbz, cbz
-application/x-cbr, cbr
-application/x-ms-compress-szdd, fon
-application/x-atari-7800-rom, a78
-application/x-nes-rom, nes
-application/x-font-pfm, pfm
-application/x-gettext-translation,
-image/wmf,
-application/pgp-keys,
-image/x-3ds, 3ds
-application/x-lz4, lz4
-application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
-application/vnd.oasis.opendocument.presentation, odp
-application/x-msaccess, accdb
-application/vnd.oasis.opendocument.spreadsheet, ods
-audio/x-aiff, aiff|aif
-text/x-ms-regedit, reg
-application/x-gamecube-rom,
-application/x-nintendo-ds-rom,
-text/x-objective-c,
-application/x-font-gdos,
-application/x-apple-diskimage,
-application/x-zstd, zst
-video/x-m4v, m4v
-message/news,
-application/vnd.symbian.install,
-application/x-lzh-compressed,
-application/x-dosdriver,
-application/vnd.tcpdump.pcap, pcap
 x-epoc/x-sisx-app,
-application/x-avira-qua,
-video/MP2T,
-application/x-snappy-framed,
-application/x-lz4+json, jsonlz4
-application/x-dmp, dmp
-application/zlib, z
-application/x-pgp-keyring,
-application/x-gdbm,
-application/x-font-pf2, pf2
-application/x-zip,
-application/x-coredump,
-application/x-java-jmod, jmod
-application/x-terminfo,
-application/x-terminfo2,
-application/x-arc,
-application/vnd.lotus-1-2-3,
-image/x-win-bitmap,
-application/x-maxis-dbpf,
-text/PGP,
-audio/x-hx-aac-adts,
-application/x-chrome-extension,
-image/heic, heic
-image/x-gem,
-application/x-lzma, lzma
-application/warc, warc
-application/x-lz4, lz4
-application/x-lzip, lz
-application/x-lzop, lzo
+application/x-zstd-dictionary,
+application/vnd.ms-outlook, msg
+image/x-olympus-orf, orf
+image/x-nikon-nef, nef
+image/x-fuji-raf, raf
+image/x-panasonic-raw, rw2|raw
+image/x-adobe-dng, dng
+image/x-canon-cr2, cr2
+image/x-canon-crw, crw
+image/x-dcraw,
+image/x-kodak-dcr, dcr
+image/x-kodak-k25, k25
+image/x-kodak-kdc, kdc
+image/x-minolta-mrw, mrw
+image/x-pentax-pef, pef
+image/x-sigma-x3f, x3f
+image/x-sony-arw, arw
+image/x-sony-sr2, sr2
+image/x-sony-srf, srf
+image/x-epson-erf, erf
+sist2/sidecar, s2meta
--- a/scripts/mime.py
+++ b/scripts/mime.py
@@ -3,6 +3,7 @@ noparse = set()
 ext_in_hash = set()

 major_mime = {
+    "sist2": 0,
    "model": 1,
    "example": 2,
    "message": 3,
@@ -18,7 +19,6 @@ major_mime = {

 pdf = (
    "application/pdf",
-    "application/x-cbz",
    "application/epub+zip",
    "application/vnd.ms-xpsdocument",
 )
@@ -62,6 +62,40 @@ doc = (
    "application/vnd.openxmlformats-officedocument.presentationml.presentation"
 )

+mobi = (
+    "application/x-mobipocket-ebook",
+    "application/vnd.amazon.mobi8-ebook"
+)
+
+markup = (
+    "text/xml",
+    "text/html",
+    "text/x-sgml"
+)
+
+raw = (
+    "image/x-olympus-orf",
+    "image/x-nikon-nef",
+    "image/x-fuji-raf",
+    "image/x-panasonic-raw",
+    "image/x-adobe-dng",
+    "image/x-canon-cr2",
+    "image/x-canon-crw",
+    "image/x-dcraw",
+    "image/x-kodak-dcr",
+    "image/x-kodak-k25",
+    "image/x-kodak-kdc",
+    "image/x-minolta-mrw",
+    "image/x-pentax-pef",
+    "image/x-sigma-x3f",
+    "image/x-sony-arw",
+    "image/x-sony-sr2",
+    "image/x-sony-srf",
+    "image/x-minolta-mrw",
+    "image/x-pentax-pef",
+    "image/x-epson-erf",
+)
+
 cnt = 1


@@ -82,8 +116,18 @@ def mime_id(mime):
        mime_id += " | 0x08000000"
    elif mime in doc:
        mime_id += " | 0x04000000"
+    elif mime in mobi:
+        mime_id += " | 0x02000000"
+    elif mime in markup:
+        mime_id += " | 0x01000000"
+    elif mime in raw:
+        mime_id += " | 0x00800000"
    elif mime == "application/x-empty":
+        cnt -= 1
        return "1"
+    elif mime == "sist2/sidecar":
+        cnt -= 1
+        return "2"
    return mime_id


@@ -91,7 +135,7 @@ def clean(t):
    return t.replace("/", "_").replace(".", "_").replace("+", "_").replace("-", "_")


-with open("mime.csv") as f:
+with open("scripts/mime.csv") as f:
    for l in f:
        mime, ext_list = l.split(",")
        if l.startswith("!"):
@@ -103,7 +147,7 @@ with open("mime.csv") as f:
    print("// **Generated by mime.py**")
    print("#ifndef MIME_GENERATED_C")
    print("#define MIME_GENERATED_C")
-    print("#include <glib-2.0/glib.h>\n")
+    print("#include <glib.h>\n")
    print("#include <stdlib.h>\n")
    # Enum
    print("enum mime {")
--- a/scripts/serve_static.py
+++ b/scripts/serve_static.py
@@ -1,10 +1,12 @@
 files = [
-    "web/css/bundle.css",
-    "web/css/bundle_dark.css",
-    "web/js/bundle.js",
-    "web/img/sprite-skin-flat.png",
-    "web/img/sprite-skin-flat-dark.png",
-    "web/search.html",
+    "src/static/css/bundle.css",
+    "src/static/css/bundle_dark.css",
+    "src/static/js/bundle.js",
+    "src/static/js/search.js",
+    "src/static/img/sprite-skin-flat.png",
+    "src/static/img/sprite-skin-flat-dark.png",
+    "src/static/search.html",
+    "src/static/stats.html",
 ]


--- a/src/cli.c
+++ b/src/cli.c
@@ -1,6 +1,5 @@
 #include "cli.h"
 #include "ctx.h"
-
 #include <tesseract/capi.h>

 #define DEFAULT_OUTPUT "index.sist2/"
@@ -10,10 +9,20 @@
 #define DEFAULT_REWRITE_URL ""

 #define DEFAULT_ES_URL "http://localhost:9200"
+#define DEFAULT_ES_INDEX "sist2"
 #define DEFAULT_BATCH_SIZE 100

-#define DEFAULT_BIND_ADDR "localhost"
-#define DEFAULT_PORT "4090"
+#define DEFAULT_LISTEN_ADDRESS "localhost:4090"
+#define DEFAULT_TREEMAP_THRESHOLD 0.0005
+
+#define DEFAULT_MAX_MEM_BUFFER 2000
+
+const char *TESS_DATAPATHS[] = {
+        "/usr/share/tessdata/",
+        "/usr/share/tesseract-ocr/tessdata/",
+        "./",
+        NULL
+};


 scan_args_t *scan_args_create() {
@@ -24,10 +33,18 @@ scan_args_t *scan_args_create() {
    return args;
 }

+/* Allocate a zero-initialised exec_args_t; the result is NULL if calloc fails. */
+exec_args_t *exec_args_create() {
+    return calloc(1, sizeof(exec_args_t));
+}
+
 void scan_args_destroy(scan_args_t *args) {
    if (args->name != NULL) {
        free(args->name);
    }
+    if (args->incremental != NULL) {
+        free(args->incremental);
+    }
    if (args->path != NULL) {
        free(args->path);
    }
@@ -39,6 +56,12 @@ void scan_args_destroy(scan_args_t *args) {

 void index_args_destroy(index_args_t *args) {
     //todo
+    if (args->es_mappings_path) { // es_mappings is only loaded from a file when a mappings path was given
+        free(args->es_mappings);
+    }
+    if (args->es_settings_path) { // likewise, es_settings is only loaded when a settings path was given
+        free(args->es_settings);
+    }
     free(args);
 }

@@ -47,6 +70,10 @@ void web_args_destroy(web_args_t *args) {
    free(args);
 }

+void exec_args_destroy(exec_args_t *args) { // releases only the struct itself; no member is freed here
+    free(args);
+}
+
 int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
    if (argc < 2) {
        fprintf(stderr, "Required positional argument: PATH.\n");
@@ -62,10 +89,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
    }

    if (args->incremental != NULL) {
-        abs_path = abspath(args->incremental);
+        args->incremental = abspath(args->incremental);
        if (abs_path == NULL) {
-            fprintf(stderr, "File not found: %s\n", args->incremental);
-            return 1;
+            sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
+            args->incremental = NULL;
        }
    }

@@ -107,7 +134,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
        return 1;
    }

-    if (args->depth < 0) {
+    if (args->depth <= 0) {
        args->depth = G_MAXINT32;
    } else {
        args->depth += 1;
@@ -115,6 +142,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {

    if (args->name == NULL) {
        args->name = g_path_get_basename(args->output);
+    } else {
+        char* tmp = malloc(strlen(args->name) + 1);
+        strcpy(tmp, args->name);
+        args->name = tmp;
    }

    if (args->rewrite_url == NULL) {
@@ -136,13 +167,53 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {

    if (args->tesseract_lang != NULL) {
        TessBaseAPI *api = TessBaseAPICreate();
-        ret = TessBaseAPIInit3(api, TESS_DATAPATH, args->tesseract_lang);
+
+        char filename[128];
+        sprintf(filename, "%s.traineddata", args->tesseract_lang);
+        const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
+        if (path == NULL) {
+            LOG_FATAL("cli.c", "Could not find tesseract language file!");
+        }
+
+        ret = TessBaseAPIInit3(api, path, args->tesseract_lang);
        if (ret != 0) {
            fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
            return 1;
        }
        TessBaseAPIEnd(api);
        TessBaseAPIDelete(api);
+
+        args->tesseract_path = path;
+    }
+
+    if (args->exclude_regex != NULL) {
+        const char *error;
+        int error_offset;
+
+        pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0);
+        if (error != NULL) {
+            LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset)
+        }
+
+        pcre_extra *re_extra = pcre_study(re, 0, &error);
+        if (error != NULL) {
+            LOG_FATALF("cli.c", "pcre_study returned error: %s", error)
+        }
+
+        ScanCtx.exclude = re;
+        ScanCtx.exclude_extra = re_extra;
+    } else {
+        ScanCtx.exclude = NULL;
+    }
+
+    if (args->treemap_threshold_str == 0) {
+        args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
+    } else {
+        args->treemap_threshold = atof(args->treemap_threshold_str);
+    }
+
+    if (args->max_memory_buffer == 0) {
+        args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
    }

    LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
@@ -156,7 +227,40 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
    LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
    LOG_DEBUGF("cli.c", "arg path=%s", args->path)
    LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
-    LOG_DEBUGF("cli.c", "arg ocr=%s", args->tesseract_lang)
+    LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
+    LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
+    LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
+    LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
+    LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
+    LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
+
+    return 0;
+}
+
+/**
+ * Read the whole file at file_path into a freshly allocated, NUL-terminated
+ * buffer and hand it back through dst.
+ *
+ * @param file_path path of the file to read
+ * @param dst       receives the buffer on success (release it with free());
+ *                  left untouched on every failure path
+ * @return 0 on success, 1 on failure (the error is logged)
+ */
+int load_external_file(const char *file_path, char **dst) {
+    struct stat info;
+    int res = stat(file_path, &info);
+
+    if (res == -1) {
+        LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
+        return 1;
+    }
+
+    int fd = open(file_path, O_RDONLY);
+    if (fd == -1) {
+        LOG_ERRORF("cli.c", "Error opening file '%s': %s\n", file_path, strerror(errno))
+        return 1;
+    }
+
+    char *buf = malloc(info.st_size + 1);
+    if (buf == NULL) {
+        LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno))
+        close(fd);
+        return 1;
+    }
+
+    // read() may return fewer bytes than requested, so keep going until the
+    // expected size is reached or end-of-file is hit.
+    size_t total = 0;
+    while (total < (size_t) info.st_size) {
+        ssize_t n = read(fd, buf + total, (size_t) info.st_size - total);
+        if (n < 0) {
+            // Log before cleanup so close()/free() cannot clobber errno.
+            LOG_ERRORF("cli.c", "Error reading file '%s': %s\n", file_path, strerror(errno))
+            free(buf);
+            close(fd);
+            return 1;
+        }
+        if (n == 0) {
+            // The file is shorter than stat() reported; keep what was read.
+            break;
+        }
+        total += (size_t) n;
+    }
+
+    // Terminate after the bytes actually read, not after st_size.
+    buf[total] = '\0';
+    *dst = buf;
+    close(fd);

     return 0;
 }
@@ -170,6 +274,13 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
        return 1;
    }

+    if (args->threads == 0) {
+        args->threads = 1;
+    } else if (args->threads < 0) {
+        fprintf(stderr, "Invalid threads: %d\n", args->threads);
+        return 1;
+    }
+
    char *index_path = abspath(argv[1]);
    if (index_path == NULL) {
        fprintf(stderr, "File not found: %s\n", argv[1]);
@@ -183,30 +294,26 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
        args->es_url = DEFAULT_ES_URL;
    }

+    if (args->es_index == NULL) {
+        args->es_index = DEFAULT_ES_INDEX;
+    }
+
    if (args->script_path != NULL) {
-        struct stat info;
-        int res = stat(args->script_path, &info);
-
-        if (res == -1) {
-            fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
+        if (load_external_file(args->script_path, &args->script) != 0) {
            return 1;
        }
-
-        int fd = open(args->script_path, O_RDONLY);
-        if (fd == -1) {
-            fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
-            return 1;
    }

-        args->script = malloc(info.st_size + 1);
-        res = read(fd, args->script, info.st_size);
-        if (res == -1) {
-            fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
+    if (args->es_settings_path != NULL) {
+        if (load_external_file(args->es_settings_path, &args->es_settings) != 0) {
            return 1;
        }
+    }

-        *(args->script + info.st_size) = '\0';
-        close(fd);
+    if (args->es_mappings_path != NULL) {
+        if (load_external_file(args->es_mappings_path, &args->es_mappings) != 0) {
+            return 1;
+        }
    }

    if (args->batch_size == 0) {
@@ -214,10 +321,16 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
    }

    LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
+    LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
    LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
+    LOG_DEBUGF("cli.c", "arg async_script=%s", args->async_script)
    LOG_DEBUGF("cli.c", "arg script=%s", args->script)
    LOG_DEBUGF("cli.c", "arg print=%d", args->print)
+    LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path)
+    LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings)
+    LOG_DEBUGF("cli.c", "arg es_settings_path=%s", args->es_settings_path)
+    LOG_DEBUGF("cli.c", "arg es_settings=%s", args->es_settings)
    LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
    LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset)

@@ -237,18 +350,57 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
        args->es_url = DEFAULT_ES_URL;
    }

-    if (args->bind == NULL) {
-        args->bind = DEFAULT_BIND_ADDR;
+    if (args->listen_address == NULL) {
+        args->listen_address = DEFAULT_LISTEN_ADDRESS;
    }

-    if (args->port == NULL) {
-        args->port = DEFAULT_PORT;
+    if (args->es_index == NULL) {
+        args->es_index = DEFAULT_ES_INDEX;
    }

    if (args->credentials != NULL) {
-        args->b64credentials = onion_base64_encode(args->credentials, (int) strlen(args->credentials));
-        //Remove trailing newline
-        *(args->b64credentials + strlen(args->b64credentials) - 1) = '\0';
+        char *ptr = strstr(args->credentials, ":");
+        if (ptr == NULL) {
+            fprintf(stderr, "Invalid --auth format, see usage\n");
+            return 1;
+        }
+
+        strncpy(args->auth_user, args->credentials, (ptr - args->credentials));
+        strcpy(args->auth_pass, ptr + 1);
+
+        if (strlen(args->auth_user) == 0) {
+            fprintf(stderr, "--auth username must be at least one character long");
+            return 1;
+        }
+
+        args->auth_enabled = TRUE;
+    } else {
+        args->auth_enabled = FALSE;
+    }
+
+    if (args->tag_credentials != NULL && args->credentials != NULL) {
+        fprintf(stderr, "--auth and --tag-auth are mutually exclusive");
+        return 1;
+    }
+
+    if (args->tag_credentials != NULL) {
+        char *ptr = strstr(args->tag_credentials, ":");
+        if (ptr == NULL) {
+            fprintf(stderr, "Invalid --tag-auth format, see usage\n");
+            return 1;
+        }
+
+        strncpy(args->auth_user, args->tag_credentials, (ptr - args->tag_credentials));
+        strcpy(args->auth_pass, ptr + 1);
+
+        if (strlen(args->auth_user) == 0) {
+            fprintf(stderr, "--tag-auth username must be at least one character long");
+            return 1;
+        }
+
+        args->tag_auth_enabled = TRUE;
+    } else {
+        args->tag_auth_enabled = FALSE;
    }

    args->index_count = argc - 1;
@@ -263,10 +415,12 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
    }

    LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
-    LOG_DEBUGF("cli.c", "arg bind=%s", args->bind)
-    LOG_DEBUGF("cli.c", "arg port=%s", args->port)
+    LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
+    LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
    LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
-    LOG_DEBUGF("cli.c", "arg b64credentials=%s", args->b64credentials)
+    LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials)
+    LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
+    LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
    LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
    for (int i = 0; i < args->index_count; i++) {
        LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i])
@@ -285,3 +439,39 @@ web_args_t *web_args_create() {
    return args;
 }

+// Validates `exec` arguments: PATH must pass the abspath() check, ES url/index fall back to
+// their defaults and the --script-file content is loaded. Returns 0 on success, 1 on error.
+int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
+    if (argc < 2) {
+        fprintf(stderr, "Required positional argument: PATH.\n");
+        return 1;
+    }
+
+    // the resolved path is only tested for NULL and freed; the raw argument is what is stored
+    char *resolved = abspath(argv[1]);
+    if (resolved == NULL) {
+        fprintf(stderr, "File not found: %s\n", argv[1]);
+        return 1;
+    }
+    args->index_path = argv[1];
+    free(resolved);
+
+    if (!args->es_url) {
+        args->es_url = DEFAULT_ES_URL;
+    }
+    if (!args->es_index) {
+        args->es_index = DEFAULT_ES_INDEX;
+    }
+
+    if (!args->script_path) {
+        LOG_FATAL("cli.c", "--script-file argument is required");
+    }
+
+    if (load_external_file(args->script_path, &args->script) != 0) {
+        return 1;
+    }
+
+    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
+    LOG_DEBUGF("cli.c", "arg script=%s", args->script)
+    return 0;
+}
--- a/src/cli.h
+++ b/src/cli.h
@@ -3,6 +3,8 @@

 #include "sist.h"

+#include "libscan/arc/arc.h"
+
 typedef struct scan_args {
    float quality;
    int size;
@@ -17,39 +19,76 @@ typedef struct scan_args {
    char *archive;
    archive_mode_t archive_mode;
    char *tesseract_lang;
+    const char *tesseract_path;
+    char *exclude_regex;
+    int fast;
+    const char* treemap_threshold_str;
+    double treemap_threshold;
+    int max_memory_buffer;
 } scan_args_t;

 scan_args_t *scan_args_create();
+
 void scan_args_destroy(scan_args_t *args);
+
 int scan_args_validate(scan_args_t *args, int argc, const char **argv);

 typedef struct index_args {
    char *es_url;
+    char *es_index;
    const char *index_path;
    const char *script_path;
    char *script;
+    const char *es_settings_path;
+    char *es_settings;
+    const char *es_mappings_path;
+    char *es_mappings;
    int print;
    int batch_size;
+    int async_script;
    int force_reset;
+    int threads;
 } index_args_t;

 typedef struct web_args {
    char *es_url;
-    char *bind;
-    char *port;
+    char *es_index;
+    char *listen_address;
    char *credentials;
-    char *b64credentials;
+    char *tag_credentials;
+    char auth_user[256];
+    char auth_pass[256];
+    int auth_enabled;
+    int tag_auth_enabled;
    int index_count;
    const char **indices;
 } web_args_t;

+typedef struct exec_args {      // arguments checked by exec_args_validate()
+    char *es_url;               // DEFAULT_ES_URL is used when left NULL
+    char *es_index;             // DEFAULT_ES_INDEX is used when left NULL
+    const char *index_path;     // first positional argument (PATH), stored as given
+    const char *script_path;    // --script-file value; validation requires it
+    int async_script;           // NOTE(review): not read by exec_args_validate() -- presumably forwarded to execute_update_script(); confirm
+    char *script;               // filled from script_path by load_external_file()
+} exec_args_t;
+
 index_args_t *index_args_create();
+
 void index_args_destroy(index_args_t *args);

 web_args_t *web_args_create();
+
 void web_args_destroy(web_args_t *args);

 int index_args_validate(index_args_t *args, int argc, const char **argv);
+
 int web_args_validate(web_args_t *args, int argc, const char **argv);

+exec_args_t *exec_args_create();
+
+void exec_args_destroy(exec_args_t *args);
+
+int exec_args_validate(exec_args_t *args, int argc, const char **argv);
+
 #endif
--- a/src/ctx.c
+++ b/src/ctx.c
@@ -0,0 +1,6 @@
+#include "ctx.h"
+
+ScanCtx_t ScanCtx;
+WebCtx_t WebCtx;
+IndexCtx_t IndexCtx;
+LogCtx_t LogCtx;
--- a/src/ctx.h
+++ b/src/ctx.h
@@ -2,8 +2,24 @@
 #define SIST2_CTX_H

 #include "sist.h"
+#include "tpool.h"
+#include "libscan/scan.h"
+#include "libscan/arc/arc.h"
+#include "libscan/comic/comic.h"
+#include "libscan/ebook/ebook.h"
+#include "libscan/font/font.h"
+#include "libscan/media/media.h"
+#include "libscan/ooxml/ooxml.h"
+#include "libscan/text/text.h"
+#include "libscan/mobi/scan_mobi.h"
+#include "libscan/raw/raw.h"
+#include "libscan/msdoc/msdoc.h"
+#include "src/io/store.h"

-struct {
+#include <glib.h>
+#include <pcre.h>
+
+typedef struct {
    struct index_t index;

    GHashTable *mime_table;
@@ -11,14 +27,8 @@ struct {

    tpool_t *pool;

-    int tn_size;
    int threads;
-    int content_size;
-    float tn_qscale;
    int depth;
-    archive_mode_t archive_mode;
-    int verbose;
-    int very_verbose;

    size_t stat_tn_size;
    size_t stat_index_size;
@@ -26,27 +36,54 @@ struct {
    GHashTable *original_table;
    GHashTable *copy_table;

-    pthread_mutex_t mupdf_mu;
-    char * tesseract_lang;
-} ScanCtx;
+    pcre *exclude;
+    pcre_extra *exclude_extra;
+    int fast;

-struct {
+    scan_arc_ctx_t arc_ctx;
+    scan_comic_ctx_t comic_ctx;
+    scan_ebook_ctx_t ebook_ctx;
+    scan_font_ctx_t font_ctx;
+    scan_media_ctx_t media_ctx;
+    scan_ooxml_ctx_t ooxml_ctx;
+    scan_text_ctx_t text_ctx;
+    scan_mobi_ctx_t mobi_ctx;
+    scan_raw_ctx_t raw_ctx;
+    scan_msdoc_ctx_t msdoc_ctx;
+} ScanCtx_t;
+
+typedef struct {
    int verbose;
    int very_verbose;
    int no_color;
-} LogCtx;
+} LogCtx_t;

-struct {
+typedef struct {
    char *es_url;
+    char *es_index;
    int batch_size;
-} IndexCtx;
+    tpool_t *pool;
+    store_t *tag_store;
+    GHashTable *tags;
+    store_t *meta_store;
+    GHashTable *meta;
+} IndexCtx_t;

-struct {
+typedef struct {
    char *es_url;
+    char *es_index;
    int index_count;
-    char *b64credentials;
-    struct index_t indices[16];
-} WebCtx;
+    char *auth_user;
+    char *auth_pass;
+    int auth_enabled;
+    int tag_auth_enabled;
+    struct index_t indices[64];
+} WebCtx_t;
+
+extern ScanCtx_t ScanCtx;
+extern WebCtx_t WebCtx;
+extern IndexCtx_t IndexCtx;
+extern LogCtx_t LogCtx;


 #endif
--- a/src/index/elastic.c
+++ b/src/index/elastic.c
@@ -1,11 +1,7 @@
 #include "elastic.h"
 #include "src/ctx.h"

-#include <stdlib.h>
 #include "web.h"
-#include <stdio.h>
-#include <string.h>
-#include <cJSON/cJSON.h>

 #include "static_generated.c"

@@ -13,19 +9,33 @@
 typedef struct es_indexer {
    int queued;
    char *es_url;
+    char *es_index;
    es_bulk_line_t *line_head;
    es_bulk_line_t *line_tail;
 } es_indexer_t;


-static es_indexer_t *Indexer;
+static __thread es_indexer_t *Indexer;

-void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
+void delete_queue(int max);
+
+void elastic_flush();
+
+void elastic_cleanup() { // flush, then release, the calling thread's indexer
+    elastic_flush(); // sends whatever is still queued (creates the indexer if it was NULL)
+    if (Indexer == NULL) return;
+    free(Indexer->es_index);
+    free(Indexer->es_url);
+    free(Indexer);
+    Indexer = NULL; // the thread-local pointer must not dangle: later calls test it against NULL
+}
+
+void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {

    cJSON *line = cJSON_CreateObject();

-    cJSON_AddStringToObject(line, "_id", uuid_str);
-    cJSON_AddStringToObject(line, "_index", "sist2");
+    cJSON_AddStringToObject(line, "_id", id_str);
+    cJSON_AddStringToObject(line, "_index", IndexCtx.es_index);
    cJSON_AddStringToObject(line, "_type", "_doc");
    cJSON_AddItemReferenceToObject(line, "_source", document);

@@ -37,23 +47,31 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
    cJSON_Delete(line);
 }

-void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
+void index_json_func(void *arg) {
+    // thread-pool work item: `arg` is the es_bulk_line_t that index_json() handed to tpool_add_work()
+    elastic_index_line((es_bulk_line_t *) arg);
+}

+void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
    char *json = cJSON_PrintUnformatted(document);

    size_t json_len = strlen(json);
    es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
    memcpy(bulk_line->line, json, json_len);
-    memcpy(bulk_line->uuid_str, uuid_str, UUID_STR_LEN);
+    memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
    *(bulk_line->line + json_len) = '\n';
    *(bulk_line->line + json_len + 1) = '\0';
    bulk_line->next = NULL;

    cJSON_free(json);
-    elastic_index_line(bulk_line);
+    tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
 }

-void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
+void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
+
+    if (Indexer == NULL) {
+        Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
+    }

    cJSON *body = cJSON_CreateObject();
    cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
@@ -64,12 +82,19 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
    cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
    cJSON_AddStringToObject(term_obj, "index", index_id);

-    char * str = cJSON_Print(body);
+    char *str = cJSON_Print(body);

    char bulk_url[4096];
-    snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
-    response_t *r = web_post(bulk_url, str, "Content-Type: application/json");
+    if (async) {
+        snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
+                 Indexer->es_index);
+    } else {
+        snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
+    }
+    response_t *r = web_post(bulk_url, str);
+    if (!async) {
        LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
+    }
    cJSON *resp = cJSON_Parse(r->body);

    cJSON_free(str);
@@ -84,31 +109,39 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
        cJSON_free(error_str);
    }

+    if (async) {
+        cJSON *task = cJSON_GetObjectItem(resp, "task");
+        LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
+    }
+
    cJSON_Delete(resp);
 }

-void elastic_flush() {
-
-    if (Indexer == NULL) {
-        Indexer = create_indexer(IndexCtx.es_url);
-    }
-
+void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
    es_bulk_line_t *line = Indexer->line_head;
-
-    int count = 0;
+    *count = 0;

    size_t buf_size = 0;
    size_t buf_cur = 0;
-    char *buf = malloc(1);
+    char *buf = malloc(8192);
+    size_t buf_capacity = 8192;
+
+    while (line != NULL && *count < max) {
+        char action_str[256];
+        snprintf(
+                action_str, sizeof(action_str),
+                "{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
+                line->path_md5_str, Indexer->es_index
+        );

-    while (line != NULL) {
-        char action_str[512];
-        snprintf(action_str, 512,
-                "{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
        size_t action_str_len = strlen(action_str);
-
        size_t line_len = strlen(line->line);
-        buf = realloc(buf, buf_size + line_len + action_str_len);
+
+        while (buf_size + line_len + action_str_len > buf_capacity) {
+            buf_capacity *= 2;
+            buf = realloc(buf, buf_capacity);
+        }
+
        buf_size += line_len + action_str_len;

        memcpy(buf + buf_cur, action_str, action_str_len);
@@ -116,50 +149,147 @@ void elastic_flush() {
        memcpy(buf + buf_cur, line->line, line_len);
        buf_cur += line_len;

-        es_bulk_line_t *tmp = line;
        line = line->next;
-        free(tmp);
-        count++;
-    }
-    buf = realloc(buf, buf_size + 1);
-    *(buf+buf_cur) = '\0';
-
-    Indexer->line_head = NULL;
-    Indexer->line_tail = NULL;
-    Indexer->queued = 0;
-
-    char bulk_url[4096];
-    snprintf(bulk_url, 4096, "%s/sist2/_bulk", Indexer->es_url);
-    response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
-
-    if (r->status_code == 0) {
-        LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
+        (*count)++;
    }

-    LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_cur / 1024, r->status_code);
+    if (buf_size + 1 > buf_capacity) {
+        buf = realloc(buf, buf_capacity + 1);
+    }

-    cJSON *ret_json = cJSON_Parse(r->body);
+    *(buf + buf_cur) = '\0';
+
+    *buf_len = buf_cur;
+    return buf;
+}
+
+void print_errors(response_t *r) {
+    char *tmp = malloc(r->size + 1);
+    memcpy(tmp, r->body, r->size);
+    *(tmp + r->size) = '\0';
+
+    cJSON *ret_json = cJSON_Parse(tmp);
    if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
        cJSON *err;
        cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
            if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
-                char* str = cJSON_Print(err);
+                char *str = cJSON_Print(err);
                LOG_ERRORF("elastic.c", "%s\n", str);
                cJSON_free(str);
            }
        }
    }
-
    cJSON_Delete(ret_json);
+    free(tmp);
+}
+
+void print_error(response_t *r) { // logs the "error" member of a JSON response body, if present
+    char *copy = malloc(r->size + 1); // terminated copy of the r->size body bytes for cJSON_Parse()
+    memcpy(copy, r->body, r->size);
+    copy[r->size] = '\0';
+    cJSON *root = cJSON_Parse(copy);
+    cJSON *error = cJSON_GetObjectItem(root, "error");
+    if (error != NULL) {
+        char *text = cJSON_Print(error);
+        LOG_ERRORF("elastic.c", "%s\n", text);
+        cJSON_free(text);
+    }
+    cJSON_Delete(root);
+    free(copy);
+}
+
+// Posts up to `max` queued lines to the _bulk endpoint, then removes them from the queue.
+// 413: retries with max / 2 (a single oversized document is dropped); 429: retries after 20s;
+// any other non-200 status: the error is logged and the whole queue is discarded.
+void _elastic_flush(int max) {
+    if (max == 0) {
+        LOG_WARNING("elastic.c", "calling _elastic_flush with 0 in queue")
+        return;
+    }
+
+    size_t buf_len;
+    int count;
+    void *buf = create_bulk_buffer(max, &count, &buf_len);
+
+    char bulk_url[4096];
+    snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_bulk?pipeline=tie", Indexer->es_url, Indexer->es_index);
+    response_t *r = web_post(bulk_url, buf);
+
+    if (r->status_code == 0) {
+        LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
+    }
+
+    if (r->status_code == 413) {
+        if (max <= 1) {
+            LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
+            free_response(r);
+            free(buf);
+            delete_queue(1);
+            if (Indexer->queued != 0) {
+                elastic_flush();
+            }
+            return;
+        }
+
+        LOG_WARNINGF("elastic.c", "Payload too large, retrying (%d documents)", count);
+        free_response(r);
+        free(buf);
+        _elastic_flush(max / 2);
+        return;
+
+    } else if (r->status_code == 429) {
+        free_response(r);
+        free(buf);
+        LOG_WARNING("elastic.c", "Got 429 status, will retry after delay")
+        usleep(1000000 * 20);
+        _elastic_flush(max);
+        return;
+
+    } else if (r->status_code != 200) {
+        // print_errors() dereferences the "errors" member unconditionally and would crash on a
+        // body that lacks it; print_error() logs the top-level "error" member and is NULL-safe
+        print_error(r);
+        delete_queue(Indexer->queued);
+
+    } else {
+        print_errors(r);
+        LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
+        delete_queue(max);
+
+        if (Indexer->queued != 0) {
+            elastic_flush();
+        }
+    }

    free_response(r);
    free(buf);
 }

+void delete_queue(int max) { // pops and frees up to `max` lines from the head of the queue
+    for (int i = 0; i < max && Indexer->line_head != NULL; i++) { // never walk past the tail
+        es_bulk_line_t *tmp = Indexer->line_head;
+        Indexer->line_head = tmp->next;
+        if (Indexer->line_head == NULL) {
+            Indexer->line_tail = NULL;
+        }
+        free(tmp);
+        Indexer->queued -= 1;
+    }
+}
+
+// Flushes every line queued on the calling thread, creating its indexer on first use.
+void elastic_flush() {
+    if (!Indexer) {
+        Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
+    }
+    int pending = Indexer->queued;
+    _elastic_flush(pending);
+}
+
 void elastic_index_line(es_bulk_line_t *line) {

    if (Indexer == NULL) {
-        Indexer = create_indexer(IndexCtx.es_url);
+        Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
    }

    if (Indexer->line_head == NULL) {
@@ -177,14 +307,18 @@ void elastic_index_line(es_bulk_line_t *line) {
    }
 }

-es_indexer_t *create_indexer(const char *url) {
+es_indexer_t *create_indexer(const char *url, const char *index) {

    char *es_url = malloc(strlen(url) + 1);
    strcpy(es_url, url);

+    char *es_index = malloc(strlen(index) + 1);
+    strcpy(es_index, index);
+
    es_indexer_t *indexer = malloc(sizeof(es_indexer_t));

    indexer->es_url = es_url;
+    indexer->es_index = es_index;
    indexer->queued = 0;
    indexer->line_head = NULL;
    indexer->line_tail = NULL;
@@ -192,41 +326,42 @@ es_indexer_t *create_indexer(const char *url) {
    return indexer;
 }

-void destroy_indexer(char * script, char index_id[UUID_STR_LEN]) {
+void finish_indexer(char *script, int async_script, char *index_id) {

    char url[4096];

-    snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
-    response_t *r = web_post(url, "", NULL);
+    snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
+    response_t *r = web_post(url, "");
    LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
    free_response(r);

    if (script != NULL) {
-        execute_update_script(script, index_id);
-    }
+        execute_update_script(script, async_script, index_id);
+        free(script);

-    snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
-    r = web_post(url, "", NULL);
+        snprintf(url, sizeof(url), "%s/%s/_refresh", IndexCtx.es_url, IndexCtx.es_index);
+        r = web_post(url, "");
        LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
        free_response(r);
+    }

-    snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
-    r = web_post(url, "", NULL);
+    snprintf(url, sizeof(url), "%s/%s/_forcemerge", IndexCtx.es_url, IndexCtx.es_index);
+    r = web_post(url, "");
    LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
    free_response(r);

-    if (Indexer != NULL) {
-        free(Indexer->es_url);
-        free(Indexer);
-    }
+    snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
+    r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
+    LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
+    free_response(r);
 }

-void elastic_init(int force_reset) {
+void elastic_init(int force_reset, const char* user_mappings, const char* user_settings) {

    // Check if index exists
    char url[4096];
-    snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
-    response_t *r = web_get(url);
+    snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
+    response_t *r = web_get(url, 30);
    int index_exists = r->status_code == 200;
    free_response(r);

@@ -235,42 +370,86 @@ void elastic_init(int force_reset) {
        LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
        free_response(r);

-        snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
-        r = web_put(url, "", NULL);
+        snprintf(url, sizeof(url), "%s/%s", IndexCtx.es_url, IndexCtx.es_index);
+        r = web_put(url, "");
+
+        if (r->status_code != 200) {
+            print_error(r);
+            LOG_FATAL("elastic.c", "Could not create index")
+        }
+
        LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
        free_response(r);

-        snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
-        r = web_post(url, "", NULL);
+        snprintf(url, sizeof(url), "%s/%s/_close", IndexCtx.es_url, IndexCtx.es_index);
+        r = web_post(url, "");
        LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
        free_response(r);

-        snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
-        r = web_put(url, settings_json, "Content-Type: application/json");
-        LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
+        snprintf(url, sizeof(url), "%s/_ingest/pipeline/tie", IndexCtx.es_url);
+        r = web_put(url, pipeline_json);
+        LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
        free_response(r);

-        snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url);
-        r = web_put(url, mappings_json, "Content-Type: application/json");
-        LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
+        snprintf(url, sizeof(url), "%s/%s/_settings", IndexCtx.es_url, IndexCtx.es_index);
+        r = web_put(url, user_settings ? user_settings : settings_json);
+        LOG_INFOF("elastic.c", "Update user_settings <%d>", r->status_code);
        free_response(r);

-        snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
-        r = web_post(url, "", NULL);
+        snprintf(url, sizeof(url), "%s/%s/_mappings/_doc?include_type_name=true", IndexCtx.es_url, IndexCtx.es_index);
+        r = web_put(url, user_mappings ? user_mappings : mappings_json);
+        LOG_INFOF("elastic.c", "Update user_mappings <%d>", r->status_code);
+        free_response(r);
+
+        snprintf(url, sizeof(url), "%s/%s/_open", IndexCtx.es_url, IndexCtx.es_index);
+        r = web_post(url, "");
        LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
        free_response(r);
    }
 }

-cJSON *elastic_get_document(const char *uuid_str) {
+cJSON *elastic_get_document(const char *id_str) {
    char url[4096];
-    snprintf(url, 4096, "%s/sist2/_doc/%s", WebCtx.es_url, uuid_str);
+    snprintf(url, sizeof(url), "%s/%s/_doc/%s", WebCtx.es_url, WebCtx.es_index, id_str);

-    response_t *r = web_get(url);
+    response_t *r = web_get(url, 3);
    cJSON *json = NULL;
    if (r->status_code == 200) {
-        json = cJSON_Parse(r->body);
+        char *tmp = malloc(r->size + 1);
+        memcpy(tmp, r->body, r->size);
+        *(tmp + r->size) = '\0';
+        json = cJSON_Parse(tmp);
+        free(tmp);
    }
    free_response(r);
    return json;
 }
+
+// Returns a heap-allocated string (ownership passes to the caller): the index state, or "" if unavailable.
+char *elastic_get_status() {
+    char url[4096];
+    snprintf(url, sizeof(url),
+             "%s/_cluster/state/metadata/%s?filter_path=metadata.indices.*.state", WebCtx.es_url, WebCtx.es_index);
+    response_t *r = web_get(url, 30);
+    cJSON *json = NULL;
+    const size_t status_len = 128;
+    char *status = calloc(status_len, sizeof(char));
+    if (r->status_code == 200) {
+        char *tmp = malloc(r->size + 1);
+        memcpy(tmp, r->body, r->size);
+        *(tmp + r->size) = '\0';
+        json = cJSON_Parse(tmp);
+        free(tmp);
+        // cJSON lookups return NULL for a NULL parent, so only the leaf has to be checked
+        const cJSON *metadata = cJSON_GetObjectItem(json, "metadata");
+        const cJSON *indices = cJSON_GetObjectItem(metadata, "indices");
+        const cJSON *index = cJSON_GetObjectItem(indices, WebCtx.es_index);
+        const cJSON *state = cJSON_GetObjectItem(index, "state");
+        if (cJSON_IsString(state) && state->valuestring != NULL) {
+            snprintf(status, status_len, "%s", state->valuestring);
+        }
+    }
+    free_response(r);
+    cJSON_Delete(json);
+    return status;
+}
--- a/src/index/elastic.h
+++ b/src/index/elastic.h
@@ -5,7 +5,7 @@

 typedef struct es_bulk_line {
    struct es_bulk_line *next;
-    char uuid_str[UUID_STR_LEN];
+    char path_md5_str[MD5_STR_LENGTH];
    char line[0];
 } es_bulk_line_t;

@@ -16,18 +16,21 @@ typedef struct es_indexer es_indexer_t;

 void elastic_index_line(es_bulk_line_t *line);

-void elastic_flush();
+void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);

-void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
+void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);

-void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
+es_indexer_t *create_indexer(const char *url, const char *index);

-es_indexer_t *create_indexer(const char* es_url);
+void elastic_cleanup();
+void finish_indexer(char *script, int async_script, char *index_id);

-void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);
+void elastic_init(int force_reset, const char* user_mappings, const char* user_settings);

-void elastic_init(int force_reset);
+cJSON *elastic_get_document(const char *id_str);

-cJSON *elastic_get_document(const char *uuid_str);
+char *elastic_get_status();
+
+void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);

 #endif
--- a/src/index/static_generated.c
+++ b/src/index/static_generated.c
--- a/src/index/web.c
+++ b/src/index/web.c
@@ -1,4 +1,11 @@
 #include "web.h"
+#include "src/sist.h"
+#include "src/ctx.h"
+
+#include <mongoose.h>
+#include <pthread.h>
+#include <curl/curl.h>
+

 size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {

@@ -9,11 +16,91 @@ size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
 }

 void free_response(response_t *resp) {
+    if (resp->body != NULL) {
        free(resp->body);
+    }
    free(resp);
 }

-response_t *web_get(const char *url) {
+// Advances the request started by web_post_async() by at most one select()/perform step.
+// Sets req->done when the transfer completed (response filled in, curl handles released)
+// or when curl_multi_fdset()/select() reported an error.
+void web_post_async_poll(subreq_ctx_t* req) {
+    if (req->done) {
+        return; // the handles may already be released, never touch them again
+    }
+
+    fd_set fdread;
+    fd_set fdwrite;
+    fd_set fdexcep;
+    int maxfd = -1;
+    FD_ZERO(&fdread);
+    FD_ZERO(&fdwrite);
+    FD_ZERO(&fdexcep);
+
+    CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
+    if(mc != CURLM_OK) {
+        req->done = TRUE;
+        return;
+    }
+
+    if (maxfd == -1) {
+        // libcurl has no socket to watch (or the transfer already ended): its documentation says
+        // to call curl_multi_perform() anyway, returning here would leave the request pending forever
+        curl_multi_perform(req->multi, &req->running_handles);
+    } else {
+        struct timeval timeout = {1, 0};
+        int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
+        if (rc == -1) {
+            req->done = TRUE;
+        } else if (rc > 0) {
+            curl_multi_perform(req->multi, &req->running_handles);
+        }
+    }
+
+    if (req->running_handles == 0) {
+        long status_code = 0; // CURLINFO_RESPONSE_CODE writes a long, response_t stores an int
+        req->done = TRUE;
+        req->response->body = req->response_buf.buf;
+        req->response->size = req->response_buf.cur;
+        curl_easy_getinfo(req->handle, CURLINFO_RESPONSE_CODE, &status_code);
+        req->response->status_code = (int) status_code;
+        curl_multi_cleanup(req->multi);
+        curl_easy_cleanup(req->handle);
+        curl_slist_free_all(req->headers);
+    }
+}
+
+// Starts a non-blocking JSON POST of `data` to `url`; drive it with web_post_async_poll().
+// `data` is not copied (CURLOPT_POSTFIELDS keeps the pointer), so it must outlive the request.
+subreq_ctx_t *web_post_async(const char *url, char *data) {
+    subreq_ctx_t *req = calloc(1, sizeof(subreq_ctx_t));
+    req->response = calloc(1, sizeof(response_t));
+    req->data = data;
+    req->response_buf = dyn_buffer_create();
+
+    req->handle = curl_easy_init();
+    CURL *curl = req->handle;
+    curl_easy_setopt(curl, CURLOPT_URL, url);
+    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&req->response_buf));
+    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
+    curl_easy_setopt(curl, CURLOPT_POST, 1L);
+    curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
+    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
+
+    // keep the list on the request: web_post_async_poll() frees req->headers once the transfer ends
+    req->headers = curl_slist_append(NULL, "Content-Type: application/json");
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, req->headers);
+
+    req->multi = curl_multi_init();
+    curl_multi_add_handle(req->multi, curl);
+    curl_multi_perform(req->multi, &req->running_handles);
+
+    LOG_DEBUGF("web.c", "async request POST %s", url)
+    return req;
+}
+
+response_t *web_get(const char *url, int timeout) {
    response_t *resp = malloc(sizeof(response_t));

    CURL *curl;
@@ -24,18 +111,24 @@ response_t *web_get(const char *url) {
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
    curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
+    curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
+
+    struct curl_slist *headers = NULL;
+    headers = curl_slist_append(headers, "Content-Type: application/json");
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);

    curl_easy_perform(curl);
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);

    curl_easy_cleanup(curl);
+    curl_slist_free_all(headers);

    resp->body = buffer.buf;
    resp->size = buffer.cur;
    return resp;
 }

-response_t *web_post(const char *url, const char *data, const char *header) {
+response_t *web_post(const char *url, const char *data) {

    response_t *resp = malloc(sizeof(response_t));

@@ -50,10 +143,8 @@ response_t *web_post(const char *url, const char *data, const char *header) {
    curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");

    struct curl_slist *headers = NULL;
-    if (header != NULL) {
-        headers = curl_slist_append(headers, header);
+    headers = curl_slist_append(headers, "Content-Type: application/json");
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
-    }

    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);

@@ -70,7 +161,7 @@ response_t *web_post(const char *url, const char *data, const char *header) {
 }


-response_t *web_put(const char *url, const char *data, const char *header) {
+response_t *web_put(const char *url, const char *data) {

    response_t *resp = malloc(sizeof(response_t));

@@ -86,11 +177,9 @@ response_t *web_put(const char *url, const char *data, const char *header) {
    curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
    curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );

-    if (header != NULL) {
    struct curl_slist *headers = NULL;
-        headers = curl_slist_append(headers, header);
+    headers = curl_slist_append(headers, "Content-Type: application/json");
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
-    }

    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);

@@ -98,6 +187,7 @@ response_t *web_put(const char *url, const char *data, const char *header) {
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);

    curl_easy_cleanup(curl);
+    curl_slist_free_all(headers);

    resp->body = buffer.buf;
    resp->size = buffer.cur;
@@ -119,11 +209,15 @@ response_t *web_delete(const char *url) {
    curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");

    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");
+    struct curl_slist *headers = NULL;
+    headers = curl_slist_append(headers, "Content-Type: application/json");
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);

    curl_easy_perform(curl);
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);

    curl_easy_cleanup(curl);
+    curl_slist_free_all(headers);

    resp->body = buffer.buf;
    resp->size = buffer.cur;
--- a/src/index/web.h
+++ b/src/index/web.h
@@ -2,6 +2,8 @@
 #define SIST2_WEB_H

 #include "src/sist.h"
+#include <mongoose.h>
+#include <curl/curl.h>

 typedef struct response {
    char *body;
@@ -9,9 +11,27 @@ typedef struct response {
    int status_code;
 } response_t;

-response_t *web_get(const char *url);
-response_t *web_post(const char * url, const char * data, const char* header);
-response_t *web_put(const char *url, const char *data, const char *header);
+// Pairs a response object with a completion flag.
+//  NOTE(review): no user of this type is visible in this part of the diff; presumably it is the
+//  per-request state handed to a mongoose event handler (mongoose.h is included above) - confirm in web.c
+typedef struct {
+    response_t *resp; // response associated with the request
+    int done; // completion flag; presumably non-zero once the request has finished - confirm
+} http_ev_data_t;
+
+// State of one asynchronous POST request: returned by web_post_async() and
+//  passed to web_post_async_poll() (see the prototypes below).
+//  NOTE(review): field roles are inferred from their types and names only, the
+//  implementation is not visible here - confirm against web.c
+typedef struct {
+    char* data; // request payload (web_post_async() receives it as a non-const char*)
+    dyn_buffer_t response_buf; // buffer for the response body
+    struct curl_slist *headers; // libcurl header list for the request
+    CURL *handle; // libcurl easy handle
+    CURLM *multi; // libcurl multi handle
+    response_t *response; // response object for this request
+    int running_handles; // presumably the running-handle count reported by libcurl - confirm
+    int done; // completion flag; presumably non-zero once the transfer has finished - confirm
+} subreq_ctx_t;
+
+response_t *web_get(const char *url, int timeout);
+response_t *web_post(const char * url, const char * data);
+void web_post_async_poll(subreq_ctx_t* req);
+subreq_ctx_t *web_post_async(const char *url, char *data);
+response_t *web_put(const char *url, const char *data);
 response_t *web_delete(const char *url);

 void free_response(response_t *resp);
--- a/src/io/serialize.c
+++ b/src/io/serialize.c
@@ -1,16 +1,18 @@
 #include "src/ctx.h"
 #include "serialize.h"
+#include "src/parsing/parse.h"
+#include "src/parsing/mime.h"

 static __thread int index_fd = -1;

 typedef struct {
-    unsigned char uuid[16];
-    unsigned long ino;
+    unsigned char path_md5[MD5_DIGEST_LENGTH];
    unsigned long size;
    unsigned int mime;
    int mtime;
    short base;
    short ext;
+    char has_parent;
 } line_t;

 void skip_meta(FILE *file) {
@@ -30,7 +32,7 @@ void skip_meta(FILE *file) {

 void write_index_descriptor(char *path, index_descriptor_t *desc) {
    cJSON *json = cJSON_CreateObject();
-    cJSON_AddStringToObject(json, "uuid", desc->uuid);
+    cJSON_AddStringToObject(json, "id", desc->id);
    cJSON_AddStringToObject(json, "version", desc->version);
    cJSON_AddStringToObject(json, "root", desc->root);
    cJSON_AddStringToObject(json, "name", desc->name);
@@ -39,11 +41,14 @@ void write_index_descriptor(char *path, index_descriptor_t *desc) {
    cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);

    int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
-    if (fd == -1) {
-        perror(path);
+    if (fd < 0) {
+        LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
    }
    char *str = cJSON_Print(json);
-    write(fd, str, strlen(str));
+    int ret = write(fd, str, strlen(str));
+    if (ret == -1) {
+        LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
+    }
    free(str);
    close(fd);

@@ -57,11 +62,14 @@ index_descriptor_t read_index_descriptor(char *path) {
    int fd = open(path, O_RDONLY);

    if (fd == -1) {
-        LOG_FATAL("serialize.c", "Invalid/corrupt index (Could not find descriptor)\n")
+        LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
    }

    char *buf = malloc(info.st_size + 1);
-    read(fd, buf, info.st_size);
+    int ret = read(fd, buf, info.st_size);
+    if (ret == -1) {
+        LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
+    }
    *(buf + info.st_size) = '\0';
    close(fd);

@@ -74,7 +82,7 @@ index_descriptor_t read_index_descriptor(char *path) {
    strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
    descriptor.root_len = (short) strlen(descriptor.root);
    strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
-    strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
+    strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
    if (cJSON_GetObjectItem(json, "type") == NULL) {
        strcpy(descriptor.type, INDEX_TYPE_BIN);
    } else {
@@ -118,6 +126,32 @@ char *get_meta_key_text(enum metakey meta_key) {
            return "font_name";
        case MetaParent:
            return "parent";
+        case MetaExifMake:
+            return "exif_make";
+        case MetaExifSoftware:
+            return "exif_software";
+        case MetaExifExposureTime:
+            return "exif_exposure_time";
+        case MetaExifFNumber:
+            return "exif_fnumber";
+        case MetaExifFocalLength:
+            return "exif_focal_length";
+        case MetaExifUserComment:
+            return "exif_user_comment";
+        case MetaExifIsoSpeedRatings:
+            return "exif_iso_speed_ratings";
+        case MetaExifModel:
+            return "exif_model";
+        case MetaExifDateTime:
+            return "exif_datetime";
+        case MetaAuthor:
+            return "author";
+        case MetaModifiedBy:
+            return "modified_by";
+        case MetaThumbnail:
+            return "thumbnail";
+        case MetaPages:
+            return "pages";
        default:
            return NULL;
    }
@@ -140,8 +174,8 @@ void write_document(document_t *doc) {
    dyn_buffer_t buf = dyn_buffer_create();

    // Ignore root directory in the file path
-    doc->ext = doc->ext - ScanCtx.index.desc.root_len;
-    doc->base = doc->base - ScanCtx.index.desc.root_len;
+    doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
+    doc->base = (short) (doc->base - ScanCtx.index.desc.root_len);
    doc->filepath += ScanCtx.index.desc.root_len;

    dyn_buffer_write(&buf, doc, sizeof(line_t));
@@ -152,11 +186,11 @@ void write_document(document_t *doc) {
        dyn_buffer_write_char(&buf, meta->key);

        if (IS_META_INT(meta->key)) {
-            dyn_buffer_write_int(&buf, meta->intval);
+            dyn_buffer_write_int(&buf, meta->int_val);
        } else if (IS_META_LONG(meta->key)) {
-            dyn_buffer_write_long(&buf, meta->longval);
+            dyn_buffer_write_long(&buf, meta->long_val);
        } else {
-            dyn_buffer_write_str(&buf, meta->strval);
+            dyn_buffer_write_str(&buf, meta->str_val);
        }

        meta_line_t *tmp = meta;
@@ -167,7 +201,7 @@ void write_document(document_t *doc) {

    int res = write(index_fd, buf.buf, buf.cur);
    if (res == -1) {
-        perror("write");
+        LOG_FATALF("serialize.c", "Could not write document: %s", strerror(errno))
    }
    ScanCtx.stat_index_size += buf.cur;
    dyn_buffer_destroy(&buf);
@@ -175,6 +209,8 @@ void write_document(document_t *doc) {

+// Per-thread teardown: closes this thread's index file descriptor (index_fd is
+//  thread-local), then calls cleanup_parse() and cleanup_font().
 void thread_cleanup() {
    close(index_fd);
+    cleanup_parse();
+    cleanup_font();
 }


@@ -183,9 +219,9 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
    dyn_buffer_t buf = dyn_buffer_create();

    FILE *file = fopen(path, "rb");
-    while (1) {
+    while (TRUE) {
        buf.cur = 0;
-        fread((void *) &line, 1, sizeof(line_t), file);
+        size_t _ = fread((void *) &line, 1, sizeof(line_t), file);
        if (feof(file)) {
            break;
        }
@@ -193,14 +229,19 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
        cJSON *document = cJSON_CreateObject();
        cJSON_AddStringToObject(document, "index", index_id);

-        char uuid_str[UUID_STR_LEN];
-        uuid_unparse(line.uuid, uuid_str);
+        char path_md5_str[MD5_STR_LENGTH];
+        buf2hex(line.path_md5, sizeof(line.path_md5), path_md5_str);

+        const char *mime_text = mime_get_mime_text(line.mime);
+        if (mime_text == NULL) {
+            cJSON_AddNullToObject(document, "mime");
+        } else {
            cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
+        }
        cJSON_AddNumberToObject(document, "size", (double) line.size);
        cJSON_AddNumberToObject(document, "mtime", line.mtime);

-        int c;
+        int c = 0;
        while ((c = getc(file)) != 0) {
            dyn_buffer_write_char(&buf, (char) c);
        }
@@ -212,42 +253,42 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
        } else {
            *(buf.buf + line.ext) = '\0';
        }
-        cJSON_AddStringToObject(document, "name", buf.buf + line.base);
+
+        char tmp[PATH_MAX * 3];
+
+        str_escape(tmp, buf.buf + line.base);
+        cJSON_AddStringToObject(document, "name", tmp);
+
        if (line.base > 0) {
            *(buf.buf + line.base - 1) = '\0';
-            cJSON_AddStringToObject(document, "path", buf.buf);
+
+            str_escape(tmp, buf.buf);
+            cJSON_AddStringToObject(document, "path", tmp);
        } else {
            cJSON_AddStringToObject(document, "path", "");
        }

        enum metakey key = getc(file);
+        size_t ret = 0;
        while (key != '\n') {
            switch (key) {
+                case MetaPages:
                case MetaWidth:
                case MetaHeight: {
                    int value;
-                    fread(&value, sizeof(int), 1, file);
+                    ret = fread(&value, sizeof(int), 1, file);
                    cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
                    break;
                }
                case MetaMediaDuration:
                case MetaMediaBitrate: {
                    long value;
-                    fread(&value, sizeof(long), 1, file);
+                    ret = fread(&value, sizeof(long), 1, file);
                    cJSON_AddNumberToObject(document, get_meta_key_text(key), (double) value);
                    break;
                }
                case MetaMediaAudioCodec:
-                case MetaMediaVideoCodec: {
-                    int value;
-                    fread(&value, sizeof(int), 1, file);
-                    const AVCodecDescriptor *desc = avcodec_descriptor_get(value);
-                    if (desc != NULL) {
-                        cJSON_AddStringToObject(document, get_meta_key_text(key), desc->name);
-                    }
-                    break;
-                }
-
+                case MetaMediaVideoCodec:
                case MetaContent:
                case MetaArtist:
                case MetaAlbum:
@@ -255,6 +296,18 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
                case MetaGenre:
                case MetaFontName:
                case MetaParent:
+                case MetaExifMake:
+                case MetaExifSoftware:
+                case MetaExifExposureTime:
+                case MetaExifFNumber:
+                case MetaExifFocalLength:
+                case MetaExifUserComment:
+                case MetaExifIsoSpeedRatings:
+                case MetaExifDateTime:
+                case MetaExifModel:
+                case MetaAuthor:
+                case MetaModifiedBy:
+                case MetaThumbnail:
                case MetaTitle: {
                    buf.cur = 0;
                    while ((c = getc(file)) != 0) {
@@ -273,8 +326,36 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
            key = getc(file);
        }

-        func(document, uuid_str);
+        // Merge user-supplied metadata (looked up by the document's path MD5) into the document
+        cJSON *meta_obj = NULL;
+        if (IndexCtx.meta != NULL) {
+            const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
+            if (meta_string != NULL) {
+                meta_obj = cJSON_Parse(meta_string);
+            }
+
+            // cJSON_Parse() returns NULL for malformed JSON: skip the merge rather than
+            //  dereference a NULL pointer
+            if (meta_obj != NULL) {
+                // Each named member of the stored object replaces the document's field of the same name
+                for (cJSON *child = meta_obj->child; child != NULL; child = child->next) {
+                    if (child->string == NULL) {
+                        // Unnamed item (the stored value was not a JSON object)
+                        continue;
+                    }
+
+                    // The name is used in place instead of being strcpy()'d into a fixed-size
+                    //  buffer: `child` is owned by meta_obj, so removing a field from `document`
+                    //  does not invalidate it
+                    cJSON_DeleteItemFromObject(document, child->string);
+                    cJSON_AddItemReferenceToObject(document, child->string, child);
+                }
+            }
+        }
+
+        if (IndexCtx.tags != NULL) {
+            const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
+            if (tags_string != NULL) {
+                cJSON *tags_arr = cJSON_Parse(tags_string);
+                cJSON_DeleteItemFromObject(document, "tag");
+                cJSON_AddItemToObject(document, "tag", tags_arr);
+            }
+        }
+
+        func(document, path_md5_str);
        cJSON_Delete(document);
+        if (meta_obj) {
+            cJSON_Delete(meta_obj);
+        }
    }
    dyn_buffer_destroy(&buf);
    fclose(file);
@@ -298,11 +379,11 @@ const char *json_type_array_fields[] = {
 void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) {

    FILE *file = fopen(path, "r");
-    while (1) {
+    while (TRUE) {
        char *line = NULL;
        size_t len;
        size_t read = getline(&line, &len, file);
-        if (read == -1) {
+        // `read` is an unsigned size_t, so `read < 0` can never be true and EOF would go
+        //  unnoticed; getline() reports EOF/error with -1, which converts to (size_t) -1
+        if (read == (size_t) -1) {
            if (line) {
                free(line);
            }
@@ -318,7 +399,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
        }

        cJSON *document = cJSON_CreateObject();
-        const char *uuid_str = cJSON_GetObjectItem(input, "_id")->valuestring;
+        const char *id_str = cJSON_GetObjectItem(input, "_id")->valuestring;

        for (int i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) {
            cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]);
@@ -346,7 +427,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
            }
        }

-        func(document, uuid_str);
+        func(document, id_str);
        cJSON_Delete(document);
        cJSON_Delete(input);

@@ -354,7 +435,7 @@ void read_index_json(const char *path, UNUSED(const char *index_id), index_func
    fclose(file);
 }

-void read_index(const char *path, const char index_id[UUID_STR_LEN], const char *type, index_func func) {
+void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {

    if (strcmp(type, INDEX_TYPE_BIN) == 0) {
        read_index_bin(path, index_id, func);
@@ -367,13 +448,15 @@ void incremental_read(GHashTable *table, const char *filepath) {
    FILE *file = fopen(filepath, "rb");
    line_t line;

+    LOG_DEBUGF("serialize.c", "Incremental read %s", filepath)
+
    while (1) {
-        fread((void *) &line, 1, sizeof(line_t), file);
-        if (feof(file)) {
+        size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
+        if (ret != 1 || feof(file)) {
            break;
        }

-        incremental_put(table, line.ino, line.mtime);
+        incremental_put(table, line.path_md5, line.mtime);

        while ((getc(file))) {}
        skip_meta(file);
@@ -391,41 +474,55 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
    FILE *dst_file = fopen(dst_filepath, "ab");
    line_t line;

-    while (1) {
-        fread((void *) &line, 1, sizeof(line_t), file);
-        if (feof(file)) {
+    LOG_DEBUGF("serialize.c", "Incremental copy %s", filepath)
+
+    while (TRUE) {
+        size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
+        if (ret != 1 || feof(file)) {
            break;
        }

-        if (incremental_get(copy_table, line.ino)) {
+        // Assume that files with parents still exist.
+        //  One way to "fix" this would be to check if the parent is marked for copy but it would consistently
+        //  delete files with grandparents, which is a side-effect worse than having orphaned files
+        if (line.has_parent || incremental_get(copy_table, line.path_md5)) {
            fwrite(&line, sizeof(line), 1, dst_file);

-            size_t buf_len;
-            char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
-            store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
-            free(buf);
-
+            // Copy filepath
+            char filepath_buf[PATH_MAX];
            char c;
+            char *ptr = filepath_buf;
            while ((c = (char) getc(file))) {
-                fwrite(&c, sizeof(c), 1, dst_file);
+                *ptr++ = c;
+            }
+            *ptr = '\0';
+            fwrite(filepath_buf, (ptr - filepath_buf) + 1, 1, dst_file);
+
+            // Copy tn store contents
+            size_t buf_len;
+            char path_md5[MD5_DIGEST_LENGTH];
+            MD5((unsigned char *) filepath_buf, (ptr - filepath_buf), (unsigned char *) path_md5);
+            char *buf = store_read(store, path_md5, sizeof(path_md5), &buf_len);
+            if (buf_len != 0) {
+                store_write(dst_store, path_md5, sizeof(path_md5), buf, buf_len);
+                free(buf);
            }
-            fwrite("\0", sizeof(c), 1, dst_file);

            enum metakey key;
            while (1) {
                key = getc(file);
+                fwrite(&key, sizeof(char), 1, dst_file);
                if (key == '\n') {
                    break;
                }
-                fwrite(&key, sizeof(char), 1, dst_file);

                if (IS_META_INT(key)) {
                    int val;
-                    fread(&val, sizeof(val), 1, file);
+                    ret = fread(&val, sizeof(val), 1, file);
                    fwrite(&val, sizeof(val), 1, dst_file);
                } else if (IS_META_LONG(key)) {
                    long val;
-                    fread(&val, sizeof(val), 1, file);
+                    ret = fread(&val, sizeof(val), 1, file);
                    fwrite(&val, sizeof(val), 1, dst_file);
                } else {
                    while ((c = (char) getc(file))) {
@@ -435,8 +532,10 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                }
            }
        } else {
+            while ((getc(file))) {}
            skip_meta(file);
        }
    }
    fclose(file);
+    fclose(dst_file);
 }
--- a/src/io/serialize.h
+++ b/src/io/serialize.h
@@ -2,16 +2,19 @@
 #define SIST2_SERIALIZE_H

 #include "src/sist.h"
-#include <sys/syscall.h>
+#include "store.h"

-typedef void(*index_func)(cJSON *, const char[UUID_STR_LEN]);
+#include <sys/syscall.h>
+#include <glib.h>
+
+typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);

 void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                      const char *dst_filepath, GHashTable *copy_table);

 void write_document(document_t *doc);

-void read_index(const char *path, const char[UUID_STR_LEN], const char *type, index_func);
+void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);

 void incremental_read(GHashTable *table, const char *filepath);

--- a/src/io/store.c
+++ b/src/io/store.c
@@ -1,9 +1,10 @@
 #include "store.h"
 #include "src/ctx.h"

-store_t *store_create(char *path) {
+store_t *store_create(char *path, size_t chunk_size) {

    store_t *store = malloc(sizeof(struct store_t));
+    store->chunk_size = chunk_size;
    pthread_rwlock_init(&store->lock, NULL);

    mdb_env_create(&store->env);
@@ -15,11 +16,10 @@ store_t *store_create(char *path) {
    );

    if (open_ret != 0) {
-        fprintf(stderr, "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path);
-        exit(1);
+        LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
    }

-    store->size = (size_t) 1024 * 1024 * 5;
+    store->size = (size_t) store->chunk_size;
    ScanCtx.stat_tn_size = 0;
    mdb_env_set_mapsize(store->env, store->size);

@@ -40,12 +40,20 @@ void store_destroy(store_t *store) {
    free(store);
 }

+// Flush the store's LMDB environment by calling mdb_env_sync() with force=TRUE.
+void store_flush(store_t *store) {
+    mdb_env_sync(store->env, TRUE);
+}
+
 void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {

    if (LogCtx.very_verbose) {
-        char uuid_str[UUID_STR_LEN];
-        uuid_unparse((unsigned char *) key, uuid_str);
-        LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
+        if (key_len == MD5_DIGEST_LENGTH) {
+            char path_md5_str[MD5_STR_LENGTH];
+            buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
+            LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
+        } else {
+            LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
+        }
    }

    MDB_val mdb_key;
@@ -70,7 +78,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
        // Cannot resize when there is a opened transaction.
        //  Resize take effect on the next commit.
        pthread_rwlock_wrlock(&store->lock);
-        store->size += 1024 * 1024 * 50;
+        store->size += store->chunk_size;
        mdb_env_set_mapsize(store->env, store->size);
        mdb_txn_begin(store->env, NULL, 0, &txn);
        put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
@@ -82,7 +90,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
    pthread_rwlock_unlock(&store->lock);

    if (put_ret != 0) {
-        printf("%s\n", mdb_strerror(put_ret));
+        LOG_ERROR("store.c", mdb_strerror(put_ret))
    }
 }

@@ -111,3 +119,42 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
    return buf;
 }

+/**
+ * Load every key/value pair of the store into a newly created hash table.
+ *
+ * The table hashes and compares its keys as C strings (g_str_hash / g_str_equal),
+ * but LMDB hands back length-delimited buffers. Each key and value is therefore
+ * copied with one extra terminating NUL byte so it is always a valid C string.
+ *
+ * @param store store to read from
+ * @return hash table that owns its keys and values (both released with free()
+ *         when the table is destroyed); empty if the store could not be read
+ */
+GHashTable *store_read_all(store_t *store) {
+
+    int count = 0;
+
+    GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
+
+    MDB_txn *txn = NULL;
+    int ret = mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
+    if (ret != 0) {
+        LOG_ERROR("store.c", mdb_strerror(ret))
+        return table;
+    }
+
+    MDB_cursor *cur = NULL;
+    ret = mdb_cursor_open(txn, store->dbi, &cur);
+    if (ret != 0) {
+        LOG_ERROR("store.c", mdb_strerror(ret))
+        mdb_txn_abort(txn);
+        return table;
+    }
+
+    MDB_val key;
+    MDB_val value;
+
+    while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
+        // +1: room for the NUL terminator (harmless if the stored data already ends with one)
+        char *key_str = malloc(key.mv_size + 1);
+        memcpy(key_str, key.mv_data, key.mv_size);
+        key_str[key.mv_size] = '\0';
+
+        char *val_str = malloc(value.mv_size + 1);
+        memcpy(val_str, value.mv_data, value.mv_size);
+        val_str[value.mv_size] = '\0';
+
+        g_hash_table_insert(table, key_str, val_str);
+        count += 1;
+    }
+
+    const char *path;
+    mdb_env_get_path(store->env, &path);
+    LOG_DEBUGF("store.c", "Read %d entries from %s", count, path);
+
+    mdb_cursor_close(cur);
+    // Read-only transaction: nothing to commit
+    mdb_txn_abort(txn);
+    return table;
+}
+
+
+/**
+ * Copy the store's LMDB environment into the directory `destination`.
+ *
+ * @param store       store to copy
+ * @param destination directory that receives the copy; it is created with
+ *                    owner read/write/execute permissions before copying
+ */
+void store_copy(store_t *store, const char *destination) {
+    // The mkdir() result is not checked here: if the directory is unusable,
+    //  mdb_env_copy() fails and the error is reported below
+    mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR);
+
+    int copy_ret = mdb_env_copy(store->env, destination);
+    if (copy_ret != 0) {
+        // Do not fail silently, otherwise the caller ends up with a missing/partial copy unnoticed
+        LOG_ERROR("store.c", mdb_strerror(copy_ret))
+    }
+}
--- a/src/io/store.h
+++ b/src/io/store.h
@@ -4,21 +4,32 @@
 #include <pthread.h>
 #include <lmdb.h>

+#include <glib.h>
+
+// Store sizes in bytes (presumably the chunk_size values given to store_create() - confirm at the call sites).
+//  Parenthesized so the macros expand safely inside larger expressions, e.g. `x / STORE_SIZE_TN`
+#define STORE_SIZE_TN (1024 * 1024 * 5)
+#define STORE_SIZE_TAG (1024 * 16)
+#define STORE_SIZE_META STORE_SIZE_TAG
+
 typedef struct store_t {
    MDB_dbi dbi;
    MDB_env *env;
    size_t size;
+    size_t chunk_size;
    pthread_rwlock_t lock;
 } store_t;

-#include "src/sist.h"
-
-store_t *store_create(char *path);
+store_t *store_create(char *path, size_t chunk_size);

 void store_destroy(store_t *store);

 void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);

+void store_flush(store_t *store);
+
 char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);

+GHashTable *store_read_all(store_t *store);
+
+void store_copy(store_t *store, const char *destination);
+
 #endif
--- a/src/io/walk.c
+++ b/src/io/walk.c
@@ -1,5 +1,8 @@
 #include "walk.h"
 #include "src/ctx.h"
+#include "src/parsing/parse.h"
+
+#include <ftw.h>

 __always_inline
 parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
@@ -15,12 +18,13 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
        job->ext = len;
    }

-    job->info = *info;
+    job->vfile.info = *info;

-    memset(job->parent, 0, 16);
+    memset(job->parent, 0, MD5_DIGEST_LENGTH);

    job->vfile.filepath = job->filepath;
    job->vfile.read = fs_read;
+    job->vfile.reset = fs_reset;
    job->vfile.close = fs_close;
    job->vfile.fd = -1;
    job->vfile.is_fs_file = TRUE;
@@ -28,8 +32,18 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
    return job;
 }

+// Output vector for pcre_exec(). It is a single global buffer, so EXCLUDED() is not safe to
+//  evaluate from several threads at once
+int sub_strings[30];
+// True when `str` matches the ScanCtx.exclude pattern.
+//  pcre_exec() expects the NUMBER OF ELEMENTS of the output vector, not its size in bytes:
+//  passing sizeof(sub_strings) would let it write past the end of the array
+#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, (str), (int) strlen(str), 0, 0, sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
+
 int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
-    if (ftw->level <= ScanCtx.depth && typeflag == FTW_F && S_ISREG(info->st_mode)) {
+
+    if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) {
+
+        if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
+            LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
+            return 0;
+        }
+
        parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
        tpool_add_work(ScanCtx.pool, parse, job);
    }
--- a/src/io/walk.h
+++ b/src/io/walk.h
@@ -3,8 +3,6 @@

 #define _XOPEN_SOURCE 500

-#include "src/sist.h"
-
 int walk_directory_tree(const char *);

 #endif
--- a/src/log.c
+++ b/src/log.c
@@ -1,15 +1,17 @@
 #include "log.h"

+#include <pthread.h>
+#include <stdarg.h>
+
 const char *log_colors[] = {
-        "\033[34m", "\033[01;34m", "\033[0m",
-        "\033[01;33m", "\033[31m", "\033[01;31m"
+        "\033[34m", "\033[01;34m", "\033[01;33m", "\033[0m", "\033[31m", "\033[01;31m"
 };

 const char *log_levels[] = {
        "DEBUG", "INFO", "WARNING", "ERROR", "FATAL"
 };

-void sist_logf(char *filepath, int level, char *format, ...) {
+void vsist_logf(const char *filepath, int level, char *format, va_list ap) {

    static int is_tty = -1;
    if (is_tty == -1) {
@@ -31,23 +33,20 @@ void sist_logf(char *filepath, int level, char *format, ...) {
    if (is_tty) {
        log_len = snprintf(
                log_str, sizeof(log_str),
-                "\033[%dm[%04X]%s [%s] [%s %s] ",
+                "\033[%dm[%04llX]%s [%s] [%s %s] ",
                31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
                datetime, log_levels[level], filepath
        );
    } else {
        log_len = snprintf(
                log_str, sizeof(log_str),
-                "[%04X] [%s] [%s %s] ",
+                "[%04llX] [%s] [%s %s] ",
                pid, datetime, log_levels[level], filepath
        );
    }

-    va_list ap;
-    va_start(ap, format);
    size_t maxsize = sizeof(log_str) - log_len;
    log_len += vsnprintf(log_str + log_len, maxsize, format, ap);
-    va_end(ap);

    if (is_tty) {
        log_len += sprintf(log_str + log_len, "\033[0m\n");
@@ -56,10 +55,20 @@ void sist_logf(char *filepath, int level, char *format, ...) {
        log_len += 1;
    }

-    write(STDERR_FILENO, log_str, log_len);
+    // Logging is best-effort. A failed write to stderr must not be reported with LOG_FATALF:
+    //  that would log through the same failing descriptor again (and the previous message,
+    //  about serialize.c's index descriptor, was unrelated to this code anyway)
+    ssize_t ret = write(STDERR_FILENO, log_str, log_len);
+    (void) ret;
 }

-void sist_log(char *filepath, int level, char *str) {
+// printf-style logging entry point: collects the variadic arguments into a
+//  va_list and hands them to vsist_logf()
+void sist_logf(const char *filepath, int level, char *format, ...) {
+    va_list fmt_args;
+
+    va_start(fmt_args, format);
+    vsist_logf(filepath, level, format, fmt_args);
+    va_end(fmt_args);
+}
+
+void sist_log(const char *filepath, int level, char *str) {

    static int is_tty = -1;
    if (is_tty == -1) {
@@ -81,7 +90,7 @@ void sist_log(char *filepath, int level, char *str) {
    if (is_tty) {
        log_len = snprintf(
                log_str, sizeof(log_str),
-                "\033[%dm[%04X]%s [%s] [%s %s] %s \033[0m\n",
+                "\033[%dm[%04llX]%s [%s] [%s %s] %s \033[0m\n",
                31 + ((unsigned int) (pid)) % 7, pid, log_colors[level],
                datetime, log_levels[level], filepath,
                str
@@ -89,11 +98,14 @@ void sist_log(char *filepath, int level, char *str) {
    } else {
        log_len = snprintf(
                log_str, sizeof(log_str),
-                "[%04X] [%s] [%s %s] %s \n",
+                "[%04llX] [%s] [%s %s] %s \n",
                pid, datetime, log_levels[level], filepath,
                str
        );
    }

-    write(STDERR_FILENO, log_str, log_len);
+    // Logging is best-effort. A failed write to stderr must not be reported with LOG_FATALF:
+    //  that would log through the same failing descriptor again (and the previous message,
+    //  about serialize.c's index descriptor, was unrelated to this code anyway)
+    ssize_t ret = write(STDERR_FILENO, log_str, log_len);
+    (void) ret;
 }
--- a/src/log.h
+++ b/src/log.h
@@ -1,6 +1,7 @@
 #ifndef SIST2_LOG_H
 #define SIST2_LOG_H

+
 #define LOG_MAX_LENGTH 8192

 #define SIST_DEBUG 0
@@ -36,10 +37,11 @@
    sist_log(filepath, SIST_FATAL, str);\
    exit(-1);

-#include "src/sist.h"
+#include "sist.h"

-void sist_logf(char *filepath, int level, char *format, ...);
+void sist_logf(const char *filepath, int level, char *format, ...);
+void vsist_logf(const char *filepath, int level, char *format, va_list ap);

-void sist_log(char *filepath, int level, char *str);
+void sist_log(const char *filepath, int level, char *str);

 #endif
--- a/src/main.c
+++ b/src/main.c
@@ -1,32 +1,43 @@
 #include "sist.h"
 #include "ctx.h"

+#include <third-party/argparse/argparse.h>
+#include <locale.h>
+
+#include "cli.h"
+#include "io/serialize.h"
+#include "io/store.h"
+#include "tpool.h"
+#include "io/walk.h"
+#include "index/elastic.h"
+#include "web/serve.h"
+#include "parsing/mime.h"
+#include "parsing/parse.h"
+
+#include "stats.h"
+
 #define DESCRIPTION "Lightning-fast file system indexer and search tool."

 #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"


-static const char *const Version = "1.2.2";
+static const char *const Version = "2.9.0";
 static const char *const usage[] = {
        "sist2 scan [OPTION]... PATH",
        "sist2 index [OPTION]... INDEX",
        "sist2 web [OPTION]... INDEX...",
+        "sist2 exec-script [OPTION]... INDEX",
        NULL,
 };

-void global_init() {
-    curl_global_init(CURL_GLOBAL_NOTHING);
-    av_log_set_level(AV_LOG_QUIET);
-    opcInitLibrary();
-}
-
 void init_dir(const char *dirpath) {
    char path[PATH_MAX];
    snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);

-    uuid_t uuid;
-    uuid_generate(uuid);
-    uuid_unparse(uuid, ScanCtx.index.desc.uuid);
+    unsigned char index_md5[MD5_DIGEST_LENGTH];
+    MD5((unsigned char *) ScanCtx.index.desc.name, strlen(ScanCtx.index.desc.name), index_md5);
+    buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
+
    time(&ScanCtx.index.desc.timestamp);
    strcpy(ScanCtx.index.desc.version, Version);
    strcpy(ScanCtx.index.desc.type, INDEX_TYPE_BIN);
@@ -38,29 +49,151 @@ void scan_print_header() {
    LOG_INFOF("main.c", "sist2 v%s", Version)
 }

-void sist2_scan(scan_args_t *args) {
+/**
+ * Store callback handed to the parser contexts.
+ *
+ * Writes buf (buf_len bytes) under key (key_len bytes) into
+ * ScanCtx.index.store, i.e. the store of the index currently being scanned.
+ */
+void _store(char *key, size_t key_len, char *buf, size_t buf_len) {
+    store_t *index_store = ScanCtx.index.store;
+
+    store_write(index_store, key, key_len, buf, buf_len);
+}
+
+/**
+ * Log callback handed to the parser contexts.
+ *
+ * LEVEL_FATAL messages are always written and then terminate the process with
+ * exit(-1). Any other level is written only when LogCtx.verbose is set, and
+ * LEVEL_DEBUG additionally requires LogCtx.very_verbose.
+ */
+void _log(const char *filepath, int level, char *str) {
+    if (level == LEVEL_FATAL) {
+        sist_log(filepath, level, str);
+        exit(-1);
+    }
+
+    // Nothing below fatal is reported unless verbose output was requested
+    if (!LogCtx.verbose) {
+        return;
+    }
+
+    // Debug messages are further gated behind very_verbose
+    if (level == LEVEL_DEBUG && !LogCtx.very_verbose) {
+        return;
+    }
+
+    sist_log(filepath, level, str);
+}
+
+/**
+ * printf-style log callback handed to the parser contexts.
+ *
+ * Applies the same filter as _log(): LEVEL_FATAL is always written and then
+ * terminates the process with exit(-1); other levels need LogCtx.verbose, and
+ * LEVEL_DEBUG additionally needs LogCtx.very_verbose.
+ *
+ * @param filepath passed through to vsist_logf()
+ * @param level    one of the LEVEL_* constants
+ * @param format   printf-style format string, followed by its arguments
+ */
+void _logf(const char *filepath, int level, char *format, ...) {
+
+    int is_fatal = level == LEVEL_FATAL;
+
+    // Evaluate the filter once instead of repeating the vsist_logf() call per branch
+    int should_emit = is_fatal
+                      || (LogCtx.verbose && (level != LEVEL_DEBUG || LogCtx.very_verbose));
+
+    if (should_emit) {
+        va_list args;
+
+        va_start(args, format);
+        vsist_logf(filepath, level, format, args);
+        // Always pair va_start with va_end, including on the fatal path that exits below
+        va_end(args);
+    }
+
+    if (is_fatal) {
+        exit(-1);
+    }
+}
+
+/**
+ * Populate the global ScanCtx from the parsed `scan` arguments.
+ *
+ * Each parser sub-context (arc, comic, ebook, font, media, ooxml, mobi, text,
+ * msdoc, raw) receives the _log/_logf callbacks, and those that have a store
+ * member also receive _store. Thumbnail size/quality and content-size limits
+ * are copied from args.
+ *
+ * Reads ScanCtx.mime_table for the cbr/cbz/msword lookups, so the table has
+ * to be set before this function is called.
+ */
+void initialize_scan_context(scan_args_t *args) {
+
+    // Arc
+    ScanCtx.arc_ctx.mode = args->archive_mode;
+    ScanCtx.arc_ctx.log = _log;
+    ScanCtx.arc_ctx.logf = _logf;
+    ScanCtx.arc_ctx.parse = (parse_callback_t) parse;
+
+    // Comic
+    ScanCtx.comic_ctx.log = _log;
+    ScanCtx.comic_ctx.logf = _logf;
+    ScanCtx.comic_ctx.store = _store;
+    ScanCtx.comic_ctx.tn_size = args->size;
+    ScanCtx.comic_ctx.tn_qscale = args->quality;
+    // Mime ids are resolved once here from ScanCtx.mime_table
+    ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
+    ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
+
+    // Ebook
+    pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
+    ScanCtx.ebook_ctx.content_size = args->content_size;
+    ScanCtx.ebook_ctx.tn_size = args->size;
+    ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
+    ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
+    ScanCtx.ebook_ctx.log = _log;
+    ScanCtx.ebook_ctx.logf = _logf;
+    ScanCtx.ebook_ctx.store = _store;
+
+    // Font
+    // Font thumbnails are enabled only when a positive thumbnail size was requested
+    ScanCtx.font_ctx.enable_tn = args->size > 0;
+    ScanCtx.font_ctx.log = _log;
+    ScanCtx.font_ctx.logf = _logf;
+    ScanCtx.font_ctx.store = _store;
+
+    // Media
+    ScanCtx.media_ctx.tn_qscale = args->quality;
+    ScanCtx.media_ctx.tn_size = args->size;
+    ScanCtx.media_ctx.log = _log;
+    ScanCtx.media_ctx.logf = _logf;
+    ScanCtx.media_ctx.store = _store;
+    // --mem-buffer is expressed in MB; the cast to long happens before the multiplication
+    ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
+    init_media();
+
+    // OOXML
+    ScanCtx.ooxml_ctx.content_size = args->content_size;
+    ScanCtx.ooxml_ctx.log = _log;
+    ScanCtx.ooxml_ctx.logf = _logf;
+    ScanCtx.ooxml_ctx.store = _store;
+
+    // MOBI
+    ScanCtx.mobi_ctx.content_size = args->content_size;
+    ScanCtx.mobi_ctx.log = _log;
+    ScanCtx.mobi_ctx.logf = _logf;
+
+    // TEXT
+    ScanCtx.text_ctx.content_size = args->content_size;
+    ScanCtx.text_ctx.log = _log;
+    ScanCtx.text_ctx.logf = _logf;
+
+    // MSDOC
+    ScanCtx.msdoc_ctx.tn_size = args->size;
+    ScanCtx.msdoc_ctx.content_size = args->content_size;
+    ScanCtx.msdoc_ctx.log = _log;
+    ScanCtx.msdoc_ctx.logf = _logf;
+    ScanCtx.msdoc_ctx.store = _store;
+    ScanCtx.msdoc_ctx.msdoc_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/msword");

-    ScanCtx.tn_qscale = args->quality;
-    ScanCtx.tn_size = args->size;
-    ScanCtx.content_size = args->content_size;
    ScanCtx.threads = args->threads;
    ScanCtx.depth = args->depth;
-    ScanCtx.archive_mode = args->archive_mode;
+
+    // NOTE(review): strncpy() with n == sizeof(dest) does not NUL-terminate when the
+    // source is at least that long - confirm the argument lengths are validated upstream
    strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
    strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
    strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
+    strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
    ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
-    ScanCtx.tesseract_lang = args->tesseract_lang;
+    ScanCtx.fast = args->fast;

-    init_dir(ScanCtx.index.path);
+    // Raw
+    ScanCtx.raw_ctx.tn_qscale = args->quality;
+    ScanCtx.raw_ctx.tn_size = args->size;
+    ScanCtx.raw_ctx.log = _log;
+    ScanCtx.raw_ctx.logf = _logf;
+    ScanCtx.raw_ctx.store = _store;
+}
+
+
+void sist2_scan(scan_args_t *args) {

    ScanCtx.mime_table = mime_get_mime_table();
    ScanCtx.ext_table = mime_get_ext_table();

+    initialize_scan_context(args);
+
+    init_dir(ScanCtx.index.path);
+
    char store_path[PATH_MAX];
    snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
    mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
-    ScanCtx.index.store = store_create(store_path);
+    ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);
+
+    snprintf(store_path, PATH_MAX, "%smeta", ScanCtx.index.path);
+    mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
+    ScanCtx.index.meta_store = store_create(store_path, STORE_SIZE_META);

    scan_print_header();

@@ -70,23 +203,32 @@ void sist2_scan(scan_args_t *args) {

        DIR *dir = opendir(args->incremental);
        if (dir == NULL) {
-            perror("opendir");
-            return;
+            LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
        }
+
+        char descriptor_path[PATH_MAX];
+        snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
+        index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
+
+        if (strcmp(original_desc.version, Version) != 0) {
+            LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version,
+                       Version, INDEX_VERSION_EXTERNAL)
+        }
+
        struct dirent *de;
        while ((de = readdir(dir)) != NULL) {
            if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
                char file_path[PATH_MAX];
-                snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
+                snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
                incremental_read(ScanCtx.original_table, file_path);
            }
        }
        closedir(dir);

-        printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
+        LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
    }

-    ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
+    ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE);
    tpool_start(ScanCtx.pool);
    walk_directory_tree(ScanCtx.index.desc.root);
    tpool_wait(ScanCtx.pool);
@@ -96,7 +238,7 @@ void sist2_scan(scan_args_t *args) {
        char dst_path[PATH_MAX];
        snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
        snprintf(dst_path, PATH_MAX, "%s_index_original", ScanCtx.index.path);
-        store_t *source = store_create(store_path);
+        store_t *source = store_create(store_path, STORE_SIZE_TN);

        DIR *dir = opendir(args->incremental);
        if (dir == NULL) {
@@ -107,24 +249,34 @@ void sist2_scan(scan_args_t *args) {
        while ((de = readdir(dir)) != NULL) {
            if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
                char file_path[PATH_MAX];
-                snprintf(file_path, PATH_MAX, "%s/%s", args->incremental, de->d_name);
+                snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
                incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table);
            }
        }
        closedir(dir);
        store_destroy(source);
+
+        snprintf(store_path, PATH_MAX, "%stags", args->incremental);
+        snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
+        mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
+        store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
+        store_copy(source_tags, dst_path);
+        store_destroy(source_tags);
    }

+    generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
+
    store_destroy(ScanCtx.index.store);
 }

 void sist2_index(index_args_t *args) {

    IndexCtx.es_url = args->es_url;
+    IndexCtx.es_index = args->es_index;
    IndexCtx.batch_size = args->batch_size;

    if (!args->print) {
-        elastic_init(args->force_reset);
+        elastic_init(args->force_reset, args->es_mappings, args->es_settings);
    }

    char descriptor_path[PATH_MAX];
@@ -135,17 +287,25 @@ void sist2_index(index_args_t *args) {
    LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)

    if (strcmp(desc.version, Version) != 0 && strcmp(desc.version, INDEX_VERSION_EXTERNAL) != 0) {
-        fprintf(stderr, "Version mismatch! Index is %s but executable is %s/%s\n",
-                desc.version, Version, INDEX_VERSION_EXTERNAL);
-        return;
+        LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", desc.version, Version,
+                   INDEX_VERSION_EXTERNAL)
    }

    DIR *dir = opendir(args->index_path);
    if (dir == NULL) {
-        perror("opendir");
-        return;
+        LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
    }

+    char path_tmp[PATH_MAX];
+    snprintf(path_tmp, sizeof(path_tmp), "%s/tags", args->index_path);
+    mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
+    IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
+    IndexCtx.tags = store_read_all(IndexCtx.tag_store);
+
+    snprintf(path_tmp, sizeof(path_tmp), "%s/meta", args->index_path);
+    IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
+    IndexCtx.meta = store_read_all(IndexCtx.meta_store);
+
    index_func f;
    if (args->print) {
        f = print_json;
@@ -153,27 +313,64 @@ void sist2_index(index_args_t *args) {
        f = index_json;
    }

+    void (*cleanup)();
+    if (args->print) {
+        cleanup = NULL;
+    } else {
+        cleanup = elastic_cleanup;
+    }
+
+    IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE);
+    tpool_start(IndexCtx.pool);
+
    struct dirent *de;
    while ((de = readdir(dir)) != NULL) {
        if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
            char file_path[PATH_MAX];
            snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
-            read_index(file_path, desc.uuid, desc.type, f);
+            read_index(file_path, desc.id, desc.type, f);
        }
    }
    closedir(dir);

+    tpool_wait(IndexCtx.pool);
+
+    tpool_destroy(IndexCtx.pool);
+
    if (!args->print) {
-        elastic_flush();
-        destroy_indexer(args->script, desc.uuid);
+        finish_indexer(args->script, args->async_script, desc.id);
    }
+
+    store_destroy(IndexCtx.tag_store);
+    g_hash_table_remove_all(IndexCtx.tags);
+    g_hash_table_destroy(IndexCtx.tags);
+}
+
+/**
+ * Entry point of the `exec-script` command.
+ *
+ * Forces verbose logging, reads descriptor.json from args->index_path, copies
+ * the Elasticsearch settings into IndexCtx and hands args->script to
+ * execute_update_script() together with the index id from the descriptor.
+ * args->script is freed before returning.
+ */
+void sist2_exec_script(exec_args_t *args) {
+
+    LogCtx.verbose = TRUE;
+
+    char descriptor_path[PATH_MAX];
+    snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
+    index_descriptor_t desc = read_index_descriptor(descriptor_path);
+
+    IndexCtx.es_url = args->es_url;
+    // main() fills exec_args->es_index from --es-index; forward it the same way
+    // sist2_index() does, otherwise the option is silently ignored for exec-script
+    IndexCtx.es_index = args->es_index;
+
+    LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
+
+    execute_update_script(args->script, args->async_script, desc.id);
+    // NOTE(review): confirm exec_args_destroy() does not free args->script a second time
+    free(args->script);
 }

 void sist2_web(web_args_t *args) {

    WebCtx.es_url = args->es_url;
+    WebCtx.es_index = args->es_index;
    WebCtx.index_count = args->index_count;
-    WebCtx.b64credentials = args->b64credentials;
+    WebCtx.auth_user = args->auth_user;
+    WebCtx.auth_pass = args->auth_pass;
+    WebCtx.auth_enabled = args->auth_enabled;
+    WebCtx.tag_auth_enabled = args->tag_auth_enabled;

    for (int i = 0; i < args->index_count; i++) {
        char *abs_path = abspath(args->indices[i]);
@@ -183,7 +380,11 @@ void sist2_web(web_args_t *args) {
        char path_tmp[PATH_MAX];

        snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
-        WebCtx.indices[i].store = store_create(path_tmp);
+        WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
+
+        snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
+        mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
+        WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);

        snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
        WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
@@ -193,21 +394,25 @@ void sist2_web(web_args_t *args) {
        free(abs_path);
    }

-    serve(args->bind, args->port);
+    serve(args->listen_address);
 }


 int main(int argc, const char *argv[]) {
-
-    global_init();
+    setlocale(LC_ALL, "");

    scan_args_t *scan_args = scan_args_create();
    index_args_t *index_args = index_args_create();
    web_args_t *web_args = web_args_create();
+    exec_args_t *exec_args = exec_args_create();

    int arg_version = 0;

    char *common_es_url = NULL;
+    char *common_es_index = NULL;
+    char *common_script_path = NULL;
+    int common_async_script = 0;
+    int common_threads = 0;

    struct argparse_option options[] = {
            OPT_HELP(),
@@ -217,7 +422,7 @@ int main(int argc, const char *argv[]) {
            OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),

            OPT_GROUP("Scan options"),
-            OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
+            OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
            OPT_FLOAT('q', "quality", &scan_args->quality,
                      "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
            OPT_INTEGER(0, "size", &scan_args->size,
@@ -236,20 +441,39 @@ int main(int argc, const char *argv[]) {
                                                          "shallow: Don't parse archives inside archives. DEFAULT: recurse"),
            OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
                                                             "which are installed on your machine)"),
+            OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
+            OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
+            OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
+                                                                                  "(see USAGE.md). DEFAULT: 0.0005"),
+            OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
+                        "Maximum memory buffer size per thread in MB for files inside archives "
+                        "(see USAGE.md). DEFAULT: 2000"),

            OPT_GROUP("Index options"),
+            OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
+            OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
            OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
-            OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
+            OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
+            OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
+            OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
+            OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),
            OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
            OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
                                                                      "(You must use this option the first time you use the index command)"),

            OPT_GROUP("Web options"),
            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
-            OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"),
-            OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"),
+            OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
+            OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
            OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
+            OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
+
+            OPT_GROUP("Exec-script options"),
+            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
+            OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
+            OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
+            OPT_BOOLEAN(0, "async-script", &common_async_script, "Execute user script asynchronously."),

            OPT_END(),
    };
@@ -261,7 +485,7 @@ int main(int argc, const char *argv[]) {

    if (arg_version) {
        printf(Version);
-        exit(0);
+        goto end;
    }

    if (LogCtx.very_verbose != 0) {
@@ -270,25 +494,35 @@ int main(int argc, const char *argv[]) {

    web_args->es_url = common_es_url;
    index_args->es_url = common_es_url;
+    exec_args->es_url = common_es_url;
+
+    web_args->es_index = common_es_index;
+    index_args->es_index = common_es_index;
+    exec_args->es_index = common_es_index;
+
+    index_args->script_path = common_script_path;
+    exec_args->script_path = common_script_path;
+    index_args->threads = common_threads;
+    scan_args->threads = common_threads;
+    exec_args->async_script = common_async_script;
+    index_args->async_script = common_async_script;

    if (argc == 0) {
        argparse_usage(&argparse);
-        return 1;
+        goto end;
    } else if (strcmp(argv[0], "scan") == 0) {

        int err = scan_args_validate(scan_args, argc, argv);
        if (err != 0) {
-            return err;
+            goto end;
        }
        sist2_scan(scan_args);

-    }
-
-    else if (strcmp(argv[0], "index") == 0) {
+    } else if (strcmp(argv[0], "index") == 0) {

        int err = index_args_validate(index_args, argc, argv);
        if (err != 0) {
-            return err;
+            goto end;
        }
        sist2_index(index_args);

@@ -296,22 +530,30 @@ int main(int argc, const char *argv[]) {

        int err = web_args_validate(web_args, argc, argv);
        if (err != 0) {
-            return err;
+            goto end;
        }
        sist2_web(web_args);

+    } else if (strcmp(argv[0], "exec-script") == 0) {
+
+        int err = exec_args_validate(exec_args, argc, argv);
+        if (err != 0) {
+            goto end;
        }
-    else {
+        sist2_exec_script(exec_args);
+
+    } else {
        fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
        argparse_usage(&argparse);
-        return 1;
+        goto end;
    }
    printf("\n");

+    end:
    scan_args_destroy(scan_args);
-
    index_args_destroy(index_args);
    web_args_destroy(web_args);
+    exec_args_destroy(exec_args);

    return 0;
 }
--- a/src/parsing/arc.c
+++ b/src/parsing/arc.c
@@ -1,157 +0,0 @@
-#include "arc.h"
-#include "src/ctx.h"
-
-#define ARC_BUF_SIZE 8192
-
-int should_parse_filtered_file(const char *filepath, int ext) {
-    char tmp[PATH_MAX * 2];
-
-    if (ext == 0) {
-        return FALSE;
-    }
-
-    memcpy(tmp, filepath, ext - 1);
-    *(tmp + ext - 1) = '\0';
-
-    char *idx = strrchr(tmp, '.');
-
-    if (idx == NULL) {
-        return FALSE;
-    }
-
-    if (strcmp(idx, ".tar") == 0) {
-        return TRUE;
-    }
-
-    return FALSE;
-}
-
-int arc_read(struct vfile *f, void *buf, size_t size) {
-    return archive_read_data(f->arc, buf, size);
-}
-
-typedef struct arc_data {
-    vfile_t *f;
-    char buf[ARC_BUF_SIZE];
-} arc_data_f;
-
-int vfile_open_callback(struct archive *a, void *user_data) {
-    arc_data_f *data = user_data;
-
-    if (data->f->is_fs_file && data->f->fd == -1) {
-        data->f->fd = open(data->f->filepath, O_RDONLY);
-    }
-
-    return ARCHIVE_OK;
-}
-
-long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
-    arc_data_f *data = user_data;
-
-    *buf = data->buf;
-    return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
-}
-
-int vfile_close_callback(struct archive *a, void *user_data) {
-    arc_data_f *data = user_data;
-
-    if (data->f->close != NULL) {
-        data->f->close(data->f);
-    }
-
-    return ARCHIVE_OK;
-}
-
-void parse_archive(vfile_t *f, document_t *doc) {
-
-    struct archive *a;
-    struct archive_entry *entry;
-
-
-    arc_data_f data;
-    data.f = f;
-
-    int ret = 0;
-    if (data.f->is_fs_file) {
-
-        a = archive_read_new();
-        archive_read_support_filter_all(a);
-        archive_read_support_format_all(a);
-
-        ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
-    } else if (ScanCtx.archive_mode == ARC_MODE_RECURSE) {
-
-        a = archive_read_new();
-        archive_read_support_filter_all(a);
-        archive_read_support_format_all(a);
-
-        ret = archive_read_open(
-                a, &data,
-                vfile_open_callback,
-                vfile_read_callback,
-                vfile_close_callback
-        );
-    } else {
-        return;
-    }
-
-    if (ret != ARCHIVE_OK) {
-        LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
-        archive_read_free(a);
-        return;
-    }
-
-    if (ScanCtx.archive_mode == ARC_MODE_LIST) {
-
-        dyn_buffer_t buf = dyn_buffer_create();
-
-        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
-            if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
-
-                char *path = (char *) archive_entry_pathname(entry);
-
-                dyn_buffer_append_string(&buf, path);
-                dyn_buffer_write_char(&buf, '\n');
-            }
-        }
-        dyn_buffer_write_char(&buf, '\0');
-
-        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
-        meta_list->key = MetaContent;
-        strcpy(meta_list->strval, buf.buf);
-        APPEND_META(doc, meta_list);
-        dyn_buffer_destroy(&buf);
-
-    } else {
-
-        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + PATH_MAX * 2);
-
-        sub_job->vfile.close = NULL;
-        sub_job->vfile.read = arc_read;
-        sub_job->vfile.arc = a;
-        sub_job->vfile.filepath = sub_job->filepath;
-        sub_job->vfile.is_fs_file = FALSE;
-        memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));
-
-        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
-            sub_job->info = *archive_entry_stat(entry);
-            if (S_ISREG(sub_job->info.st_mode)) {
-                sprintf(sub_job->filepath, "%s#/%s", f->filepath, archive_entry_pathname(entry));
-                sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;
-
-                char *p = strrchr(sub_job->filepath, '.');
-                if (p != NULL) {
-                    sub_job->ext = (int) (p - sub_job->filepath + 1);
-                } else {
-                    sub_job->ext = (int) strlen(sub_job->filepath);
-                }
-
-                parse(sub_job);
-            }
-        }
-
-        free(sub_job);
-    }
-
-    archive_read_free(a);
-}
--- a/src/parsing/arc.h
+++ b/src/parsing/arc.h
@@ -1,12 +0,0 @@
-#ifndef SIST2_ARC_H
-#define SIST2_ARC_H
-
-#include "src/sist.h"
-
-int should_parse_filtered_file(const char *filepath, int ext);
-
-void parse_archive(vfile_t *f, document_t *doc);
-
-int arc_read(struct vfile * f, void *buf, size_t size);
-
-#endif
--- a/src/parsing/doc.c
+++ b/src/parsing/doc.c
@@ -1,107 +0,0 @@
-#include "doc.h"
-#include "src/ctx.h"
-
-void dump_text(mceTextReader_t *reader, dyn_buffer_t *buf) {
-
-    mce_skip_attributes(reader);
-
-    mce_start_children(reader) {
-        mce_start_element(reader, NULL, _X("t")) {
-            mce_skip_attributes(reader);
-            mce_start_children(reader) {
-                mce_start_text(reader) {
-                    char *str = (char *) xmlTextReaderConstValue(reader->reader);
-                    dyn_buffer_append_string(buf, str);
-                    dyn_buffer_write_char(buf, ' ');
-                } mce_end_text(reader);
-            } mce_end_children(reader);
-        } mce_end_element(reader);
-
-        mce_start_element(reader, NULL, NULL) {
-            dump_text(reader, buf);
-        } mce_end_element(reader);
-
-    } mce_end_children(reader)
-}
-
-__always_inline
-int should_read_part(opcPart part) {
-
-    char *part_name = (char *) part;
-
-    if (part == NULL) {
-        return FALSE;
-    }
-
-    if (    // Word
-            strcmp(part_name, "word/document.xml") == 0
-            || strncmp(part_name, "word/footer", sizeof("word/footer") - 1) == 0
-            || strncmp(part_name, "word/header", sizeof("word/header") - 1) == 0
-            // PowerPoint
-            || strncmp(part_name, "ppt/slides/slide", sizeof("ppt/slides/slide") - 1) == 0
-            || strncmp(part_name, "ppt/notesSlides/notesSlide", sizeof("ppt/notesSlides/notesSlide") - 1) == 0
-            // Excel
-            || strncmp(part_name, "xl/worksheets/sheet", sizeof("xl/worksheets/sheet") - 1) == 0
-            || strcmp(part_name, "xl/sharedStrings.xml") == 0
-            || strcmp(part_name, "xl/workbook.xml") == 0
-            ) {
-        return TRUE;
-    }
-
-    return FALSE;
-}
-
-__always_inline
-void read_part(opcContainer *c, dyn_buffer_t *buf, opcPart part, document_t *doc) {
-
-    mceTextReader_t reader;
-    int ret = opcXmlReaderOpen(c, &reader, part, NULL, "UTF-8", 0);
-
-    if (ret != OPC_ERROR_NONE) {
-        LOG_ERRORF(doc->filepath, "(doc.c) opcXmlReaderOpen() returned error code %d", ret);
-        return;
-    }
-
-    mce_start_document(&reader) {
-        mce_start_element(&reader, NULL, NULL) {
-                dump_text(&reader, buf);
-        } mce_end_element(&reader);
-    } mce_end_document(&reader);
-
-    mceTextReaderCleanup(&reader);
-}
-
-void parse_doc(void *mem, size_t mem_len, document_t *doc) {
-
-    if (mem == NULL) {
-        return;
-    }
-
-    opcContainer *c = opcContainerOpenMem(mem, mem_len, OPC_OPEN_READ_ONLY, NULL);
-    if (c == NULL) {
-        LOG_ERROR(doc->filepath, "(doc.c) Couldn't open document with opcContainerOpenMem()");
-        return;
-    }
-
-    dyn_buffer_t buf = dyn_buffer_create();
-
-    opcPart part = opcPartGetFirst(c);
-    do {
-        if (should_read_part(part)) {
-            read_part(c, &buf, part, doc);
-        }
-    } while ((part = opcPartGetNext(c, part)));
-
-    opcContainerClose(c, OPC_CLOSE_NOW);
-
-    if (buf.cur > 0) {
-        dyn_buffer_write_char(&buf, '\0');
-
-        meta_line_t *meta = malloc(sizeof(meta_line_t) + buf.cur);
-        meta->key = MetaContent;
-        strcpy(meta->strval, buf.buf);
-        APPEND_META(doc, meta)
-    }
-
-    dyn_buffer_destroy(&buf);
-}
--- a/src/parsing/doc.h
+++ b/src/parsing/doc.h
@@ -1,8 +0,0 @@
-#ifndef SIST2_DOC_H
-#define SIST2_DOC_H
-
-#include "src/sist.h"
-
-void parse_doc(void *buf, size_t buf_len, document_t *doc);
-
-#endif
--- a/src/parsing/font.c
+++ b/src/parsing/font.c
@@ -1,226 +0,0 @@
-#include "font.h"
-
-
-#include "src/ctx.h"
-
-__thread FT_Library ft_lib = NULL;
-
-
-typedef struct text_dimensions {
-    unsigned int width;
-    unsigned int height;
-    unsigned int baseline;
-} text_dimensions_t;
-
-typedef struct glyph {
-    int top;
-    int height;
-    int width;
-    int descent;
-    int ascent;
-    int advance_width;
-    unsigned char *pixmap;
-} glyph_t;
-
-
-__always_inline
-int kerning_offset(char c, char pc, FT_Face face) {
-    FT_Vector kerning;
-    FT_Get_Kerning(face, c, pc, FT_KERNING_DEFAULT, &kerning);
-
-    return (int) (kerning.x / 64);
-}
-
-__always_inline
-glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
-    glyph_t glyph;
-
-    glyph.pixmap = slot->bitmap.buffer;
-
-    glyph.width = (int) slot->bitmap.width;
-    glyph.height = (int) slot->bitmap.rows;
-    glyph.top = slot->bitmap_top;
-    glyph.advance_width = (int) slot->advance.x / 64;
-
-    glyph.descent = MAX(0, glyph.height - glyph.top);
-    glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
-
-    return glyph;
-}
-
-text_dimensions_t text_dimension(char *text, FT_Face face) {
-    text_dimensions_t dimensions;
-
-    dimensions.width = 0;
-
-    int num_chars = (int) strlen(text);
-
-    unsigned int max_ascent = 0;
-    int max_descent = 0;
-
-    char pc = 0;
-    for (int i = 0; i < num_chars; i++) {
-        char c = text[i];
-
-        FT_Load_Char(face, c, 0);
-        glyph_t glyph = ft_glyph_to_glyph(face->glyph);
-
-        max_descent = MAX(max_descent, glyph.descent);
-        max_ascent = MAX(max_ascent, MAX(glyph.height, glyph.ascent));
-
-        int kerning_x = kerning_offset(c, pc, face);
-        dimensions.width += MAX(glyph.advance_width, glyph.width) + kerning_x;
-
-        pc = c;
-    }
-
-    dimensions.height = max_ascent + max_descent;
-    dimensions.baseline = max_descent;
-
-    return dimensions;
-}
-
-void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) {
-    unsigned int src = 0;
-    unsigned int dst = y * text_info.width + x;
-    unsigned int row_offset = text_info.width - glyph->width;
-    unsigned int buf_len = text_info.width * text_info.height;
-
-    for (unsigned int sy = 0; sy < glyph->height; sy++) {
-        for (unsigned int sx = 0; sx < glyph->width; sx++) {
-            if (dst < buf_len) {
-                bitmap[dst] |= glyph->pixmap[src];
-            }
-            src++;
-            dst++;
-        }
-        dst += row_offset;
-    }
-}
-
-void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) {
-
-    dyn_buffer_write_short(buf, 0x4D42); // Magic
-    dyn_buffer_write_int(buf, 0); // Size placeholder
-    dyn_buffer_write_int(buf, 0x5157); //Reserved
-    dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset
-
-    dyn_buffer_write_int(buf, 40); // DIB size
-    dyn_buffer_write_int(buf, (int) dimensions.width);
-    dyn_buffer_write_int(buf, (int) dimensions.height);
-    dyn_buffer_write_short(buf, 1); // Color planes
-    dyn_buffer_write_short(buf, 8); // bits per pixel
-    dyn_buffer_write_int(buf, 0); // compression
-    dyn_buffer_write_int(buf, 0); // Ignored
-    dyn_buffer_write_int(buf, 3800); // hres
-    dyn_buffer_write_int(buf, 3800); // vres
-    dyn_buffer_write_int(buf, 256); // Color count
-    dyn_buffer_write_int(buf, 0); // Ignored
-
-    // RGBA32 Color table (Grayscale)
-    for (int i = 255; i >= 0; i--) {
-        dyn_buffer_write_int(buf, i + (i << 8) + (i << 16));
-    }
-
-    // Pixel array: write from bottom to top, with rows padded to multiples of 4-bytes
-    for (int y = (int) dimensions.height - 1; y >= 0; y--) {
-        for (unsigned int x = 0; x < dimensions.width; x++) {
-            dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]);
-        }
-        while (buf->cur % 4 != 0) {
-            dyn_buffer_write_char(buf, 0);
-        }
-    }
-
-    // Size
-    *(int *) ((char *) buf->buf + 2) = buf->cur;
-}
-
-void parse_font(const char *buf, size_t buf_len, document_t *doc) {
-    if (ft_lib == NULL) {
-        FT_Init_FreeType(&ft_lib);
-    }
-    if (buf == NULL) {
-        return;
-    }
-
-    FT_Face face;
-    FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
-    if (err != 0) {
-        LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, ft_error_string(err));
-        return;
-    }
-
-    char font_name[1024];
-
-    if (face->style_name == NULL || *(face->style_name) == '?') {
-        if (face->family_name == NULL) {
-            strcpy(font_name, "(null)");
-        } else {
-            strcpy(font_name, face->family_name);
-        }
-    } else {
-        snprintf(font_name, sizeof(font_name), "%s %s", face->family_name, face->style_name);
-    }
-
-    meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name));
-    meta_name->key = MetaFontName;
-    strcpy(meta_name->strval, font_name);
-    APPEND_META(doc, meta_name)
-
-    if (ScanCtx.tn_size <= 0) {
-        return;
-    }
-
-    int pixel = 64;
-    int num_chars = (int) strlen(font_name);
-
-    err = FT_Set_Pixel_Sizes(face, 0, pixel);
-    if (err != 0) {
-        LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err))
-        return;
-    }
-
-    text_dimensions_t dimensions = text_dimension(font_name, face);
-    unsigned char *bitmap = calloc(dimensions.width * dimensions.height, 1);
-
-    FT_Vector pen;
-    pen.x = 0;
-
-    char pc = 0;
-    for (int i = 0; i < num_chars; i++) {
-        char c = font_name[i];
-
-        err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
-        if (err != 0) {
-            c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
-            err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
-            if (err != 0) {
-                LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, ft_error_string(err));
-                continue;
-            }
-        }
-        glyph_t glyph = ft_glyph_to_glyph(face->glyph);
-
-        pen.x += kerning_offset(c, pc, face);
-        if (pen.x <= 0) {
-            pen.x = ABS(glyph.advance_width - glyph.width);
-        }
-        pen.y = dimensions.height - glyph.ascent - dimensions.baseline;
-
-        draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);
-
-        pen.x += glyph.advance_width;
-        pc = c;
-    }
-
-    dyn_buffer_t bmp_data = dyn_buffer_create();
-    bmp_format(&bmp_data, dimensions, bitmap);
-
-    store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);
-
-    dyn_buffer_destroy(&bmp_data);
-    free(bitmap);
-
-    FT_Done_Face(face);
-}
--- a/src/parsing/font.h
+++ b/src/parsing/font.h
@@ -1,9 +0,0 @@
-#ifndef SIST2_FONT_H
-#define SIST2_FONT_H
-
-#include "src/sist.h"
-
-
-void parse_font(const char * buf, size_t buf_len, document_t *doc);
-
-#endif
--- a/src/parsing/media.c
+++ b/src/parsing/media.c
@@ -1,381 +0,0 @@
-#include "src/sist.h"
-#include "src/ctx.h"
-
-#define MIN_SIZE 32
-#define AVIO_BUF_SIZE 8192
-
-__always_inline
-AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
-
-    AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
-    AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
-    jpeg->width = dstW;
-    jpeg->height = dstH;
-    jpeg->time_base.den = 1000000;
-    jpeg->time_base.num = 1;
-    jpeg->i_quant_factor = qscale;
-
-    jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;
-    int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
-
-    if (ret != 0) {
-        printf("Could not open jpeg encoder: %s!\n", av_err2str(ret));
-        return NULL;
-    }
-
-    return jpeg;
-}
-
-__always_inline
-AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
-
-    int dstW;
-    int dstH;
-    if (frame->width <= size && frame->height <= size) {
-        dstW = frame->width;
-        dstH = frame->height;
-    } else {
-        double ratio = (double) frame->width / frame->height;
-        if (frame->width > frame->height) {
-            dstW = size;
-            dstH = (int) (size / ratio);
-        } else {
-            dstW = (int) (size * ratio);
-            dstH = size;
-        }
-    }
-
-    if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
-        return NULL;
-    }
-
-    AVFrame *scaled_frame = av_frame_alloc();
-
-    struct SwsContext *ctx = sws_getContext(
-            decoder->width, decoder->height, decoder->pix_fmt,
-            dstW, dstH, AV_PIX_FMT_YUVJ420P,
-            SWS_FAST_BILINEAR, 0, 0, 0
-    );
-
-    int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
-    uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);
-
-    av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
-
-    sws_scale(ctx,
-              (const uint8_t *const *) frame->data, frame->linesize,
-              0, decoder->height,
-              scaled_frame->data, scaled_frame->linesize
-    );
-
-    scaled_frame->width = dstW;
-    scaled_frame->height = dstH;
-    scaled_frame->format = AV_PIX_FMT_YUV420P;
-
-    sws_freeContext(ctx);
-
-    return scaled_frame;
-}
-
-__always_inline
-AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
-    AVFrame *frame = av_frame_alloc();
-
-    AVPacket avPacket;
-    av_init_packet(&avPacket);
-
-    int receive_ret = -EAGAIN;
-    while (receive_ret == -EAGAIN) {
-        // Get video frame
-        while (1) {
-            int read_frame_ret = av_read_frame(pFormatCtx, &avPacket);
-
-            if (read_frame_ret != 0) {
-                if (read_frame_ret != AVERROR_EOF) {
-                    LOG_WARNINGF(doc->filepath,
-                                 "(media.c) avcodec_read_frame() returned error code [%d] %s",
-                                 read_frame_ret, av_err2str(read_frame_ret)
-                    )
-                }
-                av_frame_free(&frame);
-                av_packet_unref(&avPacket);
-                return NULL;
-            }
-
-            //Ignore audio/other frames
-            if (avPacket.stream_index != stream_idx) {
-                av_packet_unref(&avPacket);
-                continue;
-            }
-            break;
-        }
-
-        // Feed it to decoder
-        int decode_ret = avcodec_send_packet(decoder, &avPacket);
-        if (decode_ret != 0) {
-            LOG_WARNINGF(doc->filepath,
-                         "(media.c) avcodec_send_packet() returned error code [%d] %s",
-                         decode_ret, av_err2str(decode_ret)
-            )
-        }
-        av_packet_unref(&avPacket);
-        receive_ret = avcodec_receive_frame(decoder, frame);
-    }
-    return frame;
-}
-
-#define APPEND_TAG_META(doc, tag_, keyname) \
-    text_buffer_t tex = text_buffer_create(-1); \
-    text_buffer_append_string0(&tex, tag_->value); \
-    text_buffer_terminate_string(&tex); \
-    meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
-    meta_tag->key = keyname; \
-    strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
-    APPEND_META(doc, meta_tag) \
-    text_buffer_destroy(&tex);
-
-__always_inline
-void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
-
-    AVDictionaryEntry *tag = NULL;
-    while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
-        char key[32];
-        strncpy(key, tag->key, sizeof(key));
-
-        char *ptr = key;
-        for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
-
-        if (strcmp(key, "artist") == 0) {
-            APPEND_TAG_META(doc, tag, MetaArtist)
-        } else if (strcmp(key, "genre") == 0) {
-            APPEND_TAG_META(doc, tag, MetaGenre)
-        } else if (strcmp(key, "title") == 0) {
-            APPEND_TAG_META(doc, tag, MetaTitle)
-        } else if (strcmp(key, "album_artist") == 0) {
-            APPEND_TAG_META(doc, tag, MetaAlbumArtist)
-        } else if (strcmp(key, "album") == 0) {
-            APPEND_TAG_META(doc, tag, MetaAlbum)
-        }
-    }
-}
-
-__always_inline
-void
-append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) {
-
-    if (is_video) {
-        meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
-        meta_duration->key = MetaMediaDuration;
-        meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
-        APPEND_META(doc, meta_duration)
-
-        meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
-        meta_bitrate->key = MetaMediaBitrate;
-        meta_bitrate->longval = pFormatCtx->bit_rate;
-        APPEND_META(doc, meta_bitrate)
-    }
-
-    AVDictionaryEntry *tag = NULL;
-    if (is_video) {
-        while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
-            if (include_audio_tags && strcmp(tag->key, "title") == 0) {
-                APPEND_TAG_META(doc, tag, MetaTitle)
-            } else if (strcmp(tag->key, "comment") == 0) {
-                APPEND_TAG_META(doc, tag, MetaContent)
-            } else if (include_audio_tags && strcmp(tag->key, "artist") == 0) {
-                APPEND_TAG_META(doc, tag, MetaArtist)
-            }
-        }
-    } else {
-        // EXIF metadata
-        while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
-            if (include_audio_tags && strcmp(tag->key, "Artist") == 0) {
-                APPEND_TAG_META(doc, tag, MetaArtist)
-            } else if (strcmp(tag->key, "ImageDescription") == 0) {
-                APPEND_TAG_META(doc, tag, MetaContent)
-            }
-        }
-    }
-}
-
-void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {
-
-    int video_stream = -1;
-    int audio_stream = -1;
-
-    avformat_find_stream_info(pFormatCtx, NULL);
-
-    for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) {
-        AVStream *stream = pFormatCtx->streams[i];
-
-        if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-            if (audio_stream == -1) {
-                meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
-                meta_audio->key = MetaMediaAudioCodec;
-                meta_audio->intval = stream->codecpar->codec_id;
-                APPEND_META(doc, meta_audio)
-
-                append_audio_meta(pFormatCtx, doc);
-                audio_stream = i;
-            }
-        } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
-
-            if (video_stream == -1) {
-                meta_line_t *meta_vid = malloc(sizeof(meta_line_t));
-                meta_vid->key = MetaMediaVideoCodec;
-                meta_vid->intval = stream->codecpar->codec_id;
-                APPEND_META(doc, meta_vid)
-
-                meta_line_t *meta_w = malloc(sizeof(meta_line_t));
-                meta_w->key = MetaWidth;
-                meta_w->intval = stream->codecpar->width;
-                APPEND_META(doc, meta_w)
-
-                meta_line_t *meta_h = malloc(sizeof(meta_line_t));
-                meta_h->key = MetaHeight;
-                meta_h->intval = stream->codecpar->height;
-                APPEND_META(doc, meta_h)
-
-                video_stream = i;
-            }
-        }
-    }
-
-    if (video_stream != -1 && ScanCtx.tn_size > 0) {
-        AVStream *stream = pFormatCtx->streams[video_stream];
-
-        if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
-            avformat_close_input(&pFormatCtx);
-            avformat_free_context(pFormatCtx);
-            return;
-        }
-
-        // Decoder
-        AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id);
-        AVCodecContext *decoder = avcodec_alloc_context3(video_codec);
-        avcodec_parameters_to_context(decoder, stream->codecpar);
-        avcodec_open2(decoder, video_codec, NULL);
-
-        //Seek
-        if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
-            int seek_ret = 0;
-            for (int i = 20; i >= 0; i--) {
-                seek_ret = av_seek_frame(pFormatCtx, video_stream,
-                                         stream->duration * 0.10, 0);
-                if (seek_ret == 0) {
-                    break;
-                }
-            }
-        }
-
-        AVFrame *frame = read_frame(pFormatCtx, decoder, video_stream, doc);
-        if (frame == NULL) {
-            avcodec_free_context(&decoder);
-            avformat_close_input(&pFormatCtx);
-            avformat_free_context(pFormatCtx);
-            return;
-        }
-
-        append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1);
-
-        // Scale frame
-        AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);
-
-        if (scaled_frame == NULL) {
-            av_frame_free(&frame);
-            avcodec_free_context(&decoder);
-            avformat_close_input(&pFormatCtx);
-            avformat_free_context(pFormatCtx);
-            return;
-        }
-
-        // Encode frame to jpeg
-        AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
-        avcodec_send_frame(jpeg_encoder, scaled_frame);
-
-        AVPacket jpeg_packet;
-        av_init_packet(&jpeg_packet);
-        avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
-
-        // Save thumbnail
-        store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data,
-                    jpeg_packet.size);
-
-        av_packet_unref(&jpeg_packet);
-        av_frame_free(&frame);
-        av_free(*scaled_frame->data);
-        av_frame_free(&scaled_frame);
-        avcodec_free_context(&jpeg_encoder);
-        avcodec_free_context(&decoder);
-    }
-
-    avformat_close_input(&pFormatCtx);
-    avformat_free_context(pFormatCtx);
-}
-
-void parse_media_filename(const char *filepath, document_t *doc) {
-
-    AVFormatContext *pFormatCtx = avformat_alloc_context();
-    if (pFormatCtx == NULL) {
-        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
-        return;
-    }
-    int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
-    if (res < 0) {
-        LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
-        avformat_close_input(&pFormatCtx);
-        avformat_free_context(pFormatCtx);
-        return;
-    }
-
-    parse_media(pFormatCtx, doc);
-}
-
-
-int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
-    struct vfile *f = ptr;
-
-    int ret = f->read(f, buf, buf_size);
-
-    if (ret == 0) {
-        return AVERROR_EOF;
-    }
-    return ret;
-}
-
-void parse_media_vfile(struct vfile *f, document_t *doc) {
-
-    AVFormatContext *pFormatCtx = avformat_alloc_context();
-    if (pFormatCtx == NULL) {
-        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
-        return;
-    }
-
-    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
-    AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
-
-    pFormatCtx->pb = io_ctx;
-    pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;
-
-    int res = avformat_open_input(&pFormatCtx, "", NULL, NULL);
-    if (res == -5) {
-        // Tried to parse media that requires seek
-        av_free(io_ctx->buffer);
-        avio_context_free(&io_ctx);
-        avformat_close_input(&pFormatCtx);
-        avformat_free_context(pFormatCtx);
-        return;
-    } else if (res < 0) {
-        LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
-        av_free(io_ctx->buffer);
-        avio_context_free(&io_ctx);
-        avformat_close_input(&pFormatCtx);
-        avformat_free_context(pFormatCtx);
-        return;
-    }
-
-    parse_media(pFormatCtx, doc);
-    av_free(io_ctx->buffer);
-    avio_context_free(&io_ctx);
-}
-
--- a/src/parsing/media.h
+++ b/src/parsing/media.h
@@ -1,14 +0,0 @@
-#ifndef SIST2_MEDIA_H
-#define SIST2_MEDIA_H
-
-
-#include "src/sist.h"
-
-#define MIN_VIDEO_SIZE 1024 * 64
-#define MIN_IMAGE_SIZE 1024 * 2
-
-void parse_media_filename(const char * filepath, document_t *doc);
-
-void parse_media_vfile(struct vfile *f, document_t *doc);
-
-#endif
--- a/src/parsing/mime.h
+++ b/src/parsing/mime.h
@@ -1,14 +1,15 @@
 #ifndef SIST2_MIME_H
 #define SIST2_MIME_H

-#include "src/sist.h"
+#include "../sist.h"

-#define MAJOR_MIME(mime_id) (mime_id & 0x0FFF0000) >> 16
+#define MAJOR_MIME(mime_id) (mime_id & 0x000F0000) >> 16

 #define MIME_EMPTY 1
+#define MIME_SIST2_SIDECAR 2

 #define DONT_PARSE 0x80000000
-#define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0
+#define SHOULD_PARSE(mime_id) (ScanCtx.fast == 0 && (mime_id & DONT_PARSE) != DONT_PARSE && mime_id != 0)

 #define PDF_MASK 0x40000000
 #define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK
@@ -25,6 +26,15 @@
 #define DOC_MASK 0x04000000
 #define IS_DOC(mime_id) (mime_id & DOC_MASK) == DOC_MASK

+#define MOBI_MASK 0x02000000
+#define IS_MOBI(mime_id) (mime_id & MOBI_MASK) == MOBI_MASK
+
+#define MARKUP_MASK 0x01000000
+#define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK
+
+#define RAW_MASK 0x00800000
+#define IS_RAW(mime_id) (mime_id & RAW_MASK) == RAW_MASK
+
 enum major_mime {
    MimeInvalid = 0,
    MimeModel = 1,
--- a/src/parsing/mime_generated.c
+++ b/src/parsing/mime_generated.c
--- a/src/parsing/parse.c
+++ b/src/parsing/parse.c
@@ -1,7 +1,16 @@
+#include "parse.h"
+
 #include "src/sist.h"
 #include "src/ctx.h"
+#include "mime.h"
+#include "src/io/serialize.h"
+#include "src/parsing/sidecar.h"

-__thread magic_t Magic = NULL;
+#include <magic.h>
+
+
+#define MIN_VIDEO_SIZE 1024 * 64
+#define MIN_IMAGE_SIZE 1024 * 2

 int fs_read(struct vfile *f, void *buf, size_t size) {

@@ -24,62 +33,47 @@ void fs_close(struct vfile *f) {
    }
 }

-void *read_all(parse_job_t *job, const char *buf, int bytes_read) {
-
-    void *full_buf;
-
-    if (job->info.st_size <= bytes_read) {
-        full_buf = malloc(job->info.st_size);
-        memcpy(full_buf, buf, job->info.st_size);
-    } else {
-        full_buf = malloc(job->info.st_size);
-        memcpy(full_buf, buf, bytes_read);
-
-        int ret = job->vfile.read(&job->vfile, full_buf + bytes_read, job->info.st_size - bytes_read);
-        if (ret == -1) {
-            LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
-            return NULL;
+void fs_reset(struct vfile *f) {
+    if (f->fd != -1) {
+        lseek(f->fd, 0, SEEK_SET);
    }
-    }
-
-    return full_buf;
 }

+#define IS_GIT_OBJ (strlen(doc.filepath + doc.base) == 38 && (strstr(doc.filepath, "objects") != NULL))
+
 void parse(void *arg) {

    parse_job_t *job = arg;
    document_t doc;

-    int inc_ts = incremental_get(ScanCtx.original_table, job->info.st_ino);
-    if (inc_ts != 0 && inc_ts == job->info.st_mtim.tv_sec) {
-        incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino);
-        return;
-    }
-
-    if (Magic == NULL) {
-        Magic = magic_open(MAGIC_MIME_TYPE);
-    }
-
    doc.filepath = job->filepath;
    doc.ext = (short) job->ext;
    doc.base = (short) job->base;
+
+    char *rel_path = doc.filepath + ScanCtx.index.desc.root_len;
+    MD5((unsigned char *) rel_path, strlen(rel_path), doc.path_md5);
+
    doc.meta_head = NULL;
    doc.meta_tail = NULL;
    doc.mime = 0;
-    doc.size = job->info.st_size;
-    doc.ino = job->info.st_ino;
-    doc.mtime = job->info.st_mtim.tv_sec;
+    doc.size = job->vfile.info.st_size;
+    doc.mtime = job->vfile.info.st_mtim.tv_sec;

-    uuid_generate(doc.uuid);
-    char *buf[PARSE_BUF_SIZE];
-
-    if (LogCtx.very_verbose) {
-        char uuid_str[UUID_STR_LEN];
-        uuid_unparse(doc.uuid, uuid_str);
-        LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
+    int inc_ts = incremental_get(ScanCtx.original_table, doc.path_md5);
+    if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
+        incremental_mark_file_for_copy(ScanCtx.copy_table, doc.path_md5);
+        return;
    }

-    if (job->info.st_size == 0) {
+    char *buf[MAGIC_BUF_SIZE];
+
+    if (LogCtx.very_verbose) {
+        char path_md5_str[MD5_STR_LENGTH];
+        buf2hex(doc.path_md5, MD5_DIGEST_LENGTH, path_md5_str);
+        LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
+    }
+
+    if (job->vfile.info.st_size == 0) {
        doc.mime = MIME_EMPTY;
    } else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
        doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
@@ -87,82 +81,113 @@ void parse(void *arg) {

    int bytes_read = 0;

-    if (doc.mime == 0) {
+    if (doc.mime == 0 && !ScanCtx.fast) {
+        if (IS_GIT_OBJ) {
+            goto abort;
+        }
+
        // Get mime type with libmagic
-        bytes_read = job->vfile.read(&job->vfile, buf, PARSE_BUF_SIZE);
-        if (bytes_read == -1) {
-            LOG_WARNINGF(job->filepath, "read() Error: %s", strerror(errno))
+        if (!job->vfile.is_fs_file) {
+            LOG_WARNING(job->filepath,
+                        "Guessing mime type with libmagic inside archive files is not currently supported");
+            goto abort;
+        }
+
+        bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
+        if (bytes_read < 0) {
+
+            if (job->vfile.is_fs_file) {
+                LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
+            } else {
+                LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
+            }
+
            CLOSE_FILE(job->vfile)
            return;
        }

-        const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
+        magic_t magic = magic_open(MAGIC_MIME_TYPE);
+        magic_load(magic, NULL);
+
+        const char *magic_mime_str = magic_buffer(magic, buf, bytes_read);
        if (magic_mime_str != NULL) {
            doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
+
+            LOG_DEBUGF(job->filepath, "libmagic: %s", magic_mime_str);
+
            if (doc.mime == 0) {
                LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
            }
        }
+
+        job->vfile.reset(&job->vfile);
+
+        magic_close(magic);
    }

    int mmime = MAJOR_MIME(doc.mime);

    if (!(SHOULD_PARSE(doc.mime))) {

+    } else if (IS_RAW(doc.mime)) {
+        parse_raw(&ScanCtx.raw_ctx, &job->vfile, &doc);
    } else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
               (mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {

-        if (job->vfile.is_fs_file) {
-            parse_media_filename(job->filepath, &doc);
-        } else {
-            parse_media_vfile(&job->vfile, &doc);
-        }
+        parse_media(&ScanCtx.media_ctx, &job->vfile, &doc);

    } else if (IS_PDF(doc.mime)) {
-        void *pdf_buf = read_all(job, (char *) buf, bytes_read);
-        parse_pdf(pdf_buf, doc.size, &doc);
+        parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc);

-        if (pdf_buf != buf && pdf_buf != NULL) {
-            free(pdf_buf);
+    } else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
+        if (IS_MARKUP(doc.mime)) {
+            parse_markup(&ScanCtx.text_ctx, &job->vfile, &doc);
+        } else {
+            parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
        }

-    } else if (mmime == MimeText && ScanCtx.content_size > 0) {
-        parse_text(bytes_read, &job->vfile, (char *) buf, &doc);
-
    } else if (IS_FONT(doc.mime)) {
-        void *font_buf = read_all(job, (char *) buf, bytes_read);
-        parse_font(font_buf, doc.size, &doc);
+        parse_font(&ScanCtx.font_ctx, &job->vfile, &doc);

-        if (font_buf != buf && font_buf != NULL) {
-            free(font_buf);
-        }
    } else if (
-            ScanCtx.archive_mode != ARC_MODE_SKIP && (
+            ScanCtx.arc_ctx.mode != ARC_MODE_SKIP && (
                    IS_ARC(doc.mime) ||
                    (IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
            )) {
-        parse_archive(&job->vfile, &doc);
-    } else if (ScanCtx.content_size > 0 && IS_DOC(doc.mime)) {
-        void *doc_buf = read_all(job, (char *) buf, bytes_read);
-        parse_doc(doc_buf, doc.size, &doc);
+        parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
+    } else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc.mime)) {
+        parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
+    } else if (is_cbr(&ScanCtx.comic_ctx, doc.mime) || is_cbz(&ScanCtx.comic_ctx, doc.mime)) {
+        parse_comic(&ScanCtx.comic_ctx, &job->vfile, &doc);
+    } else if (IS_MOBI(doc.mime)) {
+        parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
+    } else if (doc.mime == MIME_SIST2_SIDECAR) {
+        parse_sidecar(&job->vfile, &doc);
+        CLOSE_FILE(job->vfile)
+        return;
+    } else if (is_msdoc(&ScanCtx.msdoc_ctx, doc.mime)) {
+        parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, &doc);
+    }

-        if (doc_buf != buf && doc_buf != NULL) {
-            free(doc_buf);
-        }
-    }
+    abort:

    //Parent meta
-    if (!uuid_is_null(job->parent)) {
-        char tmp[UUID_STR_LEN];
-        uuid_unparse(job->parent, tmp);
-
-        meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
+    if (!md5_digest_is_null(job->parent)) {
+        meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
        meta_parent->key = MetaParent;
-        strcpy(meta_parent->strval, tmp);
+        buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
        APPEND_META((&doc), meta_parent)
+
+        doc.has_parent = TRUE;
+    } else {
+        doc.has_parent = FALSE;
    }

    write_document(&doc);

    CLOSE_FILE(job->vfile)
 }
+
+void cleanup_parse() {
+    // noop
+}
--- a/src/parsing/parse.h
+++ b/src/parsing/parse.h
@@ -1,13 +1,16 @@
 #ifndef SIST2_PARSE_H
 #define SIST2_PARSE_H

-#include "src/sist.h"
+#include "../sist.h"

-#define PARSE_BUF_SIZE 4096
+#define MAGIC_BUF_SIZE 4096 * 6

 int fs_read(struct vfile *f, void *buf, size_t size);
 void fs_close(struct vfile *f);
+void fs_reset(struct vfile *f);

 void parse(void *arg);

+void cleanup_parse();
+
 #endif
--- a/src/parsing/pdf.c
+++ b/src/parsing/pdf.c
@@ -1,336 +0,0 @@
-#include "pdf.h"
-#include "src/ctx.h"
-
-#define MIN_OCR_SIZE 128
-__thread text_buffer_t thread_buffer;
-
-
-fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
-
-    int err = 0;
-    fz_page *cover = NULL;
-
-    fz_var(cover);
-    fz_try(ctx)
-        cover = fz_load_page(ctx, fzdoc, 0);
-    fz_catch(ctx)
-        err = 1;
-
-    if (err != 0) {
-        fz_drop_page(ctx, cover);
-        LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
-        return NULL;
-    }
-
-    fz_rect bounds = fz_bound_page(ctx, cover);
-
-    float scale;
-    float w = (float) bounds.x1 - bounds.x0;
-    float h = (float) bounds.y1 - bounds.y0;
-    if (w > h) {
-        scale = (float) ScanCtx.tn_size / w;
-    } else {
-        scale = (float) ScanCtx.tn_size / h;
-    }
-    fz_matrix m = fz_scale(scale, scale);
-
-    bounds = fz_transform_rect(bounds, m);
-    fz_irect bbox = fz_round_rect(bounds);
-    fz_pixmap *pixmap = fz_new_pixmap_with_bbox(ctx, ctx->colorspace->rgb, bbox, NULL, 0);
-
-    fz_clear_pixmap_with_value(ctx, pixmap, 0xFF);
-    fz_device *dev = fz_new_draw_device(ctx, m, pixmap);
-
-    fz_var(err);
-    fz_try(ctx)
-    {
-        pthread_mutex_lock(&ScanCtx.mupdf_mu);
-        fz_run_page(ctx, cover, dev, fz_identity, NULL);
-    }
-    fz_always(ctx)
-    {
-        fz_close_device(ctx, dev);
-        fz_drop_device(ctx, dev);
-        pthread_mutex_unlock(&ScanCtx.mupdf_mu);
-    }
-    fz_catch(ctx)
-        err = ctx->error.errcode;
-
-    if (err != 0) {
-        LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
-        fz_drop_page(ctx, cover);
-        fz_drop_pixmap(ctx, pixmap);
-        return NULL;
-    }
-
-    fz_buffer *fzbuf = NULL;
-    fz_var(fzbuf);
-    fz_var(err);
-
-    fz_try(ctx)
-        fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
-    fz_catch(ctx)
-        err = ctx->error.errcode;
-
-    if (err == 0) {
-        unsigned char *tn_buf;
-        size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
-        store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
-    }
-
-    fz_drop_buffer(ctx, fzbuf);
-    fz_drop_pixmap(ctx, pixmap);
-
-    if (err != 0) {
-        LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err,
-                     ctx->error.message)
-        fz_drop_page(ctx, cover);
-        return NULL;
-    }
-
-    return cover;
-}
-
-void fz_err_callback(void *user, UNUSED(const char *message)) {
-    if (LogCtx.verbose) {
-        document_t *doc = (document_t *) user;
-        LOG_WARNINGF(doc->filepath, "FZ: %s", message)
-    }
-}
-
-__always_inline
-void init_ctx(fz_context *ctx, document_t *doc) {
-    fz_disable_icc(ctx);
-    fz_register_document_handlers(ctx);
-
-    ctx->warn.print_user = doc;
-    ctx->warn.print = fz_err_callback;
-    ctx->error.print_user = doc;
-    ctx->error.print = fz_err_callback;
-}
-
-int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
-    if (block->type != FZ_STEXT_BLOCK_TEXT) {
-        return 0;
-    }
-
-    fz_stext_line *line = block->u.t.first_line;
-    while (line != NULL) {
-        fz_stext_char *c = line->first_char;
-        while (c != NULL) {
-            if (text_buffer_append_char(tex, c->c) == TEXT_BUF_FULL) {
-                return TEXT_BUF_FULL;
-            }
-            c = c->next;
-        }
-        line = line->next;
-    }
-    return 0;
-}
-
-
-void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
-                fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
-                UNUSED(fz_color_params color_params)) {
-
-    int l2factor = 0;
-
-    if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE) {
-
-        fz_pixmap *pix = img->get_pixmap(ctx, img, NULL, img->w, img->h, &l2factor);
-
-        if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
-            TessBaseAPI *api = TessBaseAPICreate();
-            TessBaseAPIInit3(api, TESS_DATAPATH, ScanCtx.tesseract_lang);
-
-            TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
-            TessBaseAPISetSourceResolution(api, pix->xres);
-
-            char *text = TessBaseAPIGetUTF8Text(api);
-            size_t len = strlen(text);
-            text_buffer_append_string(&thread_buffer, text, len - 1);
-            LOG_DEBUGF(
-                    "pdf.c",
-                    "(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
-                    pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
-            )
-
-            TessBaseAPIEnd(api);
-            TessBaseAPIDelete(api);
-            fz_drop_pixmap(ctx, pix);
-        }
-    }
-}
-
-void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
-
-    if (buf == NULL) {
-        return;
-    }
-
-    static int mu_is_initialized = 0;
-    if (!mu_is_initialized) {
-        pthread_mutex_init(&ScanCtx.mupdf_mu, NULL);
-        mu_is_initialized = 1;
-    }
-    fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
-
-    init_ctx(ctx, doc);
-
-    int err = 0;
-
-    fz_document *fzdoc = NULL;
-    fz_stream *stream = NULL;
-    fz_var(fzdoc);
-    fz_var(stream);
-    fz_var(err);
-
-    fz_try(ctx)
-    {
-        stream = fz_open_memory(ctx, buf, buf_len);
-        fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
-    }
-    fz_catch(ctx)
-        err = ctx->error.errcode;
-
-    if (err) {
-        fz_drop_stream(ctx, stream);
-        fz_drop_document(ctx, fzdoc);
-        fz_drop_context(ctx);
-        return;
-    }
-
-    char title[4096] = {'\0',};
-    fz_try(ctx)
-        fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
-    fz_catch(ctx)
-        ;
-
-    if (strlen(title) > 0) {
-        meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title));
-        meta_content->key = MetaTitle;
-        strcpy(meta_content->strval, title);
-        APPEND_META(doc, meta_content)
-    }
-
-    int page_count = -1;
-    fz_var(err);
-    fz_try(ctx)
-        page_count = fz_count_pages(ctx, fzdoc);
-    fz_catch(ctx)
-        err = ctx->error.errcode;
-
-    if (err) {
-        LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, ctx->error.message)
-        fz_drop_stream(ctx, stream);
-        fz_drop_document(ctx, fzdoc);
-        fz_drop_context(ctx);
-        return;
-    }
-
-    fz_page *cover = NULL;
-    if (ScanCtx.tn_size > 0) {
-        cover = render_cover(ctx, doc, fzdoc);
-    } else {
-        fz_var(cover);
-        fz_try(ctx)
-            cover = fz_load_page(ctx, fzdoc, 0);
-        fz_catch(ctx)
-            cover = NULL;
-    }
-
-    if (cover == NULL) {
-        fz_drop_stream(ctx, stream);
-        fz_drop_document(ctx, fzdoc);
-        fz_drop_context(ctx);
-        return;
-    }
-
-    if (ScanCtx.content_size > 0) {
-        fz_stext_options opts = {0};
-        thread_buffer = text_buffer_create(ScanCtx.content_size);
-
-        for (int current_page = 0; current_page < page_count; current_page++) {
-            fz_page *page = NULL;
-            if (current_page == 0) {
-                page = cover;
-            } else {
-                fz_var(err);
-                fz_try(ctx)
-                            page = fz_load_page(ctx, fzdoc, current_page);
-                fz_catch(ctx)
-                    err = ctx->error.errcode;
-                if (err != 0) {
-                    LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
-                    text_buffer_destroy(&thread_buffer);
-                    fz_drop_page(ctx, page);
-                    fz_drop_stream(ctx, stream);
-                    fz_drop_document(ctx, fzdoc);
-                    fz_drop_context(ctx);
-                    return;
-                }
-            }
-
-            fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
-            fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
-            dev->stroke_path = NULL;
-            dev->stroke_text = NULL;
-            dev->clip_text = NULL;
-            dev->clip_stroke_path = NULL;
-            dev->clip_stroke_text = NULL;
-
-            if (ScanCtx.tesseract_lang != NULL) {
-                dev->fill_image = fill_image;
-            }
-
-            fz_var(err);
-            fz_try(ctx)
-                        fz_run_page(ctx, page, dev, fz_identity, NULL);
-            fz_always(ctx)
-                {
-                    fz_close_device(ctx, dev);
-                    fz_drop_device(ctx, dev);
-                }
-            fz_catch(ctx)
-                err = ctx->error.errcode;
-
-            if (err != 0) {
-                LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
-                text_buffer_destroy(&thread_buffer);
-                fz_drop_page(ctx, page);
-                fz_drop_stext_page(ctx, stext);
-                fz_drop_stream(ctx, stream);
-                fz_drop_document(ctx, fzdoc);
-                fz_drop_context(ctx);
-                return;
-            }
-
-            fz_stext_block *block = stext->first_block;
-            while (block != NULL) {
-                int ret = read_stext_block(block, &thread_buffer);
-                if (ret == TEXT_BUF_FULL) {
-                    break;
-                }
-                block = block->next;
-            }
-            fz_drop_stext_page(ctx, stext);
-            fz_drop_page(ctx, page);
-
-            if (thread_buffer.dyn_buffer.cur >= thread_buffer.dyn_buffer.size) {
-                break;
-            }
-        }
-        text_buffer_terminate_string(&thread_buffer);
-
-        meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur);
-        meta_content->key = MetaContent;
-        memcpy(meta_content->strval, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur);
-        APPEND_META(doc, meta_content)
-
-        text_buffer_destroy(&thread_buffer);
-    }
-
-    fz_drop_stream(ctx, stream);
-    fz_drop_document(ctx, fzdoc);
-    fz_drop_context(ctx);
-}
--- a/src/parsing/pdf.h
+++ b/src/parsing/pdf.h
@@ -1,9 +0,0 @@
-#ifndef SIST2_PDF_H
-#define SIST2_PDF_H
-
-#include "src/sist.h"
-
-
-void parse_pdf(void *buf, size_t buf_len, document_t *doc);
-
-#endif
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
simon987	013c54daa0	Fix tag delete	2020-12-31 12:55:37 -05:00
simon987	54308ef5e2	Update tags tab automatically	2020-12-31 12:45:23 -05:00
simon987	638c2a5c1a	Update binary names (again)	2020-12-31 11:03:25 -05:00
simon987	9587caddd9	Don't build tests by default, fix enlarge button	2020-12-31 10:55:34 -05:00
simon987	f5bbe0dc97	Update binary names	2020-12-31 10:54:30 -05:00
simon987	f87eac1f90	Update submodules	2020-12-31 10:26:05 -05:00
simon987	ddafbab6a6	Update readme	2020-12-31 10:26:05 -05:00
simon987	b91d574756	Add md5 client-side lib	2020-12-31 10:26:05 -05:00
simon987	576140e542	fix submodules	2020-12-31 10:26:05 -05:00
simon987	050c1283a3	Remove UUID dep, fix incremental scan, use MD5(path) as unique id, version bump	2020-12-31 10:26:05 -05:00
simon987	c6e1ba03bc	Better support for .doc files	2020-12-31 10:26:05 -05:00
simon987	10e32f707f	Update README.md	2020-12-31 10:26:05 -05:00
simon987	86e83bafaf	Update README.md	2020-12-31 10:26:05 -05:00
simon987	51a40c8819	Add .doc support	2020-12-31 10:26:05 -05:00
acc557	36281a5108	Use relative path for loading csv in stats	2020-12-31 10:26:05 -05:00
acc557	76a0bda48b	Update search.html Fix relative stats URL	2020-12-31 10:26:05 -05:00
simon987	0cf29a660c	Fix relative image URL #122	2020-12-31 10:26:05 -05:00
simon987	6cd0741848	update build instructions	2020-12-31 10:26:05 -05:00
simon987	bc120f349d	Setup ARM CI builds	2020-12-23 10:26:26 -05:00
simon987	8cac8c98d7	Update dev builds template	2020-12-22 14:45:16 -05:00
simon987	30921ac52e	Setup drone ci	2020-12-22 14:09:45 -05:00
simon987	95bbe39afc	Update libmupdf	2020-10-25 09:44:30 -04:00
simon987	72ce217f9c	Optionally ES schema from file #117	2020-10-25 09:44:30 -04:00
simon987	641a8ec90c	sidecar files #114 , version bump	2020-10-25 09:44:30 -04:00
simon987	7a505c2287	Fix typo	2020-10-25 09:44:30 -04:00
simon987	12f162d760	Fix #110	2020-10-25 09:44:30 -04:00
simon987	4b4ab12fac	Version bump	2020-09-22 21:08:24 -04:00
simon987	ae283f77ad	Fix #112	2020-09-22 21:08:24 -04:00
simon987	d3bd53a5ea	Fix arm Dockerfile	2020-09-13 16:16:26 -04:00
simon987	f7887f24d1	sync libscan	2020-09-13 16:16:26 -04:00
simon987	5c8de19188	Update build instructions	2020-09-13 16:16:26 -04:00
simon987	d861d278a4	version bump	2020-09-13 16:16:26 -04:00
simon987	b6ddeee0e0	Use async curl for ES requests #108	2020-09-13 16:16:26 -04:00
simon987	0cd2523b05	arm64 build	2020-09-13 16:16:26 -04:00
simon987	5e798f9367	Update issue-template.md	2020-09-13 10:29:42 -04:00
simon987	5da6c1488b	Handle null mime in document info dialog	2020-08-29 10:34:58 -04:00
simon987	9568e25f84	Fix #99	2020-08-29 10:17:28 -04:00
simon987	6a8027789a	Limited support for UTF16	2020-08-29 10:17:28 -04:00
simon987	b1d16d8abf	Fix #100	2020-08-29 10:17:28 -04:00
simon987	b2a157e24d	Update docs	2020-08-25 10:38:38 -04:00
simon987	9aead9389a	Fix typo in elastic.c	2020-08-25 10:38:38 -04:00
simon987	a32c68cba8	Build fixes	2020-08-25 10:38:38 -04:00
simon987	d116cf9d91	Default index for web & exec	2020-08-25 10:38:38 -04:00
Andrew	a020a8b32c	Update USAGE.md Fix link to scripting.	2020-08-25 10:38:38 -04:00
simon987	5d5d9c3092	Fix heap buffer overflow warning	2020-08-25 10:38:38 -04:00
simon987	3379d5ce71	Fix #97	2020-08-25 10:38:38 -04:00
simon987	a0ff4a1f01	Fix heap buffer overflow warning	2020-08-25 10:38:38 -04:00
simon987	4589f3bde7	Fix #94	2020-08-25 10:38:38 -04:00
simon987	1c898640cf	Fix #88	2020-08-25 10:38:38 -04:00
simon987	a0739d5177	Fix #92	2020-08-25 10:38:38 -04:00
simon987	8f9d29dbc6	Fix #91	2020-08-25 10:38:38 -04:00
simon987	3ff4b70223	Update README.md	2020-08-25 10:38:38 -04:00
simon987	02ad035b09	Workaround when first ebook page is blank	2020-08-25 10:38:38 -04:00
simon987	c11feb213d	Gracefully handle archive errors in comic.c	2020-08-25 10:38:38 -04:00
simon987	72902947cd	Fix for #90	2020-08-25 10:38:38 -04:00
simon987	a18bb81222	remove warning	2020-08-25 10:38:38 -04:00
simon987	1520288f19	Fix #89	2020-08-25 10:38:38 -04:00
simon987	e507de194b	Fix log colors	2020-08-25 10:38:38 -04:00
simon987	0e517d5e2b	Fix #81	2020-08-03 20:09:07 -04:00
simon987	8223ef3860	Update USAGE.md	2020-08-03 19:48:49 -04:00
simon987	995a196690	Log user script task, add async arg	2020-08-03 19:44:43 -04:00
simon987	465d017e18	CSS tweaks, fix #87	2020-08-03 19:15:12 -04:00
simon987	ca994d3914	Fix bug with media files, don't encode thumbnail when not necessary	2020-07-26 11:52:48 -04:00
simon987	db2285973f	Configurable column count	2020-07-26 11:50:21 -04:00
simon987	61de9e9f14	Set timeout for HTTP get request	2020-07-25 19:55:27 -04:00
simon987	3015ef0ff4	Increase file preview file	2020-07-25 17:26:17 -04:00
simon987	b55d432841	Fix #65	2020-07-25 09:37:37 -04:00
simon987	ed90a140ce	Update README.md	2020-07-19 14:53:03 -04:00
simon987	052df82373	Fix #83	2020-07-19 13:10:30 -04:00
simon987	5676136777	Remove println that was left accidentally	2020-07-18 20:55:12 -04:00
simon987	c061613302	Fix #76	2020-07-18 19:23:43 -04:00
simon987	d0325fd9b9	Fix for simon987/sist2#85	2020-07-18 18:48:54 -04:00
simon987	e05a6f3863	Fix for #75	2020-07-18 18:46:52 -04:00
simon987	f1690a9cca	Mobi build fix	2020-07-18 13:10:45 -04:00
simon987	100a264413	Don't show MuPDF warnings unless --very-verbose is specified	2020-07-18 10:28:05 -04:00
simon987	29390bb454	Update README	2020-07-18 09:54:36 -04:00
simon987	4d43036ded	Fix simon987/sist2#78	2020-07-18 09:41:39 -04:00
simon987	0b5cdbd130	Fix #79	2020-07-18 09:36:10 -04:00
simon987	53d7695f66	Read .raw thumbnails #80 , fix media probing for some formats	2020-07-18 09:31:42 -04:00
simon987	8d53456404	fix libscan submodule	2020-07-17 20:33:50 -04:00
simon987	cbc08a7cc9	Save ebook renders as jpeg	2020-07-17 20:18:21 -04:00
simon987	e629b4d7d3	Faster comic book parsing, probably fixes #77	2020-07-17 19:10:18 -04:00
simon987	22f7073b39	mobi reading bugfix	2020-07-16 20:30:28 -04:00
simon987	1781a74960	Oops I didn't mean to push this	2020-07-16 19:23:52 -04:00
simon987	db96c95ac7	log fix #73	2020-07-16 19:19:23 -04:00
simon987	7b9fa4cc0a	Fix bad merge...	2020-07-15 21:00:51 -04:00
simon987	5cc1fa86a9	Read embedded thumbnail simon987/sist2#74	2020-07-15 20:56:25 -04:00
simon987	649689ce30	Remove warning when generating stats	2020-07-15 20:41:38 -04:00
simon987	c8536f65a8	Fix memory leak in index	2020-07-15 20:41:09 -04:00
simon987	75b5e249c1	Merge pull request #72 from dpieski/patch-1 Update USAGE.md	2020-07-15 14:37:28 -04:00
Andrew	f49e03ac79	Update USAGE.md added example for Windows to display number of logical processors. Does this same limitation apply to the new `index` threads option?	2020-07-15 13:21:02 -05:00
simon987	a6d2afc8dc	Merge pull request #71 from simon987/web-tag Web tag	2020-07-14 20:23:22 -04:00
simon987	8f8f66ba05	Update README.md	2020-07-14 20:22:03 -04:00
simon987	1d9fcf7105	Manual tagging	2020-07-13 19:18:07 -04:00
simon987	8127745f2b	wip	2020-07-13 19:16:51 -04:00
simon987	230988d6d1	frontend tags	2020-07-13 19:15:59 -04:00
simon987	13f4dbed2d	Handle 429, multi-threaded index module	2020-07-11 17:42:46 -04:00
simon987	ed15e89f45	Fix exec-script --es-url not being passed	2020-06-28 12:41:09 -04:00
simon987	c636d3d921	Set number_of_replicas to 0 by default in elasticsearch	2020-06-26 18:10:51 -04:00
simon987	7e92d4b7d1	refresh index only if user script is ran	2020-06-25 20:48:47 -04:00
simon987	8ffe780ab2	Tag tree fix for #64 , validate required argument in exec-script	2020-06-25 20:11:30 -04:00
simon987	d3c8928fe8	Update readme	2020-06-24 21:06:27 -04:00
simon987	d9f628fca4	Build fix	2020-06-21 16:53:22 -04:00
simon987	68289268c1	Add exif tag	2020-06-21 16:51:14 -04:00
simon987	649c50c465	Update README.md	2020-06-21 14:35:18 -04:00
simon987	7b49a0dc49	Build fix	2020-06-21 12:56:13 -04:00
simon987	eb559b53aa	RAW picture file support	2020-06-21 10:46:11 -04:00
simon987	6d01f9c0df	whoops	2020-06-19 22:12:19 -04:00
simon987	e724fec668	Fix web return codes	2020-06-19 21:41:17 -04:00
simon987	fe5e93b300	Update USAGE.md	2020-06-19 21:29:09 -04:00
simon987	ecad85fd7d	version bump	2020-06-19 21:10:03 -04:00
simon987	74cc898259	Fix tag display issue	2020-06-19 21:07:19 -04:00
simon987	dc2e4443c4	Add exec-script command	2020-06-19 21:07:19 -04:00
simon987	1a64431b52	Merge pull request #63 from dpieski/patch-3 Correct typos in example	2020-06-19 18:26:10 -04:00
Andrew	9bad515e06	Correct typos in example Correct typos in examples.	2020-06-19 17:22:02 -05:00
simon987	648559cedb	Update README.md	2020-06-17 13:25:20 -04:00
simon987	3e6cd9cd5c	Merge pull request #60 from dpieski/patch-2 update Usage.md	2020-06-17 13:04:46 -04:00
simon987	f249992798	Update scripting.md	2020-06-17 13:00:07 -04:00
Andrew	e9645ecdaa	update Usage.md Fixing a link.	2020-06-17 10:58:25 -05:00
simon987	046edea0e2	Handle special characters in file paths	2020-06-10 19:45:36 -04:00
simon987	a011b7e97b	Fragment size setting	2020-06-09 21:40:53 -04:00
simon987	8c1c1697e0	Fix file wordexp in some paths #59	2020-06-05 19:41:02 -04:00
simon987	018b49fa4c	Fix csv_escape #58	2020-06-05 19:13:03 -04:00
simon987	27b4e6403e	Re-enable path autocomplete #54	2020-06-02 19:46:58 -04:00
simon987	13fdbd9e69	Fix for ES 7.7 #54	2020-06-01 18:14:34 -04:00
simon987	5e7fdaf8dd	Update issue-template.md	2020-06-01 10:45:43 -04:00
simon987	19d5c8ac9f	Update issue-template.md	2020-05-29 18:19:21 -04:00
simon987	99497049a8	Merge pull request #53 from dpieski/patch-1 Update README	2020-05-29 18:16:13 -04:00
Andrew	1a3181d78b	Update README changed case of path in a link to the usage guide to fix 404 error.	2020-05-29 15:37:20 -05:00
simon987	449aa77c8f	Fix for unknown mime inside archives	2020-05-25 17:36:04 -04:00
simon987	3058c55510	Memory leak fix #37	2020-05-24 15:42:42 -04:00
simon987	dedf9287b2	Fix name separation in `--archive list` mode	2020-05-24 14:36:59 -04:00
simon987	ab199b0c0c	Remove arc_reset() function because seek() inside archive doesn't work	2020-05-24 14:18:31 -04:00
simon987	c4fbae123e	Better support for media files inside archives	2020-05-24 14:10:23 -04:00
simon987	dd2397ef5c	handle .tgz #44 , ignore files inside archives for stats page	2020-05-24 10:10:28 -04:00
simon987	ee0f71f4d3	fix compile warning	2020-05-17 15:00:56 -04:00
simon987	0bbb96b149	Merge pull request #51 from simon987/stats Stats page	2020-05-17 14:49:28 -04:00
simon987	78f6e16701	image	2020-05-17 12:47:45 -04:00
simon987	4625bca9a9	stats	2020-05-17 12:47:02 -04:00
simon987	f2ae653886	Revert "wip" This reverts commit `5686bc86`	2020-05-16 08:16:49 -04:00
simon987	5686bc864d	wip	2020-05-13 17:37:40 -04:00
simon987	cf513b4ad8	Escape invalid UTF8 characters simon987/sist2#44 , increase magic buffer size	2020-05-12 19:28:02 -04:00
simon987	013423424e	UTF-8 fix attempt w/ libarchive (#44 )	2020-05-10 19:52:42 -04:00
simon987	16514fd6b0	Option to search in path #49	2020-05-09 22:00:22 -04:00
simon987	27509f97e1	Update USAGE.md	2020-05-08 19:08:46 -04:00
simon987	4c540eae1c	Update USAGE.md	2020-05-08 19:07:45 -04:00
simon987	d2b53ff6fc	Update README.md	2020-05-08 18:32:32 -04:00
simon987	0ef4292abf	Fix duplicate tag problem (simon987/sist2#48 )	2020-05-05 20:20:10 -04:00
simon987	e6fde38c24	Load defaults when LocalStorage is outdated	2020-05-03 08:13:25 -04:00
simon987	5fa343d40f	fix version typo	2020-05-03 08:10:28 -04:00
simon987	7ee1374802	oops	2020-04-30 21:21:48 -04:00
simon987	bd9e56829c	Support for markup files	2020-04-30 20:21:09 -04:00
simon987	718169345e	gzip artifacts in CI	2020-04-21 19:34:46 -04:00
simon987	5a6aa763ca	build fix	2020-04-21 18:50:32 -04:00
simon987	695d9abd83	revert debug hard-coded listen address	2020-04-21 15:52:35 -04:00
simon987	e436af7b2a	2.0 (#46 ) * extract scan code to libscan, (wip) * submodules * replace curl with mongoose (wip) * replace onion with mongoose (wip) * replace onion with mongoose (wip) * It compiles! (I think) * Update readme * Entirely remove libonion (WIP) * unscramble submodules * recover screenshot * Update mappings * Bug fixes * update * media meta fix * memory fixes * More bug fixes... * Bug fix w/ libmagic & vfile * libmagic fix (again) * Better lightbox, better video handler, random reloads fix * Use svg for info icon * re-enable http auth * mobi support #41, fix logs * Update README & cleanup	2020-04-21 14:42:20 -04:00
simon987	4501a7810f	Update issue-template.md	2020-04-11 07:33:33 -04:00
simon987	e36761fa6a	Update issue templates	2020-04-11 07:28:48 -04:00
simon987	fe53b79d56	Fix warnings	2020-03-25 08:18:59 -04:00
simon987	09615bbed6	Update dependencies	2020-03-24 14:30:23 -04:00
simon987	a2be9b955c	Fix build errors	2020-03-24 11:49:13 -04:00
simon987	9298bd2d9d	CI fix...	2020-03-24 10:09:33 -04:00
simon987	317034ba21	teamcity automation attempt	2020-03-24 10:01:27 -04:00
simon987	0505303503	text_buffer bug fixes & Sort option	2020-03-20 20:54:22 -04:00
simon987	6e5772f13b	Errors cleanup	2020-03-20 10:05:10 -04:00
simon987	ccccdb3b78	Fixes #38	2020-03-13 16:35:11 -04:00
simon987	12d17acf4f	UI fixes	2020-03-06 12:27:38 -05:00
simon987	48b56cdb7b	I forgot to commit this somehow	2020-03-06 10:32:05 -05:00
simon987	048f707f80	Fix buffer overflow in json parse function (index module)	2020-03-06 10:17:21 -05:00
simon987	98e0a5fd64	Update CI script	2020-03-06 09:41:33 -05:00
simon987	740a49a09f	version bump	2020-03-06 09:36:46 -05:00
simon987	81be662574	(breaking) update mime list	2020-03-06 09:36:21 -05:00
simon987	02fa3f02f5	Fix memory leak with virtual files in parse.c	2020-03-06 09:36:07 -05:00
simon987	cfdd7bdd87	Fix memory leak in font.c	2020-03-06 09:35:19 -05:00
simon987	7ceb645926	hotfix invalid read in text_buffer	2020-03-06 09:34:41 -05:00
simon987	7d0091f647	whoops	2020-03-05 21:54:56 -05:00
simon987	b3cd630399	Update README.md	2020-03-05 19:42:06 -05:00
simon987	5f7a1acfe3	Merge pull request #36 from simon987/wip-doc Wip doc	2020-03-05 18:43:56 -05:00
simon987	513a21cca2	Undo debug stuff	2020-03-05 18:42:51 -05:00
simon987	04dbfb23ab	Cleanup warnings	2020-03-05 16:53:30 -05:00
simon987	1abddabeec	Rewrite doc.c module, fix bad error handling, fix pdf.c memory leaks	2020-03-05 16:12:34 -05:00
simon987	9ace5774af	Update dependencies	2020-03-05 16:10:45 -05:00
simon987	eab6101cf7	make --fast faster	2020-03-05 12:26:43 -05:00
simon987	d7cbd5d2b6	wip doc rewrite	2020-03-05 09:13:37 -05:00
simon987	641edf2715	Prettier warning messages in main.c	2020-03-04 17:57:49 -05:00
simon987	7efb4957bf	inline text/util functions	2020-03-04 17:50:31 -05:00
simon987	9ae77fdedb	Fix css glitch	2020-03-03 16:51:01 -05:00
simon987	98c40901ed	Disallow incremental scan when version does not match (#33 )	2020-03-03 16:36:07 -05:00
simon987	363375d5da	version bump	2020-03-03 16:25:41 -05:00
simon987	149de95d88	(breaking) Upgrade path filter bar	2020-03-03 16:24:24 -05:00
simon987	e5bb4856d2	(breaking) Set item depth in ingest pipeline	2020-03-02 17:39:25 -05:00
simon987	d78994d427	Ignore --incremental option when the directory does not exist (#31 )	2020-03-01 21:16:50 -05:00
simon987	f2d68d54df	Update README.md	2020-03-01 13:55:08 -05:00
simon987	e03625838b	Settings menu (#30 ) and UI tweaks	2020-02-29 19:26:09 -05:00
simon987	86840b46f4	Version bump	2020-02-27 09:47:06 -05:00
simon987	e57f9916eb	Rewrite documentation	2020-02-27 09:45:14 -05:00
simon987	565ba6ee76	Fix for #29	2020-02-27 09:44:19 -05:00
simon987	d83fc2c373	Fix docker build for 1.2.15	2020-02-27 09:42:18 -05:00
simon	d4da28249e	--fast option #27	2020-02-22 18:37:08 -05:00
simon	483a454c8d	--exclude argument #26	2020-02-22 16:55:35 -05:00
simon	018ac86640	fix build...	2020-02-22 13:20:41 -05:00
simon	398f1aead4	Support for cbr documents	2020-02-22 13:11:19 -05:00
simon	d19a75926b	Fix invalid read in terminate_string()	2020-02-22 13:10:40 -05:00
simon	1ac8b40e3d	Code style	2020-02-22 09:02:59 -05:00
simon	a8505cb8c1	Fix for #28	2020-02-20 16:42:13 -05:00
simon	ae8652d86e	UI tweaks, search syntax (#25 )	2020-02-16 15:24:29 -05:00
simon	849beb09d8	hotfix	2020-02-15 19:33:18 -05:00
simon	e1aaaee617	UI tweak	2020-02-15 09:30:14 -05:00
simon	c02b940945	(I forgot to commit this)	2020-02-14 20:58:10 -05:00
simon	2934ddb07f	Add image viewer (#2 )	2020-02-14 18:28:55 -05:00
simon	7f6f3c02fa	OCR tweaks	2020-02-11 21:13:47 -05:00
simon	7f98d5a682	Fix buffer overflow (whoops)	2020-02-09 18:11:29 -05:00
simon	7eb9c5d7d5	Fix web/index issue with NULL mime types	2020-02-09 17:23:49 -05:00
simon	184439aa38	increase minimum image size for OCR	2020-02-09 14:06:59 -05:00
simon	1ce8b298a1	Display EXIF tags on document info panel, remove march=native on openjp	2020-02-09 13:21:19 -05:00
simon	75f99025d9	add exif dateTime, allow some special characters in text meta	2020-02-09 08:47:13 -05:00
simon	ebe852bd5a	Fix rewrite-url arg	2020-02-09 08:23:17 -05:00
simon	402b103c49	Fix total count for ES 7.5	2020-02-08 09:25:00 -05:00
simon	e9b6e1cdc2	Turn off auto optimisation in libtesseract build	2020-02-08 08:32:04 -05:00
simon	ed1ce8ab5e	Handle XML errors #18	2020-02-07 10:08:01 -05:00
simon	d1fa4febc4	Improve scroll feature, UI fix	2020-02-07 10:08:01 -05:00
simon987	048c55df7b	Update README.md	2020-02-06 19:56:29 -05:00
simon987	f77bc6a025	Update README.md	2020-02-06 19:55:32 -05:00
simon	efdde2734e	version bump	2020-02-06 19:28:05 -05:00
simon	66658fa8f7	Remove trailing/leading white space in text meta fields	2020-02-06 19:27:30 -05:00
simon	df41c251e4	(Breaking!) Add some exif tags	2020-02-06 19:21:50 -05:00
simon	3282ab56ba	Version bump	2020-02-02 09:26:54 -05:00
simon	8300838d30	Suppress XML parsing errors (#18 )	2020-02-02 09:26:03 -05:00
simon	c9870a6d3d	Remove -march=native for release build...	2020-02-02 09:03:06 -05:00
simon	a143cc4fcf	bundle openssl...	2020-02-02 08:39:20 -05:00
simon	9ef1f3781d	fix attempt for #11	2020-02-01 20:04:26 -05:00
simon	bbee8aa721	tesseract ocr path fix	2020-02-01 20:03:59 -05:00
simon	d22f83c797	curl fix	2020-02-01 15:22:43 -05:00
simon	50615486a4	curl fix attempt	2020-02-01 14:42:42 -05:00
simon	ca79e4f797	add /status endpoint	2020-01-28 10:18:37 -05:00
simon987	6a9fd08a80	Merge pull request #21 from simon987/wip-20 Fixes #20	2020-01-27 09:16:00 -05:00
simon987	cab890dc9b	#20 wip	2020-01-27 09:09:42 -05:00
simon987	b3c4faf2df	Update README.md	2020-01-26 12:37:13 -05:00
simon987	353937171a	Update README.md	2020-01-20 15:54:53 -05:00
simon	c80002bea4	Bundle libcurl attempt 2	2020-01-18 11:53:12 -05:00
simon	56adee9d81	Bundle libcurl, libopc bugfix #18	2020-01-18 10:25:02 -05:00
simon	d6493d6d5f	Bundle libpng	2020-01-16 16:21:38 -05:00
simon	0967e9676d	remove static build in CI...	2020-01-16 15:45:18 -05:00
simon	487e998ea0	Display error message on /d/ error	2020-01-16 15:04:50 -05:00