Update CI script

version bump
(breaking) update mime list
2025-12-12 15:08:53 +00:00 · 2020-03-06 09:41:33 -05:00 · 2020-03-06 09:36:46 -05:00 · 2020-03-06 09:36:21 -05:00 · 2020-03-06 09:36:07 -05:00 · 2020-03-06 09:35:19 -05:00
101 changed files with 7054 additions and 1594 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -11,7 +11,8 @@ Makefile
 LOG
 sist2*
 index.sist2/
-bundle.css
+bundle*.css
 bundle.js
 *.a
 vgcore.*
+build/
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,15 +4,42 @@
 [submodule "cJSON"]
 	path = cJSON
 	url = https://github.com/DaveGamble/cJSON
-[submodule "lib/mupdf"]
-	path = lib/mupdf
-	url = git://git.ghostscript.com/mupdf.git
-[submodule "lib/onion"]
-	path = lib/onion
-	url = https://github.com/davidmoreno/onion
-[submodule "lib/ffmpeg"]
-	path = lib/ffmpeg
-	url = https://git.ffmpeg.org/ffmpeg.git
 [submodule "lmdb"]
 	path = lmdb
 	url = https://github.com/LMDB/lmdb
+[submodule "utf8.h"]
+	path = utf8.h
+	url = https://github.com/sheredom/utf8.h
+[submodule "lib/bzip2-1.0.6"]
+	path = lib/bzip2-1.0.6
+	url = https://github.com/enthought/bzip2-1.0.6
+[submodule "lib/libmagic"]
+	path = lib/libmagic
+	url = https://github.com/threatstack/libmagic
+[submodule "lib/harfbuzz"]
+	path = lib/harfbuzz
+	url = https://github.com/harfbuzz/harfbuzz
+[submodule "lib/openjpeg"]
+	path = lib/openjpeg
+	url = https://github.com/uclouvain/openjpeg
+[submodule "lib/ffmpeg"]
+	path = lib/ffmpeg
+	url = https://git.ffmpeg.org/ffmpeg.git
+[submodule "lib/onion"]
+	path = lib/onion
+	url = https://github.com/davidmoreno/onion
+[submodule "lib/mupdf"]
+	path = lib/mupdf
+	url = git://git.ghostscript.com/mupdf.git
+[submodule "lib/tesseract"]
+	path = lib/tesseract
+	url = https://github.com/tesseract-ocr/tesseract
+[submodule "lib/leptonica"]
+	path = lib/leptonica
+	url = https://github.com/danbloomberg/leptonica
+[submodule "lib/libtiff"]
+	path = lib/libtiff
+	url = https://gitlab.com/libtiff/libtiff
+[submodule "lib/libpng"]
+	path = lib/libpng
+	url = https://github.com/glennrp/libpng
--- a/.teamcity/settings.kts
+++ b/.teamcity/settings.kts
@@ -0,0 +1,69 @@
+import jetbrains.buildServer.configs.kotlin.v2019_2.*
+import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.ExecBuildStep
+import jetbrains.buildServer.configs.kotlin.v2019_2.buildSteps.exec
+import jetbrains.buildServer.configs.kotlin.v2019_2.triggers.vcs
+import jetbrains.buildServer.configs.kotlin.v2019_2.vcs.GitVcsRoot
+
+/*
+The settings script is an entry point for defining a TeamCity
+project hierarchy. The script should contain a single call to the
+project() function with a Project instance or an init function as
+an argument.
+
+VcsRoots, BuildTypes, Templates, and subprojects can be
+registered inside the project using the vcsRoot(), buildType(),
+template(), and subProject() methods respectively.
+
+To debug settings scripts in command-line, run the
+
+    mvnDebug org.jetbrains.teamcity:teamcity-configs-maven-plugin:generate
+
+command and attach your debugger to the port 8000.
+
+To debug in IntelliJ Idea, open the 'Maven Projects' tool window (View
+-> Tool Windows -> Maven Projects), find the generate task node
+(Plugins -> teamcity-configs -> teamcity-configs:generate), the
+'Debug' option is available in the context menu for the task.
+*/
+
+version = "2019.2"
+
+project {
+    // Register the Git VCS root declared at the end of this file.
+    vcsRoot(HttpsGithubComSimon987sist2refsHeadsMaster)
+    // Register the single build configuration, `Build`.
+    buildType(Build)
+}
+// Build configuration "Build": one exec step running ./ci/build.sh, with the paths in artifactRules kept as artifacts.
+object Build : BuildType({
+    name = "Build"
+    // Artifact rules, one path per line; trimIndent() strips the common leading indentation.
+    artifactRules = """
+        sist2
+        sist2_scan
+    """.trimIndent()
+
+    vcs {
+        root(HttpsGithubComSimon987sist2refsHeadsMaster)
+    }
+    // Single exec step: ./ci/build.sh, configured with the simon987/general_ci Docker image (Linux platform).
+    steps {
+        exec {
+            name = "Build"
+            path = "./ci/build.sh"
+            dockerImage = "simon987/general_ci"
+            dockerImagePlatform = ExecBuildStep.ImagePlatform.Linux
+            dockerPull = true
+        }
+    }
+    // VCS trigger declared with an empty body, i.e. no trigger options are overridden here.
+    triggers {
+        vcs {
+        }
+    }
+})
+// Git VCS root for the sist2 GitHub repository; only `name` and `url` are set here.
+object HttpsGithubComSimon987sist2refsHeadsMaster : GitVcsRoot({
+    name = "https://github.com/simon987/sist2#refs/heads/master"
+    url = "https://github.com/simon987/sist2"
+})
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,6 +4,8 @@ set(CMAKE_C_STANDARD 11)
 project(sist2 C)
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/CMakeModules")

+option(SIST_DEBUG "Build a debug executable" on)  # default ON: debug flags + AddressSanitizer, output named sist2_debug
+
 add_executable(
        sist2
        src/main.c
@@ -19,9 +21,14 @@ add_executable(
        src/parsing/text.h src/parsing/text.c
        src/index/web.c src/index/web.h
        src/web/serve.c src/web/serve.h
+        src/web/auth_basic.h src/web/auth_basic.c
        src/index/elastic.c src/index/elastic.h
        src/util.c src/util.h
        src/ctx.h src/types.h src/parsing/font.c src/parsing/font.h
+        src/parsing/arc.c src/parsing/arc.h
+        src/parsing/doc.c src/parsing/doc.h
+        src/log.c src/log.h
+        src/parsing/cbr.h src/parsing/cbr.c

        # argparse
        argparse/argparse.h argparse/argparse.c
@@ -32,63 +39,84 @@ add_executable(
        # LMDB
        lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
        lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
-        src/cli.c src/cli.h)
+        src/cli.c src/cli.h
+
+        # utf8.h
+        utf8.h/utf8.h
+)

 find_package(PkgConfig REQUIRED)
 set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig/")

-find_package(LibMagic REQUIRED)
-find_package(FFmpeg REQUIRED)
-find_package(OpenSSL REQUIRED)
 find_package(Freetype REQUIRED)

 pkg_check_modules(GLIB REQUIRED glib-2.0)
 pkg_check_modules(GOBJECT REQUIRED gobject-2.0)
 pkg_check_modules(UUID REQUIRED uuid)

-include_directories(${LIBMAGIC_INCLUDE_DIRS})
-link_directories(${LIBMAGIC_LIBRARY_DIRS})
-add_definitions(${LIBMAGIC_CFLAGS_OTHER})
-
-link_directories(${UUID_LIBRARY_DIRS})
-include_directories(${UUID_INCLUDE_DIRS})
 add_definitions(${UUID_CFLAGS_OTHER})
-
-include_directories(${GLIB_INCLUDE_DIRS})
-link_directories(${GLIB_LIBRARY_DIRS})
 add_definitions(${GLIB_CFLAGS_OTHER})
-
-include_directories(${GOBJECT_INCLUDE_DIRS})
-link_directories(${GOBJECT_LIBRARY_DIRS})
 add_definitions(${GOBJECT_CFLAGS_OTHER})
-
-link_directories(${FFMPEG_LIBRARY_DIRS})
-include_directories(${FFMPEG_INCLUDE_DIRS})
-
-include_directories(${OPENSSL_INCLUDE_DIR})
-link_directories(${OPENSSL_CRYPTO_LIBRARY})
+add_definitions(${FREETYPE_CFLAGS_OTHER})

 list(REMOVE_ITEM GLIB_LIBRARIES pcre)
 list(REMOVE_ITEM GOBJECT_LIBRARIES pcre)
 list(REMOVE_ITEM UUID_LIBRARIES pcre)

-include_directories(${FREETYPE_INCLUDE_DIRS})
-add_definitions(${FREETYPE_CFLAGS_OTHER})
-
-include_directories(
+target_include_directories(
+        sist2 PUBLIC
+        ${GOBJECT_INCLUDE_DIRS}
+        ${GLIB_INCLUDE_DIRS}
+        ${PROJECT_SOURCE_DIR}/lib/ffmpeg/
+        ${FREETYPE_INCLUDE_DIRS}
+        ${UUID_INCLUDE_DIRS}
        ${PROJECT_SOURCE_DIR}/
        ${PROJECT_SOURCE_DIR}/lmdb/libraries/liblmdb/
        ${PROJECT_SOURCE_DIR}/lib/onion/src/
        ${PROJECT_SOURCE_DIR}/lib/mupdf/include/
+        ${PROJECT_SOURCE_DIR}/include/
+        /usr/include/libxml2/
+        ${PROJECT_SOURCE_DIR}/lib/tesseract/include/
+)
+target_link_directories(
+        sist2 PUBLIC
+        ${UUID_LIBRARY_DIRS}
 )

-target_compile_options(sist2
+target_compile_options(
+        sist2
        PRIVATE
-        -O3
-#        -march=native
-        -fno-stack-protector
-        -fomit-frame-pointer
-        )
+        -fPIC
+)
+
+# Build flavour, selected by the SIST_DEBUG option:
+#   OFF - optimised build (-Ofast) without stack protector or frame pointers.
+#   ON  - debug build: debug symbols, stack protector, frame pointers and
+#         AddressSanitizer at both compile and link time; the executable
+#         is written as "sist2_debug" instead of "sist2".
+if (NOT SIST_DEBUG)
+    target_compile_options(
+            sist2 PRIVATE
+            -Ofast
+            -fno-stack-protector
+            -fomit-frame-pointer
+    )
+else ()
+    target_compile_options(
+            sist2 PRIVATE
+            -g
+            -fstack-protector
+            -fno-omit-frame-pointer
+            -fsanitize=address
+    )
+    # The sanitizer flag is repeated for the link step.
+    target_link_options(
+            sist2 PRIVATE
+            -fsanitize=address
+    )
+    # Distinct file name for the debug executable.
+    set_target_properties(sist2 PROPERTIES OUTPUT_NAME sist2_debug)
+endif ()

 TARGET_LINK_LIBRARIES(
        sist2
@@ -98,13 +126,11 @@ TARGET_LINK_LIBRARIES(
        ${UUID_LIBRARIES}

        # ffmpeg
-#        ${PROJECT_SOURCE_DIR}/lib/libavcodec.a
-#        ${PROJECT_SOURCE_DIR}/lib/libavformat.a
-#        ${PROJECT_SOURCE_DIR}/lib/libavutil.a
-#        ${PROJECT_SOURCE_DIR}/lib/libswscale.a
-#        ${PROJECT_SOURCE_DIR}/lib/libswresample.a
-        ${FFMPEG_LIBRARIES}
-        swscale
+        ${PROJECT_SOURCE_DIR}/lib/libavcodec.a
+        ${PROJECT_SOURCE_DIR}/lib/libavformat.a
+        ${PROJECT_SOURCE_DIR}/lib/libavutil.a
+        ${PROJECT_SOURCE_DIR}/lib/libswscale.a
+        ${PROJECT_SOURCE_DIR}/lib/libswresample.a

        # mupdf
        ${PROJECT_SOURCE_DIR}/lib/libmupdf.a
@@ -114,14 +140,36 @@ TARGET_LINK_LIBRARIES(
        ${PROJECT_SOURCE_DIR}/lib/libonion_static.a

        pthread
-        curl
+
        m
        bz2
+#        ${PROJECT_SOURCE_DIR}/lib/libmagic.a
        magic
+        ${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
+        ${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
+        freetype
+        archive
+
+        xml2
+
+        ${PROJECT_SOURCE_DIR}/lib/libtesseract.a
+        ${PROJECT_SOURCE_DIR}/lib/liblept.a
+        ${PROJECT_SOURCE_DIR}/lib/libtiff.a
+        ${PROJECT_SOURCE_DIR}/lib/libpng16.a
+        stdc++
+
+        # curl -- static archives resolve left to right, so libssl.a must precede libcrypto.a
+        ${PROJECT_SOURCE_DIR}/lib/libcurl.a
+        ${PROJECT_SOURCE_DIR}/lib/libssl.a
+        ${PROJECT_SOURCE_DIR}/lib/libcrypto.a
+        dl
+
+        pcre
 )

 add_custom_target(
        before_sist2
        COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/scripts/before_build.sh
 )
+
 add_dependencies(sist2 before_sist2)
--- a/CMakeModules/FindFFmpeg.cmake
+++ b/CMakeModules/FindFFmpeg.cmake
@@ -0,0 +1,80 @@
+# - Try to find ffmpeg libraries (libavcodec, libavformat and libavutil)
+# Once done this will define
+#
+# FFMPEG_FOUND - system has ffmpeg or libav
+# FFMPEG_INCLUDE_DIR - the ffmpeg include directory
+# FFMPEG_LIBRARIES - Link these to use ffmpeg
+# FFMPEG_LIBAVCODEC
+# FFMPEG_LIBAVFORMAT
+# FFMPEG_LIBAVUTIL
+#
+# Copyright (c) 2008 Andreas Schneider <mail@cynapses.org>
+# Modified for other libraries by Lasse Kärkkäinen <tronic>
+# Modified for Hedgewars by Stepik777
+#
+# Redistribution and use is allowed according to the terms of the New
+# BSD license.
+#
+
+# Reuse results that are already set (by the user or by an earlier call in this scope).
+if (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR)
+  set(FFMPEG_FOUND TRUE)
+else ()
+  # pkg-config, when available, only contributes extra search directories
+  # to the find_path()/find_library() calls below.
+  find_package(PkgConfig)
+  if (PKG_CONFIG_FOUND)
+    pkg_check_modules(_FFMPEG_AVCODEC libavcodec)
+    pkg_check_modules(_FFMPEG_AVFORMAT libavformat)
+    pkg_check_modules(_FFMPEG_AVUTIL libavutil)
+  endif ()
+
+  find_path(FFMPEG_AVCODEC_INCLUDE_DIR
+    NAMES libavcodec/avcodec.h
+    PATHS ${_FFMPEG_AVCODEC_INCLUDE_DIRS} /usr/include /usr/local/include /opt/local/include /sw/include
+    PATH_SUFFIXES ffmpeg libav
+  )
+
+  find_library(FFMPEG_LIBAVCODEC
+    NAMES avcodec
+    PATHS ${_FFMPEG_AVCODEC_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
+  )
+
+  find_library(FFMPEG_LIBAVFORMAT
+    NAMES avformat
+    PATHS ${_FFMPEG_AVFORMAT_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
+  )
+
+  find_library(FFMPEG_LIBAVUTIL
+    NAMES avutil
+    PATHS ${_FFMPEG_AVUTIL_LIBRARY_DIRS} /usr/lib /usr/local/lib /opt/local/lib /sw/lib
+  )
+
+  # All three libraries and the headers must be present: FFMPEG_LIBRARIES lists
+  # libavutil too, so a missing one must not be reported as success.
+  if (FFMPEG_LIBAVCODEC AND FFMPEG_LIBAVFORMAT AND FFMPEG_LIBAVUTIL AND FFMPEG_AVCODEC_INCLUDE_DIR)
+    set(FFMPEG_FOUND TRUE)
+  endif ()
+
+  if (FFMPEG_FOUND)
+    set(FFMPEG_INCLUDE_DIR ${FFMPEG_AVCODEC_INCLUDE_DIR})
+    set(FFMPEG_LIBRARIES
+      ${FFMPEG_LIBAVCODEC}
+      ${FFMPEG_LIBAVFORMAT}
+      ${FFMPEG_LIBAVUTIL}
+    )
+  endif ()
+
+  # find_package(FFmpeg ...) sets FFmpeg_FIND_REQUIRED / FFmpeg_FIND_QUIETLY (package-name
+  # case); the upper-case spellings are still honoured for existing callers.
+  if (FFMPEG_FOUND)
+    if (NOT FFMPEG_FIND_QUIETLY AND NOT FFmpeg_FIND_QUIETLY)
+      message(STATUS "Found FFMPEG or Libav: ${FFMPEG_LIBRARIES}, ${FFMPEG_INCLUDE_DIR}")
+    endif ()
+  elseif (FFMPEG_FIND_REQUIRED OR FFmpeg_FIND_REQUIRED)
+    message(FATAL_ERROR "Could not find libavcodec, libavformat or libavutil (libraries and libavcodec/avcodec.h)")
+  endif ()
+
+endif ()
+
+
--- a/CMakeModules/FindLibMagic.cmake
+++ b/CMakeModules/FindLibMagic.cmake
@@ -0,0 +1,100 @@
+
+#-------------------------------------------------------------------------------
+# Copyright (c) 2013-2013, Lars Baehren <lbaehren@gmail.com>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+#  * Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#-------------------------------------------------------------------------------
+
+# - Check for the presence of LIBMAGIC
+#
+# The following variables are set when LIBMAGIC is found:
+#  LIBMAGIC_FOUND      = Set to true, if all components of LIBMAGIC have been
+#                        found.
+#  LIBMAGIC_INCLUDES   = Include path for the header files of LIBMAGIC
+#  LIBMAGIC_LIBRARIES  = Link these to use LIBMAGIC
+#  LIBMAGIC_LFLAGS     = Linker flags (optional)
+
+if (NOT LIBMAGIC_FOUND)
+
+  if (NOT LIBMAGIC_ROOT_DIR)
+    set (LIBMAGIC_ROOT_DIR ${CMAKE_INSTALL_PREFIX})
+  endif (NOT LIBMAGIC_ROOT_DIR)
+
+  ##____________________________________________________________________________
+  ## Check for the header files
+
+  find_path (LIBMAGIC_FILE_H
+    NAMES file/file.h
+    HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
+    PATH_SUFFIXES include
+    )
+  if (LIBMAGIC_FILE_H)
+    list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_FILE_H})
+  endif (LIBMAGIC_FILE_H)
+
+  find_path (LIBMAGIC_MAGIC_H
+    NAMES magic.h
+    HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
+    PATH_SUFFIXES include include/linux
+    )
+  if (LIBMAGIC_MAGIC_H)
+    list (APPEND LIBMAGIC_INCLUDES ${LIBMAGIC_MAGIC_H})
+    # Both headers may share a directory; de-duplicate here so the call never sees an unset list.
+    list (REMOVE_DUPLICATES LIBMAGIC_INCLUDES)
+  endif (LIBMAGIC_MAGIC_H)
+
+  ##____________________________________________________________________________
+  ## Check for the library
+
+  find_library (LIBMAGIC_LIBRARIES magic
+    HINTS ${LIBMAGIC_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
+    PATH_SUFFIXES lib
+    )
+
+  ##____________________________________________________________________________
+  ## Actions taken when all components have been found
+
+  # LIBMAGIC_FOUND used to stay unset (the find_package_handle_standard_args call was
+  # disabled), so it is derived here from the library and the include list. The
+  # *_FIND_* checks accept both spellings: find_package(LibMagic) sets LibMagic_FIND_*.
+  if (LIBMAGIC_LIBRARIES AND LIBMAGIC_INCLUDES)
+    set (LIBMAGIC_FOUND TRUE)
+    if (NOT LIBMAGIC_FIND_QUIETLY AND NOT LibMagic_FIND_QUIETLY)
+      message (STATUS "Found components for LIBMAGIC")
+      message (STATUS "LIBMAGIC_ROOT_DIR  = ${LIBMAGIC_ROOT_DIR}")
+      message (STATUS "LIBMAGIC_INCLUDES  = ${LIBMAGIC_INCLUDES}")
+      message (STATUS "LIBMAGIC_LIBRARIES = ${LIBMAGIC_LIBRARIES}")
+    endif ()
+  elseif (LIBMAGIC_FIND_REQUIRED OR LibMagic_FIND_REQUIRED)
+    message (FATAL_ERROR "Could not find LIBMAGIC!")
+  endif ()
+
+  ##____________________________________________________________________________
+  ## Mark advanced variables
+
+  mark_as_advanced (
+    LIBMAGIC_ROOT_DIR
+    LIBMAGIC_INCLUDES
+    LIBMAGIC_LIBRARIES
+    )
+
+endif (NOT LIBMAGIC_FOUND)
--- a/CMakeModules/FindOpenSSL.cmake
+++ b/CMakeModules/FindOpenSSL.cmake
@@ -0,0 +1,478 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+macro(_OpenSSL_test_and_find_dependencies ssl_library crypto_library)
+  # _OpenSSL_has_dependencies: TRUE (plus find_package(Threads)) on Linux when either path has the static-library suffix.
+  set(_OpenSSL_has_dependencies FALSE)
+  if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND
+     ("${ssl_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$" OR
+      "${crypto_library}" MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$"))
+    set(_OpenSSL_has_dependencies TRUE)
+    find_package(Threads)
+  endif()
+endmacro()
+
+function(_OpenSSL_add_dependencies libraries_var library)  # appends thread + dl libs to the caller's list; `library` is unused
+  set(_thread_libs)
+  if(CMAKE_THREAD_LIBS_INIT)
+    set(_thread_libs ${CMAKE_THREAD_LIBS_INIT})
+  endif()
+  set(${libraries_var} ${${libraries_var}} ${_thread_libs} ${CMAKE_DL_LIBS} PARENT_SCOPE)
+endfunction()
+
+function(_OpenSSL_target_add_dependencies target)
+  # When _OpenSSL_has_dependencies is set, append Threads::Threads and CMAKE_DL_LIBS to the target's INTERFACE_LINK_LIBRARIES.
+  if(_OpenSSL_has_dependencies)
+    set_property(TARGET ${target} APPEND PROPERTY INTERFACE_LINK_LIBRARIES Threads::Threads ${CMAKE_DL_LIBS})
+  endif()
+endfunction()
+
+if (UNIX)
+  find_package(PkgConfig QUIET)
+  pkg_check_modules(_OPENSSL QUIET openssl)
+endif ()
+
+# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES
+if(OPENSSL_USE_STATIC_LIBS)
+  set(_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
+  if(WIN32)
+    set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
+  else()
+    set(CMAKE_FIND_LIBRARY_SUFFIXES .a )
+  endif()
+endif()
+
+if (WIN32)
+  # http://www.slproweb.com/products/Win32OpenSSL.html
+  set(_OPENSSL_ROOT_HINTS
+    ${OPENSSL_ROOT_DIR}
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (32-bit)_is1;Inno Setup: App Path]"
+    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\OpenSSL (64-bit)_is1;Inno Setup: App Path]"
+    ENV OPENSSL_ROOT_DIR
+    )
+  file(TO_CMAKE_PATH "$ENV{PROGRAMFILES}" _programfiles)
+  set(_OPENSSL_ROOT_PATHS
+    "${_programfiles}/OpenSSL"
+    "${_programfiles}/OpenSSL-Win32"
+    "${_programfiles}/OpenSSL-Win64"
+    "C:/OpenSSL/"
+    "C:/OpenSSL-Win32/"
+    "C:/OpenSSL-Win64/"
+    )
+  unset(_programfiles)
+else ()
+  set(_OPENSSL_ROOT_HINTS
+    ${OPENSSL_ROOT_DIR}
+    ENV OPENSSL_ROOT_DIR
+    )
+endif ()
+
+set(_OPENSSL_ROOT_HINTS_AND_PATHS
+    HINTS ${_OPENSSL_ROOT_HINTS}
+    PATHS ${_OPENSSL_ROOT_PATHS}
+    )
+
+find_path(OPENSSL_INCLUDE_DIR
+  NAMES
+    openssl/ssl.h
+  ${_OPENSSL_ROOT_HINTS_AND_PATHS}
+  HINTS
+    ${_OPENSSL_INCLUDEDIR}
+  PATH_SUFFIXES
+    include
+)
+
+if(WIN32 AND NOT CYGWIN)
+  if(MSVC)
+    # /MD and /MDd are the standard values - if someone wants to use
+    # others, the libnames have to change here too
+    # use also ssl and ssleay32 in debug as fallback for openssl < 0.9.8b
+    # enable OPENSSL_MSVC_STATIC_RT to get the libs build /MT (Multithreaded no-DLL)
+    # In Visual C++ naming convention each of these four kinds of Windows libraries has it's standard suffix:
+    #   * MD for dynamic-release
+    #   * MDd for dynamic-debug
+    #   * MT for static-release
+    #   * MTd for static-debug
+
+    # Implementation details:
+    # We are using the libraries located in the VC subdir instead of the parent directory even though :
+    # libeay32MD.lib is identical to ../libeay32.lib, and
+    # ssleay32MD.lib is identical to ../ssleay32.lib
+    # enable OPENSSL_USE_STATIC_LIBS to use the static libs located in lib/VC/static
+
+    if (OPENSSL_MSVC_STATIC_RT)
+      set(_OPENSSL_MSVC_RT_MODE "MT")
+    else ()
+      set(_OPENSSL_MSVC_RT_MODE "MD")
+    endif ()
+
+    # Since OpenSSL 1.1, lib names are like libcrypto32MTd.lib and libssl32MTd.lib
+    if( "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8" )
+        set(_OPENSSL_MSVC_ARCH_SUFFIX "64")
+    else()
+        set(_OPENSSL_MSVC_ARCH_SUFFIX "32")
+    endif()
+
+    if(OPENSSL_USE_STATIC_LIBS)
+      set(_OPENSSL_PATH_SUFFIXES
+        "lib/VC/static"
+        "VC/static"
+        "lib"
+        )
+    else()
+      set(_OPENSSL_PATH_SUFFIXES
+        "lib/VC"
+        "VC"
+        "lib"
+        )
+    endif ()
+
+    find_library(LIB_EAY_DEBUG
+      NAMES
+        libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
+        libcrypto${_OPENSSL_MSVC_RT_MODE}d
+        libcryptod
+        libeay32${_OPENSSL_MSVC_RT_MODE}d
+        libeay32d
+        cryptod
+      NAMES_PER_DIR
+      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
+      PATH_SUFFIXES
+        ${_OPENSSL_PATH_SUFFIXES}
+    )
+
+    find_library(LIB_EAY_RELEASE
+      NAMES
+        libcrypto${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
+        libcrypto${_OPENSSL_MSVC_RT_MODE}
+        libcrypto
+        libeay32${_OPENSSL_MSVC_RT_MODE}
+        libeay32
+        crypto
+      NAMES_PER_DIR
+      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
+      PATH_SUFFIXES
+        ${_OPENSSL_PATH_SUFFIXES}
+    )
+
+    find_library(SSL_EAY_DEBUG
+      NAMES
+        libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}d
+        libssl${_OPENSSL_MSVC_RT_MODE}d
+        libssld
+        ssleay32${_OPENSSL_MSVC_RT_MODE}d
+        ssleay32d
+        ssld
+      NAMES_PER_DIR
+      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
+      PATH_SUFFIXES
+        ${_OPENSSL_PATH_SUFFIXES}
+    )
+
+    find_library(SSL_EAY_RELEASE
+      NAMES
+        libssl${_OPENSSL_MSVC_ARCH_SUFFIX}${_OPENSSL_MSVC_RT_MODE}
+        libssl${_OPENSSL_MSVC_RT_MODE}
+        libssl
+        ssleay32${_OPENSSL_MSVC_RT_MODE}
+        ssleay32
+        ssl
+      NAMES_PER_DIR
+      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
+      PATH_SUFFIXES
+        ${_OPENSSL_PATH_SUFFIXES}
+    )
+
+    set(LIB_EAY_LIBRARY_DEBUG "${LIB_EAY_DEBUG}")
+    set(LIB_EAY_LIBRARY_RELEASE "${LIB_EAY_RELEASE}")
+    set(SSL_EAY_LIBRARY_DEBUG "${SSL_EAY_DEBUG}")
+    set(SSL_EAY_LIBRARY_RELEASE "${SSL_EAY_RELEASE}")
+
+    include(${CMAKE_CURRENT_LIST_DIR}/SelectLibraryConfigurations.cmake)
+    select_library_configurations(LIB_EAY)
+    select_library_configurations(SSL_EAY)
+
+    mark_as_advanced(LIB_EAY_LIBRARY_DEBUG LIB_EAY_LIBRARY_RELEASE
+                     SSL_EAY_LIBRARY_DEBUG SSL_EAY_LIBRARY_RELEASE)
+    set(OPENSSL_SSL_LIBRARY ${SSL_EAY_LIBRARY} )
+    set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY_LIBRARY} )
+  elseif(MINGW)
+    # same player, for MinGW
+    set(LIB_EAY_NAMES crypto libeay32)
+    set(SSL_EAY_NAMES ssl ssleay32)
+    find_library(LIB_EAY
+      NAMES
+        ${LIB_EAY_NAMES}
+      NAMES_PER_DIR
+      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
+      PATH_SUFFIXES
+        "lib/MinGW"
+        "lib"
+    )
+
+    find_library(SSL_EAY
+      NAMES
+        ${SSL_EAY_NAMES}
+      NAMES_PER_DIR
+      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
+      PATH_SUFFIXES
+        "lib/MinGW"
+        "lib"
+    )
+
+    mark_as_advanced(SSL_EAY LIB_EAY)
+    set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
+    set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
+    unset(LIB_EAY_NAMES)
+    unset(SSL_EAY_NAMES)
+  else()
+    # Not sure what to pick for -say- intel, let's use the toplevel ones and hope someone report issues:
+    find_library(LIB_EAY
+      NAMES
+        libcrypto
+        libeay32
+      NAMES_PER_DIR
+      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
+      HINTS
+        ${_OPENSSL_LIBDIR}
+      PATH_SUFFIXES
+        lib
+    )
+
+    find_library(SSL_EAY
+      NAMES
+        libssl
+        ssleay32
+      NAMES_PER_DIR
+      ${_OPENSSL_ROOT_HINTS_AND_PATHS}
+      HINTS
+        ${_OPENSSL_LIBDIR}
+      PATH_SUFFIXES
+        lib
+    )
+
+    mark_as_advanced(SSL_EAY LIB_EAY)
+    set(OPENSSL_SSL_LIBRARY ${SSL_EAY} )
+    set(OPENSSL_CRYPTO_LIBRARY ${LIB_EAY} )
+  endif()
+else()
+
+  find_library(OPENSSL_SSL_LIBRARY
+    NAMES
+      ssl
+      ssleay32
+      ssleay32MD
+    NAMES_PER_DIR
+    ${_OPENSSL_ROOT_HINTS_AND_PATHS}
+    HINTS
+      ${_OPENSSL_LIBDIR}
+    PATH_SUFFIXES
+      lib
+  )
+
+  find_library(OPENSSL_CRYPTO_LIBRARY
+    NAMES
+      crypto
+    NAMES_PER_DIR
+    ${_OPENSSL_ROOT_HINTS_AND_PATHS}
+    HINTS
+      ${_OPENSSL_LIBDIR}
+    PATH_SUFFIXES
+      lib
+  )
+
+  mark_as_advanced(OPENSSL_CRYPTO_LIBRARY OPENSSL_SSL_LIBRARY)
+
+endif()
+
+# compat defines
+set(OPENSSL_SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY})
+set(OPENSSL_CRYPTO_LIBRARIES ${OPENSSL_CRYPTO_LIBRARY})
+_OpenSSL_test_and_find_dependencies("${OPENSSL_SSL_LIBRARY}" "${OPENSSL_CRYPTO_LIBRARY}")
+if(_OpenSSL_has_dependencies)
+  _OpenSSL_add_dependencies( OPENSSL_SSL_LIBRARIES "${OPENSSL_SSL_LIBRARY}" )
+  _OpenSSL_add_dependencies( OPENSSL_CRYPTO_LIBRARIES "${OPENSSL_CRYPTO_LIBRARY}" )
+endif()
+
+function(from_hex HEX DEC)
+  # Convert the hexadecimal digit string HEX (either letter case, no "0x"
+  # prefix) to its decimal value and store it in the caller's variable
+  # named by DEC. An empty HEX yields 0.
+  #
+  # Example: from_hex("1F" out) sets out to 31.
+  #
+  # Digits are consumed left to right; each step multiplies the running
+  # total by 16 and adds the value of the current digit.
+  string(TOUPPER "${HEX}" _digits)
+  string(LENGTH "${_digits}" _count)
+  set(_value 0)
+  set(_pos 0)
+
+  while (_pos LESS _count)
+    string(SUBSTRING "${_digits}" ${_pos} 1 _digit)
+    math(EXPR _pos "${_pos} + 1")
+
+    # Letters A-F stand for 10-15: their offset inside "ABCDEF" plus ten.
+    if (_digit MATCHES "^[A-F]$")
+      string(FIND "ABCDEF" "${_digit}" _offset)
+      math(EXPR _digit "${_offset} + 10")
+    endif()
+
+    math(EXPR _value "${_value} * 16 + ${_digit}")
+  endwhile()
+
+  # Hand the result back to the calling scope.
+  set(${DEC} ${_value} PARENT_SCOPE)
+endfunction()
+
+if(OPENSSL_INCLUDE_DIR AND EXISTS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h")
+  file(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h" openssl_version_str
+       REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")
+
+  if(openssl_version_str)
+    # The version number is encoded as 0xMNNFFPPS: major minor fix patch status
+    # The status gives if this is a developer or prerelease and is ignored here.
+    # Major, minor, and fix directly translate into the version numbers shown in
+    # the string. The patch field translates to the single character suffix that
+    # indicates the bug fix state, which 00 -> nothing, 01 -> a, 02 -> b and so
+    # on.
+
+    string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
+           "\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
+    list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
+    list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
+    from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
+    list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
+    from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
+    list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)
+
+    if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
+      from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
+      # 96 is the ASCII code of 'a' minus 1
+      math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
+      unset(_tmp)
+      # Once anyone knows how OpenSSL would call the patch versions beyond 'z'
+      # this should be updated to handle that, too. This has not happened yet
+      # so it is simply ignored here for now.
+      string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
+    endif ()
+
+    set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
+  endif ()
+endif ()
+
+set(OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARIES} ${OPENSSL_CRYPTO_LIBRARIES} )
+list(REMOVE_DUPLICATES OPENSSL_LIBRARIES)
+
+foreach(_comp IN LISTS OpenSSL_FIND_COMPONENTS)
+  if(_comp STREQUAL "Crypto")
+    if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
+        (EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
+        EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
+        EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
+    )
+      set(OpenSSL_${_comp}_FOUND TRUE)
+    else()
+      set(OpenSSL_${_comp}_FOUND FALSE)
+    endif()
+  elseif(_comp STREQUAL "SSL")
+    if(EXISTS "${OPENSSL_INCLUDE_DIR}" AND
+        (EXISTS "${OPENSSL_SSL_LIBRARY}" OR
+        EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
+        EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
+    )
+      set(OpenSSL_${_comp}_FOUND TRUE)
+    else()
+      set(OpenSSL_${_comp}_FOUND FALSE)
+    endif()
+  else()
+    message(WARNING "${_comp} is not a valid OpenSSL component")
+    set(OpenSSL_${_comp}_FOUND FALSE)
+  endif()
+endforeach()
+unset(_comp)
+
+include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
+find_package_handle_standard_args(OpenSSL
+  REQUIRED_VARS
+    OPENSSL_CRYPTO_LIBRARY
+    OPENSSL_INCLUDE_DIR
+  VERSION_VAR
+    OPENSSL_VERSION
+  HANDLE_COMPONENTS
+  FAIL_MESSAGE
+    "Could NOT find OpenSSL, try to set the path to OpenSSL root folder in the system variable OPENSSL_ROOT_DIR"
+)
+
+mark_as_advanced(OPENSSL_INCLUDE_DIR OPENSSL_LIBRARIES)
+
+if(OPENSSL_FOUND)
+  if(NOT TARGET OpenSSL::Crypto AND
+      (EXISTS "${OPENSSL_CRYPTO_LIBRARY}" OR
+        EXISTS "${LIB_EAY_LIBRARY_DEBUG}" OR
+        EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
+      )
+    add_library(OpenSSL::Crypto UNKNOWN IMPORTED)
+    set_target_properties(OpenSSL::Crypto PROPERTIES
+      INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
+    if(EXISTS "${OPENSSL_CRYPTO_LIBRARY}")
+      set_target_properties(OpenSSL::Crypto PROPERTIES
+        IMPORTED_LINK_INTERFACE_LANGUAGES "C"
+        IMPORTED_LOCATION "${OPENSSL_CRYPTO_LIBRARY}")
+    endif()
+    if(EXISTS "${LIB_EAY_LIBRARY_RELEASE}")
+      set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
+        IMPORTED_CONFIGURATIONS RELEASE)
+      set_target_properties(OpenSSL::Crypto PROPERTIES
+        IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
+        IMPORTED_LOCATION_RELEASE "${LIB_EAY_LIBRARY_RELEASE}")
+    endif()
+    if(EXISTS "${LIB_EAY_LIBRARY_DEBUG}")
+      set_property(TARGET OpenSSL::Crypto APPEND PROPERTY
+        IMPORTED_CONFIGURATIONS DEBUG)
+      set_target_properties(OpenSSL::Crypto PROPERTIES
+        IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
+        IMPORTED_LOCATION_DEBUG "${LIB_EAY_LIBRARY_DEBUG}")
+    endif()
+    _OpenSSL_target_add_dependencies(OpenSSL::Crypto)
+  endif()
+
+  if(NOT TARGET OpenSSL::SSL AND
+      (EXISTS "${OPENSSL_SSL_LIBRARY}" OR
+        EXISTS "${SSL_EAY_LIBRARY_DEBUG}" OR
+        EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
+      )
+    add_library(OpenSSL::SSL UNKNOWN IMPORTED)
+    set_target_properties(OpenSSL::SSL PROPERTIES
+      INTERFACE_INCLUDE_DIRECTORIES "${OPENSSL_INCLUDE_DIR}")
+    if(EXISTS "${OPENSSL_SSL_LIBRARY}")
+      set_target_properties(OpenSSL::SSL PROPERTIES
+        IMPORTED_LINK_INTERFACE_LANGUAGES "C"
+        IMPORTED_LOCATION "${OPENSSL_SSL_LIBRARY}")
+    endif()
+    if(EXISTS "${SSL_EAY_LIBRARY_RELEASE}")
+      set_property(TARGET OpenSSL::SSL APPEND PROPERTY
+        IMPORTED_CONFIGURATIONS RELEASE)
+      set_target_properties(OpenSSL::SSL PROPERTIES
+        IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
+        IMPORTED_LOCATION_RELEASE "${SSL_EAY_LIBRARY_RELEASE}")
+    endif()
+    if(EXISTS "${SSL_EAY_LIBRARY_DEBUG}")
+      set_property(TARGET OpenSSL::SSL APPEND PROPERTY
+        IMPORTED_CONFIGURATIONS DEBUG)
+      set_target_properties(OpenSSL::SSL PROPERTIES
+        IMPORTED_LINK_INTERFACE_LANGUAGES_DEBUG "C"
+        IMPORTED_LOCATION_DEBUG "${SSL_EAY_LIBRARY_DEBUG}")
+    endif()
+    if(TARGET OpenSSL::Crypto)
+      set_target_properties(OpenSSL::SSL PROPERTIES
+        INTERFACE_LINK_LIBRARIES OpenSSL::Crypto)
+    endif()
+    _OpenSSL_target_add_dependencies(OpenSSL::SSL)
+  endif()
+endif()
+
+# Restore the original find library ordering
+if(OPENSSL_USE_STATIC_LIBS)
+  set(CMAKE_FIND_LIBRARY_SUFFIXES ${_openssl_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
+endif()
--- a/CMakeModules/FindPackageHandleStandardArgs.cmake
+++ b/CMakeModules/FindPackageHandleStandardArgs.cmake
@@ -0,0 +1,268 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)
+
+# Internal helper macro: report a find failure for package ${_NAME}.
+#
+#   _msg  base text of the failure message.
+#
+# If FPHSA_REASON_FAILURE_MESSAGE is set, it is appended to the text as a
+# "Reason given by package" line. The message is emitted as FATAL_ERROR when
+# the package was REQUIRED, as STATUS otherwise, and is suppressed entirely
+# when the package was requested QUIETly. As a macro it reads _NAME and
+# FPHSA_REASON_FAILURE_MESSAGE from, and leaves __msg in, the caller's scope.
+macro(_FPHSA_FAILURE_MESSAGE _msg)
+  if(FPHSA_REASON_FAILURE_MESSAGE)
+    set(__msg "${_msg}\n    Reason given by package: ${FPHSA_REASON_FAILURE_MESSAGE}\n")
+  else()
+    set(__msg "${_msg}")
+  endif()
+
+  if(${_NAME}_FIND_REQUIRED)
+    message(FATAL_ERROR "${__msg}")
+  elseif(NOT ${_NAME}_FIND_QUIETLY)
+    message(STATUS "${__msg}")
+  endif()
+endmacro()
+
+
+# Internal helper macro: compose and emit the failure message in CONFIG_MODE.
+#
+# Three situations are distinguished:
+#   1. <PackageName>_CONFIG is set: a config file was accepted, but some other
+#      REQUIRED_VARS entry is missing.
+#   2. <PackageName>_CONSIDERED_CONFIGS is set: config files were found, but
+#      none with a suitable version; every considered file is listed.
+#   3. Neither is set: no config file was found at all.
+# As a macro it works directly on the caller's variables (_NAME, _NAME_LOWER,
+# MISSING_VARS, VERSION_MSG, FPHSA_*), and may modify
+# FPHSA_REASON_FAILURE_MESSAGE.
+macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)
+  if(${_NAME}_CONFIG)
+    # Case 1: config file found, another required variable is missing.
+    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})")
+
+  elseif(${_NAME}_CONSIDERED_CONFIGS)
+    # Case 2: list each considered config file together with its version.
+    set(_fphsa_configs_text "")
+    list(LENGTH ${_NAME}_CONSIDERED_CONFIGS _fphsa_last_index)
+    math(EXPR _fphsa_last_index "${_fphsa_last_index} - 1")
+    foreach(_fphsa_index RANGE ${_fphsa_last_index})
+      list(GET ${_NAME}_CONSIDERED_CONFIGS ${_fphsa_index} _fphsa_config_file)
+      list(GET ${_NAME}_CONSIDERED_VERSIONS ${_fphsa_index} _fphsa_config_version)
+      string(APPEND _fphsa_configs_text "\n    ${_fphsa_config_file} (version ${_fphsa_config_version})")
+    endforeach()
+
+    # A package-provided NOT_FOUND_MESSAGE is folded into the reason text;
+    # without one the file list is simply terminated by a newline.
+    if(NOT ${_NAME}_NOT_FOUND_MESSAGE)
+      string(APPEND _fphsa_configs_text "\n")
+    elseif(FPHSA_REASON_FAILURE_MESSAGE)
+      string(PREPEND FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}\n    ")
+    else()
+      set(FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}")
+    endif()
+
+    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:${_fphsa_configs_text}")
+
+  else()
+    # Case 3: no config file was found at all.
+    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}")
+  endif()
+endmacro()
+
+
+# find_package_handle_standard_args(<PackageName> ...)
+#
+# Common tail for Find<PackageName>.cmake modules: checks the required
+# variables, the requested components and the requested version, prints the
+# result, and sets <PackageName>_FOUND and <UPPERCASE_NAME>_FOUND in the
+# caller's scope.
+#
+# Two call forms are accepted:
+#   simple:   (<PackageName> (DEFAULT_MSG|<fail-message>) <required-var>...)
+#   extended: (<PackageName> [CONFIG_MODE] [HANDLE_COMPONENTS]
+#              [FAIL_MESSAGE <msg>] [REASON_FAILURE_MESSAGE <msg>]
+#              [VERSION_VAR <var>] [FOUND_VAR <var>]
+#              REQUIRED_VARS <required-var>...)
+# The extended form is selected when the first argument after the package name
+# is one of the keywords listed above. A failure is a FATAL_ERROR when the
+# package was REQUIRED (see _FPHSA_FAILURE_MESSAGE).
+function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)
+
+# Set up the arguments for `cmake_parse_arguments`.
+  set(options  CONFIG_MODE  HANDLE_COMPONENTS)
+  set(oneValueArgs  FAIL_MESSAGE  REASON_FAILURE_MESSAGE VERSION_VAR  FOUND_VAR)
+  set(multiValueArgs REQUIRED_VARS)
+
+# Check whether we are in 'simple' or 'extended' mode:
+  set(_KEYWORDS_FOR_EXTENDED_MODE  ${options} ${oneValueArgs} ${multiValueArgs} )
+  list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX)
+
+  if(${INDEX} EQUAL -1)
+    # simple mode: the first argument is the failure message, everything after
+    # it is the list of required variables; there is no version variable.
+    set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})
+    set(FPHSA_REQUIRED_VARS ${ARGN})
+    set(FPHSA_VERSION_VAR)
+  else()
+    # extended mode: keyword arguments, parsed into FPHSA_<KEYWORD> variables.
+    cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}"  ${_FIRST_ARG} ${ARGN})
+
+    if(FPHSA_UNPARSED_ARGUMENTS)
+      message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"")
+    endif()
+
+    if(NOT FPHSA_FAIL_MESSAGE)
+      set(FPHSA_FAIL_MESSAGE  "DEFAULT_MSG")
+    endif()
+
+    # In config-mode, we rely on the variable <PackageName>_CONFIG, which is set by find_package()
+    # when it successfully found the config-file, including version checking:
+    if(FPHSA_CONFIG_MODE)
+      list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)
+      list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)
+      set(FPHSA_VERSION_VAR ${_NAME}_VERSION)
+    endif()
+
+    if(NOT FPHSA_REQUIRED_VARS)
+      message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()")
+    endif()
+  endif()
+
+# now that we collected all arguments, process them
+
+  if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG")
+    set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}")
+  endif()
+
+  # The value of the first required variable is what the "Found ..." and the
+  # version-failure messages below display.
+  list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)
+
+  string(TOUPPER ${_NAME} _NAME_UPPER)
+  string(TOLOWER ${_NAME} _NAME_LOWER)
+
+  # FOUND_VAR may only name <PackageName>_FOUND or <UPPERCASE_NAME>_FOUND.
+  # NOTE: _FOUND_VAR is validated here but not read again in this function;
+  # both spellings are exported at the end regardless of this choice.
+  if(FPHSA_FOUND_VAR)
+    if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$"  OR  FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$")
+      set(_FOUND_VAR ${FPHSA_FOUND_VAR})
+    else()
+      message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.")
+    endif()
+  else()
+    set(_FOUND_VAR ${_NAME_UPPER}_FOUND)
+  endif()
+
+  # collect all variables which were not found, so they can be printed, so the
+  # user knows better what went wrong (#6375)
+  set(MISSING_VARS "")
+  set(DETAILS "")
+  # check if all passed variables are valid
+  # FPHSA_FOUND_<name> stays TRUE only if every required variable evaluates as
+  # true; the values of the variables that do are accumulated in DETAILS.
+  set(FPHSA_FOUND_${_NAME} TRUE)
+  foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS})
+    if(NOT ${_CURRENT_VAR})
+      set(FPHSA_FOUND_${_NAME} FALSE)
+      string(APPEND MISSING_VARS " ${_CURRENT_VAR}")
+    else()
+      string(APPEND DETAILS "[${${_CURRENT_VAR}}]")
+    endif()
+  endforeach()
+  if(FPHSA_FOUND_${_NAME})
+    set(${_NAME}_FOUND TRUE)
+    set(${_NAME_UPPER}_FOUND TRUE)
+  else()
+    set(${_NAME}_FOUND FALSE)
+    set(${_NAME_UPPER}_FOUND FALSE)
+  endif()
+
+  # component handling
+  # Each requested component is sorted into a "found" or "missing" message
+  # according to <PackageName>_<comp>_FOUND. Only a missing component that is
+  # marked required (<PackageName>_FIND_REQUIRED_<comp>) turns
+  # <PackageName>_FOUND to FALSE.
+  unset(FOUND_COMPONENTS_MSG)
+  unset(MISSING_COMPONENTS_MSG)
+
+  if(FPHSA_HANDLE_COMPONENTS)
+    foreach(comp ${${_NAME}_FIND_COMPONENTS})
+      if(${_NAME}_${comp}_FOUND)
+
+        if(NOT DEFINED FOUND_COMPONENTS_MSG)
+          set(FOUND_COMPONENTS_MSG "found components:")
+        endif()
+        string(APPEND FOUND_COMPONENTS_MSG " ${comp}")
+
+      else()
+
+        if(NOT DEFINED MISSING_COMPONENTS_MSG)
+          set(MISSING_COMPONENTS_MSG "missing components:")
+        endif()
+        string(APPEND MISSING_COMPONENTS_MSG " ${comp}")
+
+        if(${_NAME}_FIND_REQUIRED_${comp})
+          set(${_NAME}_FOUND FALSE)
+          string(APPEND MISSING_VARS " ${comp}")
+        endif()
+
+      endif()
+    endforeach()
+    set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}")
+    string(APPEND DETAILS "[c${COMPONENT_MSG}]")
+  endif()
+
+  # version handling:
+  # VERSION_MSG is the human-readable version note used in the messages below;
+  # VERSION_OK becomes FALSE when a found version does not satisfy the request.
+  set(VERSION_MSG "")
+  set(VERSION_OK TRUE)
+
+  # check with DEFINED here as the requested or found version may be "0"
+  if (DEFINED ${_NAME}_FIND_VERSION)
+    if(DEFINED ${FPHSA_VERSION_VAR})
+      set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})
+
+      if(${_NAME}_FIND_VERSION_EXACT)       # exact version required
+        # count the dots in the version string
+        string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}")
+        # add one dot because there is one dot more than there are components
+        string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS)
+        # If the found version has more components than were requested, only
+        # its leading <requested count> components are compared.
+        if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)
+          # Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT
+          # is at most 4 here. Therefore a simple lookup table is used.
+          if (${_NAME}_FIND_VERSION_COUNT EQUAL 1)
+            set(_VERSION_REGEX "[^.]*")
+          elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2)
+            set(_VERSION_REGEX "[^.]*\\.[^.]*")
+          elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3)
+            set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*")
+          else ()
+            set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*")
+          endif ()
+          string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}")
+          unset(_VERSION_REGEX)
+          if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)
+            set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
+            set(VERSION_OK FALSE)
+          else ()
+            set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
+          endif ()
+          unset(_VERSION_HEAD)
+        else ()
+          if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)
+            set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
+            set(VERSION_OK FALSE)
+          else ()
+            set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
+          endif ()
+        endif ()
+        unset(_VERSION_DOTS)
+
+      else()     # minimum version specified:
+        if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)
+          set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"")
+          set(VERSION_OK FALSE)
+        else ()
+          set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")")
+        endif ()
+      endif()
+
+    else()
+
+      # if the package was not found, but a version was given, add that to the output:
+      if(${_NAME}_FIND_VERSION_EXACT)
+         set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")")
+      else()
+         set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")")
+      endif()
+
+    endif()
+  else ()
+    # Check with DEFINED as the found version may be 0.
+    if(DEFINED ${FPHSA_VERSION_VAR})
+      set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
+    endif()
+  endif ()
+
+  # An unsuitable version overrides an otherwise successful find. On success
+  # the found/requested versions are added to DETAILS, which is handed to
+  # FIND_PACKAGE_MESSAGE below as its "details" argument.
+  if(VERSION_OK)
+    string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
+  else()
+    set(${_NAME}_FOUND FALSE)
+  endif()
+
+
+  # print the result:
+  if (${_NAME}_FOUND)
+    FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
+  else ()
+
+    if(FPHSA_CONFIG_MODE)
+      _FPHSA_HANDLE_FAILURE_CONFIG_MODE()
+    else()
+      if(NOT VERSION_OK)
+        _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})")
+      else()
+        _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
+      endif()
+    endif()
+
+  endif ()
+
+  # This is a function, so the result has to be exported explicitly; both
+  # spellings receive the final value of <PackageName>_FOUND.
+  set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
+  set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
+endfunction()
--- a/CMakeModules/FindPackageMessage.cmake
+++ b/CMakeModules/FindPackageMessage.cmake
@@ -0,0 +1,48 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+FindPackageMessage
+------------------
+
+.. code-block:: cmake
+
+  find_package_message(<name> "message for user" "find result details")
+
+This function is intended to be used in FindXXX.cmake modules files.
+It will print a message once for each unique find result.  This is
+useful for telling the user where a package was found.  The first
+argument specifies the name (XXX) of the package.  The second argument
+specifies the message to display.  The third argument lists details
+about the find result so that if they change the message will be
+displayed again.  The macro also obeys the QUIET argument to the
+find_package command.
+
+Example:
+
+.. code-block:: cmake
+
+  if(X11_FOUND)
+    find_package_message(X11 "Found X11: ${X11_X11_LIB}"
+      "[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
+  else()
+   ...
+  endif()
+#]=======================================================================]
+
+function(find_package_message pkg msg details)
+  # Nothing is printed (and nothing is cached) for QUIET finds.
+  if(${pkg}_FIND_QUIETLY)
+    return()
+  endif()
+
+  # Newlines are stripped from the details before comparing and caching them.
+  string(REPLACE "\n" "" _fpm_details "${details}")
+  set(_fpm_cache_var FIND_PACKAGE_MESSAGE_DETAILS_${pkg})
+
+  # Same details as last time: the message was already shown for this result.
+  if("${_fpm_details}" STREQUAL "${${_fpm_cache_var}}")
+    return()
+  endif()
+
+  message(STATUS "${msg}")
+
+  # Remember the details in the cache so the same message is not repeated on
+  # the next run.
+  set("${_fpm_cache_var}" "${_fpm_details}"
+    CACHE INTERNAL "Details about finding ${pkg}")
+endfunction()
--- a/Docker/Dockerfile
+++ b/Docker/Dockerfile
@@ -0,0 +1,22 @@
+# Runtime image for the sist2 binary; the binary itself is the entrypoint.
+#
+# NOTE(review): ubuntu:19.10 is an end-of-life release, so `apt-get update`
+# may fail against the default mirrors on a fresh build. Moving to a supported
+# base image requires re-checking the versioned package names below.
+FROM ubuntu:19.10
+# MAINTAINER is deprecated; record the maintainer as a label instead.
+LABEL maintainer="simon987 <me@simon987.net>"
+
+# Refresh the package index and install in the SAME layer: a separately cached
+# `apt update` layer would leave later installs working from a stale index.
+# The index files are removed afterwards to keep the layer small.
+RUN apt-get update && \
+    apt-get install -y libglib2.0-0 libcurl4 libmagic1 libharfbuzz-bin libopenjp2-7 libarchive13 liblzma5 libzstd1 liblz4-1 \
+    curl libtiff5 libpng16-16 libpcre3 && \
+    rm -rf /var/lib/apt/lists/*
+
+# Pre-install Tesseract language data. curl -f makes an HTTP error fail the
+# build instead of silently saving the error page as a .traineddata file.
+RUN mkdir -p /usr/share/tessdata && \
+    cd /usr/share/tessdata/ && \
+    curl -fL -o /usr/share/tessdata/hin.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/hin.traineddata &&\
+    curl -fL -o /usr/share/tessdata/jpn.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/jpn.traineddata &&\
+    curl -fL -o /usr/share/tessdata/eng.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata &&\
+    curl -fL -o /usr/share/tessdata/fra.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/fra.traineddata &&\
+    curl -fL -o /usr/share/tessdata/rus.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/rus.traineddata &&\
+    curl -fL -o /usr/share/tessdata/spa.traineddata https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/spa.traineddata && ls -lh
+
+# COPY (not ADD): this is a plain file copy from the build context.
+COPY sist2 /root/sist2
+
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+
+ENTRYPOINT ["/root/sist2"]
--- a/Docker/build.sh
+++ b/Docker/build.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+#
+# Build the sist2 Docker image from the binary in the parent directory, tag it
+# with the binary's reported version and with "latest", push every tag to
+# Docker Hub and to the GitHub package registry, then run the image once.
+
+# Stop at the first failing step, so that a failed copy or build can never be
+# followed by pushing a stale image.
+set -e
+
+# -f: a missing ./sist2 (e.g. on the first run) is not an error.
+rm -f ./sist2
+cp ../sist2 .
+strip sist2
+
+version=$(./sist2 --version)
+
+echo "Version ${version}"
+# The version is quoted so that unexpected whitespace in the binary's output
+# produces a clear invalid-tag error rather than extra docker arguments.
+docker build . -t "simon987/sist2:${version}" -t simon987/sist2:latest \
+  -t docker.pkg.github.com/simon987/sist2/sist2:latest -t "docker.pkg.github.com/simon987/sist2/sist2:${version}"
+docker push "simon987/sist2:${version}"
+docker push simon987/sist2:latest
+docker push docker.pkg.github.com/simon987/sist2/sist2:latest
+docker push "docker.pkg.github.com/simon987/sist2/sist2:${version}"
+
+# Run the freshly built image once; -v is passed to the sist2 entrypoint.
+docker run --rm -it simon987/sist2 -v
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 ![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
 [![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
+[![Development snapshots](https://ci.simon987.net/app/rest/builds/buildType(Sist2_Build)/statusIcon)](https://files.simon987.net/artifacts/Sist2/Build/)

 # sist2

@@ -7,64 +8,106 @@ sist2 (Simple incremental search tool)

 *Warning: sist2 is in early development*

+![sist2.png](sist2.png)
+
 ## Features

-* Fast, low memory usage
+* Fast, low memory usage, multi-threaded
+* Mobile-friendly Web interface
 * Portable (all its features are packaged in a single executable)
-* Extracts text from common file types\*
-* Generates thumbnails\*
+* Extracts text from common file types \*
+* Generates thumbnails \*
 * Incremental scanning
+* Automatic tagging from file attributes via [user scripts](scripting/README.md)
+* Recursive scan inside archive files \*\*
+* OCR support with tesseract \*\*\*


 \* See [format support](#format-support)    
+\*\* See [Archive files](#archive-files)    
+\*\*\* See [OCR](#ocr)    

 ## Getting Started

-1. Have an [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) instance running
-1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases)
+1. Have an Elasticsearch (>= 6.X.X) instance running
+    1. Download [from official website](https://www.elastic.co/downloads/elasticsearch)
+    1. *(or)* Run using docker:
+        ```bash
+       docker run -d --name es1 --net sist2_net -p 9200:9200 \
+            -e "discovery.type=single-node" elasticsearch:7.5.2
+        ```
+    1. *(or)* Run using docker-compose:
+        ```yaml
+          elasticsearch:
+            image: docker.elastic.co/elasticsearch/elasticsearch:7.5.2
+            environment:
+              - discovery.type=single-node
+              - "ES_JAVA_OPTS=-Xms1G -Xmx2G"
+        ```
+1. Download sist2 executable
+    1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
+    1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
+    1. *(or)* `docker pull simon987/sist2:latest`

-*Windows users*: `sist2` runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)
+1. See [Usage guide](USAGE.md)
   
-*Mac users*: See [#1](https://github.com/simon987/sist2/issues/1)
+
+\* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)    


 ## Example usage

-![demo](demo.gif)
+See [Usage guide](USAGE.md) for more details

-See help page `sist2 --help` for more details.
+1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
+1. Push index to Elasticsearch: `sist2 index ./docs_idx`
+1. Start web interface: `sist2 web ./docs_idx`

-**Scan a directory**
-```bash
-sist2 scan ~/Documents -o ./orig_idx/
-sist2 scan --threads 4 --content-size 16384 /mnt/Pictures
-sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
-```
-
-**Push index to Elasticsearch or file**
-```bash
-sist2 index --force-reset ./my_idx
-sist2 index --print ./my_idx > raw_documents.ndjson
-```
-
-**Start web interface**
-```bash
-sist2 web --bind 0.0.0.0 --port 4321 ./my_idx1 ./my_idx2 ./my_idx3
-```

 ## Format support

 File type | Library | Content | Thumbnail | Metadata
 :---|:---|:---|:---|:---
-pdf,xps,cbz,cbr,fb2,epub | MuPDF | yes | yes, `png` | *planned* |
-`audio/*` | libav | - | yes, `jpeg` | ID3 tags |
-`video/*` | libav | - | yes, `jpeg` | *planned* |
-`image/*` | libav | - | yes, `jpeg` | *planned* |
+pdf,xps,cbz,cbr,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
+`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
+`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
+`image/*` | ffmpeg | - | yes, `jpeg` | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
 ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
 `text/plain` | *(none)* | yes | no | - |
-docx, xlsx, pptx |  | *planned* | no | *planned* |
+tar, zip, rar, 7z, ar ...  | Libarchive | yes\* | - | no |
+docx, xlsx, pptx | *(none)* | yes | no | no |
+
+\* *See [Archive files](#archive-files)*
+ 
+### Archive files
+**sist2** will scan files stored inside archive files (zip, tar, 7z...) as if
+they were directly in the file system. Recursive (archives inside archives)
+scan is also supported.
+
+**Limitations**:
+* Parsing media files with formats that require
+*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
+* Archive files are scanned sequentially, by a single thread. On systems where
+**sist2** is not I/O bound, scans might be faster when larger archives are split
+ into smaller parts.
+
+To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
 
 
+### OCR
+
+You can enable OCR support for pdf,xps,cbz,cbr,fb2,epub file types with the
+`--ocr <lang>` option. Download the language data files with your
+package manager (`apt install tesseract-ocr-eng`) or directly [from GitHub](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
+
+The `simon987/sist2` image comes with common languages 
+(hin, jpn, eng, fra, rus, spa) pre-installed.
+
+Examples
+```bash
+sist2 scan --ocr jpn ~/Books/Manga/
+sist2 scan --ocr eng ~/Books/Textbooks/
+```


 ## Build from source
@@ -76,10 +119,12 @@ binaries.

    *(Debian)*
    ```bash
-    apt install git cmake pkg-config libglib2.0-dev\
-        libssl-dev uuid-dev libavformat-dev libswscale-dev \
-        python3 libmagic-dev libfreetype6-dev libcurl-dev \
-        libbz2-dev yasm
+    apt install git cmake pkg-config libglib2.0-dev \
+        libssl-dev uuid-dev python3 libmagic-dev libfreetype6-dev \
+        libcurl4-openssl-dev libbz2-dev yasm libharfbuzz-dev ragel \
+        libarchive-dev libtiff5 libpng16-16 libpango1.0-dev \
+        libxml2-dev libopenjp2-7-dev libleptonica-dev
+   ```

 2. Build
    ```bash
--- a/USAGE.md
+++ b/USAGE.md
@@ -0,0 +1,275 @@
+# Usage
+
+*More examples (specifically with docker/compose) are in progress*
+
+* [scan](#scan)
+    * [options](#scan-options)
+    * [examples](#scan-examples)
+    * [index format](#index-format)
+* [index](#index)
+    * [options](#index-options)
+    * [examples](#index-examples)
+* [web](#web)
+    * [options](#web-options)
+    * [examples](#web-examples)
+    * [rewrite_url](#rewrite_url)
+    * [link to specific indices](#link-to-specific-indices)
+
+```
+Usage: sist2 scan [OPTION]... PATH
+   or: sist2 index [OPTION]... INDEX
+   or: sist2 web [OPTION]... INDEX...
+Lightning-fast file system indexer and search tool.
+
+    -h, --help            show this help message and exit
+    -v, --version         Show version and exit
+    --verbose             Turn on logging
+    --very-verbose        Turn on debug messages
+
+Scan options
+    -t, --threads=<int>   Number of threads. DEFAULT=1
+    -q, --quality=<flt>   Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
+    --size=<int>          Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
+    --content-size=<int>  Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
+    --incremental=<str>   Reuse an existing index and only scan modified files.
+    -o, --output=<str>    Output directory. DEFAULT=index.sist2/
+    --rewrite-url=<str>   Serve files from this url instead of from disk.
+    --name=<str>          Index display name. DEFAULT: (name of the directory)
+    --depth=<int>         Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
+    --archive=<str>       Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
+    --ocr=<str>           Tesseract language (use tesseract --list-langs to see which are installed on your machine)
+    -e, --exclude=<str>   Files that match this regex will not be scanned
+    --fast                Only index file names & mime type
+
+Index options
+    --es-url=<str>        Elasticsearch url with port. DEFAULT=http://localhost:9200
+    -p, --print           Just print JSON documents to stdout.
+    --script-file=<str>   Path to user script.
+    --batch-size=<int>    Index batch size. DEFAULT: 100
+    -f, --force-reset     Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
+
+Web options
+    --es-url=<str>        Elasticsearch url. DEFAULT=http://localhost:9200
+    --bind=<str>          Listen on this address. DEFAULT=localhost
+    --port=<str>          Listen on this port. DEFAULT=4090
+    --auth=<str>          Basic auth in user:password format
+Made by simon987 <me@simon987.net>. Released under GPL-3.0
+
+```
+
+## Scan
+
+### Scan options
+
+* `-t, --threads` 
+      Number of threads for file parsing. **Do not set a number higher than `$(nproc)`!**
+* `-q, --quality` 
+    Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality*
+* `--size` 
+    Thumbnail size in pixels.
+* `--content-size` 
+    Number of bytes of text to be extracted from the content of files (plain text and PDFs).
+    Repeated whitespace and special characters do not count toward this limit.
+* `--incremental`
+    Specify an existing index. Information about files in this index that were not modified (based on *mtime* attribute)
+    will be copied to the new index and will not be parsed again.
+* `-o, --output` Output directory. 
+* `--rewrite-url` Set the `rewrite_url` option for the web module (See [rewrite_url](#rewrite_url)) 
+* `--name` Set the `name` option for the web module
+* `--depth` Maximum scan depth. Set to 0 to only scan files directly in the root directory, or to -1 for infinite depth
+* `--archive` Archive file mode.
+    * skip: Don't parse
+    * list: Only get file names as text
+    * shallow: Don't parse archives inside archives.
+    * recurse: Scan archives recursively (default)
+* `--ocr` See [OCR](README.md#ocr)
+* `-e, --exclude` Regex pattern to exclude files. A file is excluded if the pattern matches any 
+    part of the full absolute path.
+    
+    Examples: 
+    * `-e ".*\.ttf"`: Ignore ttf files
+    * `-e ".*\.(ttf|rar)"`: Ignore ttf and rar files
+    * `-e "^/mnt/backups/"`: Ignore all files in the `/mnt/backups/` directory
+    * `-e "^/mnt/Data[12]/"`: Ignore all files in the `/mnt/Data1/` and `/mnt/Data2/` directories
+    * `-e "(^/usr/)|(^/var/)|(^/media/DRIVE-A/tmp/)|(^/media/DRIVE-B/Trash/)"` Exclude the
+     `/usr`, `/var`, `/media/DRIVE-A/tmp`, `/media/DRIVE-B/Trash` directories
+* `--fast` Only index file names and mime type
+
+### Scan examples
+
+Simple scan
+```bash
+sist2 scan ~/Documents
+
+sist2 scan \
+    --threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
+    --name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
+    ~/Documents -o ./documents.idx/
+```
+
+Incremental scan
+```
+sist2 scan --incremental ./orig_idx/ -o ./updated_idx/ ~/Documents
+```
+
+### Index format
+
+A typical `binary` type index structure looks like this:
+```
+documents.idx/
+├── descriptor.json
+├── _index_139965416830720
+├── _index_139965425223424
+├── _index_139965433616128
+├── _index_139965442008832
+└── thumbs
+    ├── data.mdb
+    └── lock.mdb
+```
+
+The `_index_*` files contain the raw binary index data and are not meant to be
+read by other applications. The format is generally compatible across different 
+sist2 versions.
+
+The `thumbs/` folder is a [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database)
+database containing the thumbnails.
+
+The `descriptor.json` file contains general information about the index. The 
+following fields are safe to modify manually: `root`, `name`, [rewrite_url](#rewrite_url) and `timestamp`.
+
+
+*Advanced usage*
+
+Instead of using the `scan` module, you can also import an index generated
+by a third party application. The 'external' index must have the following format:
+
+```
+my_index/
+├── descriptor.json
+├── _index_0
+└── thumbs
+    ├── data.mdb
+    └── lock.mdb
+```
+
+*descriptor.json*:
+```json
+{
+    "uuid": "<valid UUID4>",
+    "version": "_external_v1",
+    "root": "(optional)",
+    "name": "<name>",
+    "rewrite_url": "(optional)",
+    "type": "json",
+    "timestamp": 1578971024
+}
+```
+
+*_index_0*: NDJSON format (One json object per line)
+
+```json
+{
+  "_id": "unique uuid for the file",
+  "index": "index uuid4 (same one as descriptor.json!)",
+  "mime": "application/x-cbz",
+  "size": 14341204,
+  "mtime": 1578882996,
+  "extension": "cbz",
+  "name": "my_book",
+  "path": "path/to/books",
+  "content": "text contents of the book",
+  "title": "Title of the book",
+  "tag": ["genre.fiction", "author.someguy", "etc..."],
+  "_keyword": [
+    {"k": "ISBN", "v": "ABCD34789231"}
+  ],
+  "_text": [
+    {"k": "other", "v": "This will be indexed as text"}
+  ]
+}
+```
+
+You can find the full list of supported fields [here](src/io/serialize.c#L90)
+
+The `_keyword.*` items will be indexed and searchable as **keyword** fields (only full matches allowed).
+The `_text.*` items will be indexed and searchable as **text** fields (fuzzy searching allowed)
+
+
+*thumbs/*:
+
+LMDB key-value store. Keys are **binary** 128-bit UUID4s (`_id` field)
+and values are raw image bytes.
+
+Importing an external `binary` type index is technically possible but
+it is currently unsupported and has no guarantees of backward/forward compatibility.
+
+
+## Index
+### Index options
+ * `--es-url` 
+ Elasticsearch url and port. If you are using docker, make sure that both containers are on the
+ same network.
+ * `-p, --print` 
+    Print index in JSON format to stdout.
+ * `--script-file` 
+    Path to user script. See [Scripting](scripting/README.md).
+ * `--batch-size=<int>` 
+    Index batch size. Indexing is generally faster with larger batches, but payloads that
+    are too large will fail and additional overhead for retrying with smaller sizes may slow
+    down the process.
+ * `-f, --force-reset` 
+    Reset Elasticsearch mappings and settings.
+    **(You must use this option the first time you use the index command)**.
+    
+### Index examples
+
+**Push to elasticsearch**
+```bash
+sist2 index --force-reset --batch-size 1000 --es-url http://localhost:9200 ./my_index/
+sist2 index ./my_index/
+```
+
+**Save index in JSON format**
+```bash
+sist2 index --print ./my_index/ > my_index.ndjson
+```
+
+**Inspect contents of an index**
+```bash
+sist2 index --print ./my_index/ | jq | less
+```
+
+## Web
+
+### Web options
+ * `--es-url=<str>` Elasticsearch url.
+ * `--bind=<str>` Listen on this address.
+ * `--port=<str>` Listen on this port.
+ * `--auth=<str>` Basic auth in user:password format
+ 
+### Web examples
+
+**Single index**
+```bash
+sist2 web --auth admin:hunter2 --bind 0.0.0.0 --port 8888 my_index
+```
+
+**Multiple indices**
+```bash
+# Indices will be displayed in this order in the web interface
+sist2 web index1 index2 index3 index4
+```
+
+### rewrite_url
+
+When the `rewrite_url` field is not empty, the web module ignores the `root`
+field and will return an HTTP redirect to `<rewrite_url><path>/<name><extension>`
+instead of serving the file from disk. 
+Both the `root` and `rewrite_url` fields are safe to manually modify from the 
+`descriptor.json` file.
+
+### Link to specific indices
+
+To link to specific indices, you can add a list of comma-separated index names to
+the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
+not displayed.
--- a/2
+++ b/2
--- a/2
+++ b/2
--- a/ci/build.sh
+++ b/ci/build.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+./scripts/get_static_libs.sh
+
+rm -rf CMakeFiles CmakeCache.txt
+cmake -DSIST_DEBUG=off .
+make
+strip sist2
+
+rm -rf CMakeFiles CmakeCache.txt
+cmake -DSIST_DEBUG=on .
+make
--- a/demo.gif
+++ b/demo.gif
--- a/lib/bzip2-1.0.6
+++ b/lib/bzip2-1.0.6
--- a/lib/ffmpeg
+++ b/lib/ffmpeg
--- a/lib/harfbuzz
+++ b/lib/harfbuzz
--- a/lib/leptonica
+++ b/lib/leptonica
--- a/lib/libmagic
+++ b/lib/libmagic
--- a/lib/libpng
+++ b/lib/libpng
--- a/lib/libtiff
+++ b/lib/libtiff
--- a/lib/mupdf
+++ b/lib/mupdf
--- a/lib/onion
+++ b/lib/onion
--- a/lib/openjpeg
+++ b/lib/openjpeg
--- a/lib/tesseract
+++ b/lib/tesseract
--- a/mime.csv
+++ b/mime.csv
@@ -2,10 +2,14 @@ application/arj, arj
 application/base64, mme
 application/binhex, hqx
 application/book, boo|book
+application/CDFV2-corrupt,
 application/CDFV2, sdv
 application/clariscad, ccad
 application/commonground, dp
+application/csv,
+application/dicom, dcm
 application/drafting, drw
+application/epub+zip, epub
 application/freeloader, frl
 application/futuresplash, spl
 application/groupwise, vew
@@ -17,7 +21,6 @@ application/inf, inf
 application/java-archive, jar
 application/java, class
 application/javascript,
-application/x-archive, a
 application/json, json
 application/marc, mrc
 application/mbedlet, mbd
@@ -27,7 +30,9 @@ application/msword, doc|dot|w6w|wiz|word
 application/netmc, mcp
 application/octet-stream, bin|dump|gpg
 application/oda, oda
+application/ogg, ogv
 application/pdf, pdf
+application/pgp-keys,
 application/pgp-signature, pgp
 application/pkcs7-signature, p7s
 application/pkix-cert, cer|crt
@@ -43,6 +48,10 @@ application/vda, vda
 application/vnd.fdf, fdf
 application/vnd.font-fontforge-sfd, sfd
 application/vnd.hp-hpgl, hgl|hpg|hpgl
+application/vnd.iccprofile, icm
+application/vnd.iccprofile, icm
+application/vnd.lotus-1-2-3,
+application/vnd.ms-cab-compressed, cab
 application/vnd.ms-excel, xlb|xlc|xll|xlm|xls|xlw
 application/vnd.ms-fontobject, eot
 application/vnd.ms-opentype, otf
@@ -54,45 +63,75 @@ application/vnd.ms-project, mpp
 application/vnd.oasis.opendocument.base, odb
 application/vnd.oasis.opendocument.formula, odf
 application/vnd.oasis.opendocument.graphics, odg
+application/vnd.oasis.opendocument.presentation, odp
+application/vnd.oasis.opendocument.spreadsheet, ods
 application/vnd.oasis.opendocument.text, odt
+application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
+application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
+application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
+application/vnd.symbian.install,
+application/vnd.tcpdump.pcap, pcap
 application/vnd.wap.wmlc, wmlc
 application/vnd.wap.wmlscriptc, wmlsc
 application/vnd.xara, web
 application/vocaltec-media-desc, vmd
 application/vocaltec-media-file, vmf
+application/warc, warc
+application/winhelp, hlp
 application/wordperfect6.0, w60
 application/wordperfect6.1, w61
 application/wordperfect, wp|wp5|wp6|wpd
 application/x-123, wk1
+application/x-7z-compressed, 7z
 application/x-aim, aim
+application/x-apple-diskimage,
+application/x-arc,
+application/x-archive, a
+application/x-atari-7800-rom, a78
 application/x-authorware-bin, aab
 application/x-authorware-map, aam
 application/x-authorware-seg, aas
+application/x-avira-qua,
 application/x-bcpio, bcpio
 application/x-bittorrent, torrent
 application/x-bsh, bsh
 application/x-bytecode.python, pyc
 application/x-bzip2, boz|bz2
 application/x-bzip, bz
+application/x-cbr, cbr
+application/x-cbz, cbz
 application/x-cdlink, vcd
 application/x-chat, cha|chat
+application/x-chrome-extension,
 application/x-cocoa, cco
 application/x-conference, nsc
+application/x-coredump,
 application/x-cpio, cpio
 application/x-dbf, dbf
 application/x-dbt,
+application/x-debian-package, deb
 application/x-deepv, deepv
 application/x-director, dcr|dir|dxr
+application/x-dmp, dmp
+application/x-dosdriver,
 application/x-dosexec, dll
 application/x-dvi, dvi
 application/x-elc, elc
+application/x-empty,
 application/x-envoy, env|evy
 application/x-esrehber, es
 application/x-excel, xla|xld|xlk|xlt|xlv
 application/x-executable, exe
+application/x-font-gdos,
+application/x-font-pf2, pf2
+application/x-font-pfm, pfm
 application/x-font-sfn,
-application/x-font-ttf, ttf
+application/x-font-ttf, ttf|ttc
+application/x-fpt,
 application/x-freelance, pre
+application/x-gamecube-rom,
+application/x-gdbm,
+application/x-gettext-translation,
 application/x-git,
 application/x-gsp, gsp
 application/x-gss, gss
@@ -102,46 +141,67 @@ application/x-hdf, hdf
 application/x-helpfile, help
 application/x-httpd-imap, imap
 application/x-ima, ima
+application/x-innosetup,
 application/x-internett-signup, ins
 application/x-inventor, iv
 application/x-ip2, ip
 application/x-java-applet,
 application/x-java-commerce, jcm
 application/x-java-image,
+application/x-java-jmod, jmod
 application/x-java-keystore,
+application/x-kdelnk,
 application/x-koan, skd|skm|skp|skt
 application/x-latex, latex|ltx
 application/x-livescreen, ivy
 application/x-lotus, wq1
+application/x-lz4+json, jsonlz4
+application/x-lz4, lz4
+application/x-lz4, lz4
+application/x-lzh-compressed,
 application/x-lzh, lzh
+application/x-lzip, lz
+application/x-lzma, lzma
+application/x-lzop, lzo
 application/x-lzx, lzx
 application/x-mach-binary, jnilib|dylib
 application/x-mach-executable,
 application/x-magic-cap-package-1.0, mc$
 application/x-mathcad, mcd
+application/x-maxis-dbpf,
 application/x-meme, mm
 application/x-midi, midi
 application/x-mif, mif
 application/x-mix-transfer, nix
 application/xml, opf
+application/x-mobipocket-ebook, mobi
+application/x-msaccess, accdb
+application/x-ms-compress-szdd, fon
 application/x-ms-pdb, pdb
+application/x-ms-reader, lit
+application/x-n64-rom, z64
 application/x-navi-animation, ani
 application/x-navidoc, nvd
 application/x-navimap, map
 application/x-navistyle, stl
+application/x-nes-rom, nes
 application/x-netcdf, cdf|nc
 application/x-newton-compatible-pkg, pkg
+application/x-nintendo-ds-rom,
 application/x-object, o
 application/x-omcdatamaker, omcd
 application/x-omc, omc
 application/x-omcregerator, omcr
 application/x-pagemaker, pm4|pm5
 application/x-pcl, pcl
+application/x-pgp-keyring,
 application/x-pixclscript, plx
 application/x-pkcs7-certreqresp, p7r
 application/x-pkcs7-signature, p7a
 application/x-project, mpc|mpt|mpv|mpx
 application/x-qpro, wb1
+application/x-rar, rar
+application/x-rpm, rpm
 application/x-sdp, sdp
 application/x-sea, sea
 application/x-seelogo, sl
@@ -149,12 +209,17 @@ application/x-setupscript,
 application/x-sharedlib, so
 application/x-shar, shar
 application/x-shockwave-flash, swf
+application/x-snappy-framed,
 application/x-sprite, spr|sprite
 application/x-sqlite3,
+application/x-stargallery-thm,
+application/x-stuffit, sit
 application/x-sv4cpio, sv4cpio
 application/x-sv4crc, sv4crc
 application/x-tar, tar
 application/x-tbook, sbk|tbk
+application/x-terminfo,
+application/x-terminfo2,
 application/x-texinfo, texi|texinfo
 application/x-tex-tfm, tfm
 application/x-ustar, ustar
@@ -163,16 +228,22 @@ application/x-vnd.audioexplosion.mzz, mzz
 application/x-vnd.ls-xpix, xpix
 application/x-vrml, vrml
 application/x-wais-source, src|wsrc
+application/x-wine-extension-ini,
 application/x-wintalk, wtk
 application/x-world, svr
 application/x-wri, wri
 application/x-x509-ca-cert, der
 application/x-xz, xz
+application/x-zip,
+application/x-zstd, zst
 application/zip, zip
+application/zlib, z
+!audio/basic, au
 audio/it, it
 audio/make, funk|my|pfunk
 audio/midi, kar
 audio/mid, rmi
+audio/mp4, m4b
 audio/mpeg, m2a|mpa
 audio/ogg, ogg
 audio/s3m, s3m
@@ -180,7 +251,10 @@ audio/tsp-audio, tsi
 audio/tsplayer, tsp
 audio/vnd.qcelp, qcp
 audio/voxware, vox
+audio/x-aiff, aiff|aif
+audio/x-flac, flac
 audio/x-gsm, gsd|gsm
+audio/x-hx-aac-adts,
 audio/x-jam, jam
 audio/x-liveaudio, lam
 audio/x-m4a, m4a
@@ -194,17 +268,24 @@ audio/x-nspaudio, lma
 audio/x-pn-realaudio, ram|rm|rmm|rmp
 audio/x-psid, sid
 audio/x-realaudio, ra
+audio/x-s3m,
 audio/x-twinvq-plugin, vqe|vql
 audio/x-twinvq, vqf
 audio/x-voc, voc
 audio/x-wav, wav
+!audio/x-xbox360-executable, xex
+!audio/x-xbox-executable, xbe
 font/otf,
 font/sfnt,
+font/woff2, woff2
+font/woff, woff
+image/bmp,
 image/cmu-raster, rast
 image/fif, fif
 image/florian, flo|turbot
 image/g3fax, g3
 image/gif, gif
+image/heic, heic
 image/ief, ief|iefs
 image/jpeg, jfif|jfif-tbnl|jpe|jpeg|jpg
 image/jutvision, jut
@@ -213,6 +294,9 @@ image/pict, pic|pict
 image/png, png|x-png
 !image/svg, svg
 !image/svg+xml,
+image/tiff,
+!image/vnd.adobe.photoshop, psd
+!image/vnd.djvu, djvu
 image/vnd.fpx, fpx
 image/vnd.microsoft.icon,
 image/vnd.rn-realflash, rf
@@ -220,9 +304,15 @@ image/vnd.rn-realpix, rp
 image/vnd.wap.wbmp, wbmp
 image/vnd.xiff, xif
 image/webp, webp
+image/wmf,
+image/x-3ds, 3ds
+image/x-award-bioslogo,
 image/x-cmu-raster, ras
+image/x-cur, tga
 image/x-dwg, dwg|dxf|svf
 image/x-eps,
+image/x-exr, exr
+image/x-gem,
 image/x-icns,
 !image/x-icon, ico
 image/x-jg, art
@@ -236,34 +326,33 @@ image/x-portable-graymap, pgm
 image/x-portable-pixmap, ppm
 image/x-quicktime, qif|qti|qtif
 image/x-rgb, rgb
+image/x-tga,
 image/x-tiff, tif|tiff
-image/tiff,
+image/x-win-bitmap,
 !image/x-xcf, xcf
 !image/x-xpixmap, xpm
+image/x-xwindowdump, xwd
+message/news,
 message/rfc822, mht|mhtml|mime
 model/vnd.dwf, dwf
+model/vnd.gdl, gdl
+model/vnd.gs.gdl, gdsl
 model/vrml, wrz
 model/x-pov, pov
 text/asp, asp
 text/css, css
-text/x-sass, sass
-text/x-scss, scss
 text/html, acgi|htm|html|htmls|htx|shtml
 text/javascript, js
 text/mcf, mcf
 text/pascal, pas
-text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
+text/PGP,
+text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u|csv|eml
 text/richtext, rt|rtf|rtx
+text/rtf,
 text/scriplet, wsc
-text/x-awk, awk
-!video/x-jng, jng
-video/x-mng, mng
-image/x-cur, tga
-image/x-xwindowdump, xwd
-!image/vnd.adobe.photoshop, psd
 text/tab-separated-values, tsv
 text/troff, man|me|ms|roff|t|tr
-text/uri-list, uni|unis|uri|uris
+text/uri-list, uni|unis|uri|uris
 text/vnd.abc, abc
 text/vnd.fmi.flexstor, flx
 text/vnd.wap.wmlscript, wmls
@@ -272,6 +361,7 @@ text/webviewhtml, htt
 text/x-Algol68,
 text/x-asm, asm|s
 text/x-audiosoft-intra, aip
+text/x-awk, awk
 text/x-bcpl,
 text/x-c, c|cc|h
 text/x-c++, cpp|cxx|c++
@@ -286,23 +376,31 @@ text/x-makefile, am|mak
 text/xml, xml|pom|iml|plist
 text/x-m, m
 text/x-msdos-batch, bat
+text/x-ms-regedit, reg
+text/x-objective-c,
 text/x-pascal, p
 text/x-perl, pl
 text/x-php, php
+text/x-po, po
 text/x-python, py
 text/x-ruby, rb
+text/x-sass, sass
+text/x-scss, scss
 text/x-server-parsed-html, ssi
 text/x-setext, etx
 text/x-sgml, sgm|sgml
 text/x-shellscript, sh
 text/x-speech, talk
+text/x-tcl,
 text/x-tex, tex
 text/x-uil, uil
 text/x-uuencode, uue
 text/x-vcalendar, vcs
+text/x-vcard, vcf
 video/animaflex, afl
 video/avi, avi
 video/avs-video, avs
+video/MP2T,
 video/mp4, mp4
 video/mpeg, m1v|m2v|mpe|mpeg|mpg
 video/quicktime, moov|mov|qt
@@ -317,43 +415,15 @@ video/x-atomic3d-feature, fmf
 video/x-dl, dl
 video/x-dv, dif|dv
 video/x-fli, fli
+video/x-flv, flv
 video/x-isvideo, isu
+!video/x-jng, jng
+video/x-m4v, m4v
+video/x-matroska, mkv
+video/x-mng, mng
 video/x-motion-jpeg, mjpg
-video/x-ms-asf, asf|asx
+video/x-ms-asf, asf|asx|wmv
+video/x-msvideo, divx
 video/x-qtc, qtc
 video/x-sgi-movie, movie|mv
-application/x-7z-compressed, 7z
-application/vnd.openxmlformats-officedocument.wordprocessingml.document, docx
-text/x-po, po
-application/x-rpm, rpm
-application/x-debian-package, deb
-application/vnd.iccprofile, icm
-application/dicom, dcm
-image/x-exr, exr
-application/vnd.iccprofile, icm
-video/x-matroska, mkv
-application/x-empty,
-model/vnd.gdl, gdl
-model/vnd.gs.gdl, gdsl
-font/woff, woff
-font/woff2, woff2
-application/epub+zip, epub
-application/x-mobipocket-ebook, mobi
-audio/x-flac, flac
-application/x-rar, rar
-video/x-msvideo, divx
-video/x-flv, flv
-application/x-kdelnk,
-text/x-tcl,
-application/ogg, ogv
-application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, xlsx
-application/vnd.ms-cab-compressed, cab
-audio/mp4, m4b
-!image/vnd.djvu, djvu
-application/x-ms-reader, lit
-application/CDFV2-corrupt,
-text/x-vcard, vcf
-application/x-innosetup,
-application/winhelp, hlp
-image/x-tga,
-application/x-wine-extension-ini,
+x-epoc/x-sisx-app,
--- a/schema/mappings.json
+++ b/schema/mappings.json
@@ -1,31 +1,40 @@
 {
  "properties": {
+    "_tie": {
+      "type": "keyword",
+      "doc_values": true
+    },
+    "_depth": {
+      "type": "integer"
+    },
    "path": {
      "type": "text",
      "analyzer": "path_analyzer",
-      "copy_to": "suggest-path"
-    },
-    "suggest-path": {
-      "type": "completion",
-      "analyzer": "keyword"
+      "fielddata": true,
+      "index_prefixes": {}
    },
    "mime": {
      "type": "keyword"
    },
    "videoc": {
-      "type": "keyword"
+      "type": "keyword",
+      "index": false
    },
    "audioc": {
-      "type": "keyword"
+      "type": "keyword",
+      "index": false
    },
    "duration": {
-      "type": "float"
+      "type": "float",
+      "index": false
    },
    "width": {
-      "type": "integer"
+      "type": "integer",
+      "index": false
    },
    "height": {
-      "type": "integer"
+      "type": "integer",
+      "index": false
    },
    "mtime": {
      "type": "integer"
@@ -70,6 +79,23 @@
      "analyzer": "my_nGram",
      "type": "text"
    },
+    "_keyword.*":  {
+      "type": "keyword"
+    },
+    "_text.*":  {
+      "analyzer": "content_analyzer",
+      "type": "text",
+      "fields": {
+        "nGram": {
+          "type": "text",
+          "analyzer": "my_nGram"
+        }
+      }
+    },
+    "_url": {
+      "type": "keyword",
+      "index": false
+    },
    "content": {
      "analyzer": "content_analyzer",
      "type": "text",
@@ -80,6 +106,33 @@
          "analyzer": "my_nGram"
        }
      }
+    },
+    "tag": {
+      "type": "keyword"
+    },
+    "exif_make": {
+      "type": "text"
+    },
+    "exif_model": {
+      "type": "text"
+    },
+    "exif_software": {
+      "type": "text"
+    },
+    "exif_exposure_time": {
+      "type": "keyword"
+    },
+    "exif_fnumber": {
+      "type": "keyword"
+    },
+    "exif_iso_speed_ratings": {
+      "type": "keyword"
+    },
+    "exif_focal_length": {
+      "type": "keyword"
+    },
+    "exif_user_comment": {
+      "type": "text"
    }
  }
 }
--- a/schema/pipeline.json
+++ b/schema/pipeline.json
@@ -0,0 +1,10 @@
+{
+  "description": "Copy _id to _tie, save path depth",
+  "processors": [
+    {
+      "script": {
+        "source": "ctx._tie = ctx._id; ctx._depth = ctx.path.length() == 0 ? 0 : 1 + ctx.path.length() - ctx.path.replace(\"/\", \"\").length();"
+      }
+    }
+  ]
+}
--- a/schema/settings.json
+++ b/schema/settings.json
@@ -1,6 +1,6 @@
 {
  "index": {
-    "refresh_interval": "-1",
+    "refresh_interval": "30s",
    "codec": "best_compression"
  },
  "analysis": {
@@ -21,16 +21,24 @@
          "lowercase"
        ]
      },
+      "case_insensitive_kw_analyzer": {
+        "tokenizer": "keyword",
+        "filter": [
+          "lowercase"
+        ]
+      },
      "my_nGram": {
        "tokenizer": "my_nGram_tokenizer",
        "filter": [
-          "lowercase"
+          "lowercase",
+          "asciifolding"
        ]
      },
      "content_analyzer": {
        "tokenizer": "standard",
        "filter": [
-          "lowercase"
+          "lowercase",
+          "asciifolding"
        ]
      }
    }
--- a/scripting/README.md
+++ b/scripting/README.md
@@ -0,0 +1,152 @@
+## User scripts
+
+*This document is under construction, more in-depth guide coming soon*
+
+During the `index` step, you can use the `--script-file <script>` option to
+modify documents or add user tags. This option is mainly used to
+implement automatic tagging based on file attributes.
+
+The scripting language used 
+([Painless Scripting Language](https://www.elastic.co/guide/en/elasticsearch/painless/7.4/index.html)) 
+is very similar to Java, but you should be able to create user scripts
+without programming experience at all if you're somewhat familiar with
+regex.
+
+This is the base structure of the documents we're working with:
+```json
+{
+  "_id": "e171405c-fdb5-4feb-bb32-82637bc32084",
+  "_index": "sist2",
+  "_type": "_doc",
+  "_source": {
+    "index": "206b3050-e821-421a-891d-12fcf6c2db0d",
+    "mime": "application/json",
+    "size": 1799,
+    "mtime": 1545443685,
+    "extension": "md",
+    "name": "README",
+    "path": "sist2/scripting",
+    "content": "..."
+  }
+}
+```
+
+**Example script**
+
+This script checks whether the `genre` attribute exists and, if it does,
+adds the `genre.<genre>` tag.
+```Java
+ArrayList tags = ctx._source.tag = new ArrayList();
+
+if (ctx._source?.genre != null) {
+    tags.add("genre." + ctx._source.genre.toLowerCase())
+}
+```
+
+You can use `.` to create a hierarchical tag tree:
+
+![scripting/genre_example](genre_example.png)
+
+
+To use regular expressions, you need to add this line in `/etc/elasticsearch/elasticsearch.yml`
+```yaml
+script.painless.regex.enabled: true
+```
+Or, if you're using docker add `-e "script.painless.regex.enabled=true"`
+
+**Tag color**
+
+You can specify the color for an individual tag by appending a
+hexadecimal color code (`#RRGGBBAA`) to the tag name.
+
+### Examples
+
+If `(20XX)` is in the file name, add the `year.<year>` tag:
+```Java
+ArrayList tags = ctx._source.tag = new ArrayList();
+
+Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
+if (m.find()) {
+    tags.add("year." + m.group(1))
+}
+```
+
+Use default *Calibre* folder structure to infer author.
+```Java
+ArrayList tags = ctx._source.tag = new ArrayList();
+
+// We expect the book path to look like this:
+//  /path/to/Calibre Library/Author/Title/Title - Author.pdf
+
+if (ctx._source.name.contains("-") && ctx._source.extension == "pdf") {
+    String[] names = ctx._source.name.splitOnToken('-');
+    tags.add("author." + names[1].strip());
+}
+```
+
+If the file name matches the pattern `AAAA-000 fName1 lName1, <fName2 lName2>...`, add the `actress.<actress>` and
+`studio.<studio>` tags:
+```Java
+ArrayList tags = ctx._source.tag = new ArrayList();
+
+Matcher m = /([A-Z]{4})-[0-9]{3} (.*)/.matcher(ctx._source.name);
+if (m.find()) {
+    tags.add("studio." + m.group(1));
+
+    // Take the matched group (.*), and add a tag for
+    //  each name, separated by comma
+    for (String name : m.group(2).splitOnToken(',')) {
+        tags.add("actress." + name);
+    }
+}
+```
+
+Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
+```Java
+ArrayList tags = ctx._source.tag = new ArrayList();
+
+if (ctx._source.path != "") {
+    String[] names = ctx._source.path.splitOnToken('/');
+    tags.add("studio." + names[names.length-1]);
+}
+```
+
+Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
+```Java
+ArrayList tags = ctx._source.tag = new ArrayList();
+
+if (ctx._source.path != "") {
+    String[] names = ctx._source.path.splitOnToken('/');
+    tags.add("studio." + names[names.length-1]);
+}
+```
+
+Parse `EXIF:F Number` tag
+```Java
+ArrayList tags = ctx._source.tag = new ArrayList();
+
+if (ctx._source?.exif_fnumber != null) {
+    String[] values = ctx._source.exif_fnumber.splitOnToken(' ');
+    String aperture = String.valueOf(Float.parseFloat(values[0]) / Float.parseFloat(values[1]));
+    if (aperture == "NaN") {
+        aperture = "0,0";
+    }
+    tags.add("Aperture.f/" + aperture.replace(".", ","));
+}
+```
+
+Display year and months from `EXIF:DateTime` tag
+```Java
+ArrayList tags = ctx._source.tag = new ArrayList();
+
+if (ctx._source?.exif_datetime != null) {
+    SimpleDateFormat parser = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss");
+    Date date = parser.parse(ctx._source.exif_datetime);
+
+    SimpleDateFormat yp = new SimpleDateFormat("yyyy");
+    SimpleDateFormat mp = new SimpleDateFormat("MMMMMMMMM");
+
+    String year = yp.format(date);
+    String month = mp.format(date);
+
+    tags.add("Month." + month);
+    tags.add("Year." + year);
+}
+
+```
--- a/scripting/genre_example.png
+++ b/scripting/genre_example.png
--- a/scripts/before_build.sh
+++ b/scripts/before_build.sh
@@ -1,14 +1,16 @@
-#!/bin/bash
+#!/usr/bin/env bash

 rm -rf index.sist2/

 rm web/js/bundle.js 2> /dev/null
-cat `ls -v web/js/*.min.js` > web/js/bundle.js
+cat `ls web/js/*.min.js` > web/js/bundle.js
 cat web/js/{util,dom,search}.js >> web/js/bundle.js

-rm web/css/bundle.css 2> /dev/null
+rm web/css/bundle*.css 2> /dev/null
 cat web/css/*.min.css > web/css/bundle.css
-cat web/css/main.css >> web/css/bundle.css
+cat web/css/light.css >> web/css/bundle.css
+cat web/css/*.min.css > web/css/bundle_dark.css
+cat web/css/dark.css >> web/css/bundle_dark.css

 python3 scripts/mime.py > src/parsing/mime_generated.c
 python3 scripts/serve_static.py > src/web/static_generated.c
--- a/scripts/get_static_libs.sh
+++ b/scripts/get_static_libs.sh
@@ -1,21 +1,40 @@
-#!/bin/bash
+#!/usr/bin/env bash
+
+THREADS=$(nproc)
+
 cd lib

 cd mupdf
-HAVE_X11=no HAVE_GLUT=no make -j 4
+CFLAGS=-fPIC make USE_SYSTEM_HARFBUZZ=yes USE_SYSTEM_OPENJPEG=yes HAVE_X11=no HAVE_GLUT=no -j $THREADS
 cd ..

 mv mupdf/build/release/libmupdf.a .
 mv mupdf/build/release/libmupdf-third.a .

+# openjp2
+cd openjpeg
+cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -DNDEBUG -fPIC"
+make -j $THREADS
+cd ..
+mv openjpeg/bin/libopenjp2.a .
+
+# harfbuzz
+cd harfbuzz
+./autogen.sh
+CFLAGS=-fPIC ./configure --disable-shared --enable-static
+make -j $THREADS
+cd ..
+mv harfbuzz/src/.libs/libharfbuzz.a .
+
 # ffmpeg
 cd ffmpeg
 ./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \
 --disable-ffprobe --disable-doc\
 --disable-manpages --disable-postproc --disable-avfilter \
 --disable-alsa --disable-lzma --disable-xlib --disable-debug\
- --disable-vdpau --disable-vaapi --disable-sdl2 --disable-network
-make -j 4
+ --disable-vdpau --disable-vaapi --disable-sdl2 --disable-network\
+ --extra-cflags=-fPIC
+make -j $THREADS
 cd ..

 mv ffmpeg/libavcodec/libavcodec.a .
@@ -32,8 +51,78 @@ cmake -DONION_USE_SSL=false -DONION_USE_PAM=false -DONION_USE_PNG=false -DONION_
 -DONION_USE_JPEG=false -DONION_USE_XML2=false -DONION_USE_SYSTEMD=false -DONION_USE_SQLITE3=false \
 -DONION_USE_REDIS=false -DONION_USE_GC=false -DONION_USE_TESTS=false -DONION_EXAMPLES=false \
 -DONION_USE_BINDINGS_CPP=false ..
-make -j 4
+make -j $THREADS
 cd ../..

 mv onion/build/src/onion/libonion_static.a .
+
+#bzip2
+cd bzip2-1.0.6
+make -j $THREADS
 cd ..
+mv bzip2-1.0.6/libbz2.a .
+
+# magic
+cd libmagic
+./autogen.sh
+./configure --enable-static --disable-shared
+make -j $THREADS
+cd ..
+mv libmagic/src/.libs/libmagic.a .
+
+# tesseract
+cd tesseract
+mkdir build
+cd build
+cmake -DSTATIC=on -DBUILD_TRAINING_TOOLS=off -DBUILD_TESTS=off -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_CXX_FLAGS="-fPIC" -DAUTO_OPTIMIZE=off ..
+make -j $THREADS
+cd ../..
+mv tesseract/build/libtesseract.a .
+
+# leptonica
+cd leptonica
+./autogen.sh
+CFLAGS="-fPIC" ./configure --without-zlib --without-jpeg --without-giflib \
+  --without-giflib --without-libwebp --without-libwebpmux --without-libopenjpeg \
+  --enable-static --disable-shared
+make -j $THREADS
+cd ..
+mv leptonica/src/.libs/liblept.a .
+
+# tiff
+cd libtiff
+./autogen.sh
+CFLAGS="-fPIC" CXXFLAGS="-fPIC" CXX_FLAGS="-fPIC" ./configure --enable-static --disable-shared --disable-lzw --disable-jpeg --disable-webp \
+  --disable-lzma --disable-zstd --disable-jbig
+make -j $THREADS
+cd ..
+mv libtiff/libtiff/.libs/libtiff.a .
+
+# png
+cd libpng
+CFLAGS="-fPIC" ./configure --enable-static --disable-shared
+make -j $THREADS
+cd ..
+mv libpng/.libs/libpng16.a .
+
+# openssl...
+git clone --depth 1 -b OpenSSL_1_1_0-stable https://github.com/openssl/openssl
+cd openssl
+./config --prefix=$(pwd)/../ssl
+make depend
+make -j $THREADS
+make install
+cd ..
+mv ./openssl/libcrypto.a ./openssl/libssl.a .
+
+# curl
+wget -nc https://curl.haxx.se/download/curl-7.68.0.tar.gz
+tar -xzf curl-7.68.0.tar.gz
+cd curl-7.68.0
+./configure --disable-ldap --disable-ldaps --without-librtmp --disable-rtsp --disable-crypto-auth \
+  --disable-smtp --without-libidn2 --without-nghttp2 --without-brotli --enable-static --disable-shared \
+  --without-libpsl --with-ssl=$(pwd)/../ssl
+make -j $THREADS
+cd ..
+mv curl-7.68.0/lib/.libs/libcurl.a .
--- a/scripts/index_static.py
+++ b/scripts/index_static.py
@@ -1,6 +1,9 @@
+import json
+
 files = [
    "schema/mappings.json",
    "schema/settings.json",
+    "schema/pipeline.json",
 ]


@@ -9,6 +12,6 @@ def clean(filepath):


 for file in files:
-    with open(file, "rb") as f:
-        data = f.read()
+    with open(file, "r") as f:
+        data = json.dumps(json.load(f), separators=(",", ":")).encode()
    print("char %s[%d] = {%s};" % (clean(file), len(data), ",".join(str(int(b)) for b in data)))
--- a/scripts/mime.py
+++ b/scripts/mime.py
@@ -12,18 +12,20 @@ major_mime = {
    "audio": 7,
    "image": 8,
    "text": 9,
-    "application": 10
+    "application": 10,
+    "x-epoc": 11,
 }

 pdf = (
    "application/pdf",
-    "application/x-cbr",
    "application/x-cbz",
+    "application/epub+zip",
    "application/vnd.ms-xpsdocument",
 )

 font = (
    "application/vnd.ms-opentype",
+    "application/x-ms-compress-szdd",  # comma required: adjacent string literals are concatenated
    "application/x-font-sfn",
    "application/x-font-ttf",
    "font/otf",
@@ -32,6 +34,34 @@ font = (
    "font/woff2"
 )

+# Archive "formats"
+archive = (
+    "application/x-tar",
+    "application/zip",
+    "application/x-rar",
+    "application/x-arc",
+    "application/x-warc",
+    "application/x-7z-compressed",
+)
+
+# Archive "filters"
+arc_filter = (
+    "application/gzip",
+    "application/x-bzip2",
+    "application/x-xz",
+    "application/x-zstd",
+    "application/x-lzma",
+    "application/x-lz4",
+    "application/x-lzip",
+    "application/x-lzop",
+)
+
+doc = (
+    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    "application/vnd.openxmlformats-officedocument.presentationml.presentation"
+)
+
 cnt = 1


@@ -46,6 +76,12 @@ def mime_id(mime):
        mime_id += " | 0x40000000"
    elif mime in font:
        mime_id += " | 0x20000000"
+    elif mime in archive:
+        mime_id += " | 0x10000000"
+    elif mime in arc_filter:
+        mime_id += " | 0x08000000"
+    elif mime in doc:
+        mime_id += " | 0x04000000"
    elif mime == "application/x-empty":
        return "1"
    return mime_id
@@ -71,7 +107,7 @@ with open("mime.csv") as f:
    print("#include <stdlib.h>\n")
    # Enum
    print("enum mime {")
-    for mime, ext in mimes.items():
+    for mime, ext in sorted(mimes.items()):
        print("    " + clean(mime) + "=" + mime_id(mime) + ",")
    print("};")

--- a/scripts/serve_static.py
+++ b/scripts/serve_static.py
@@ -1,8 +1,9 @@
 files = [
    "web/css/bundle.css",
+    "web/css/bundle_dark.css",
    "web/js/bundle.js",
-    "web/img/bg-bars.png",
    "web/img/sprite-skin-flat.png",
+    "web/img/sprite-skin-flat-dark.png",
    "web/search.html",
 ]

--- a/sist2.png
+++ b/sist2.png
--- a/src/cli.c
+++ b/src/cli.c
@@ -1,30 +1,55 @@
 #include "cli.h"
+#include "ctx.h"

 #define DEFAULT_OUTPUT "index.sist2/"
-#define DEFAULT_CONTENT_SIZE 4096
-#define DEFAULT_QUALITY 15
-#define DEFAULT_SIZE 200
+#define DEFAULT_CONTENT_SIZE 32768
+#define DEFAULT_QUALITY 5
+#define DEFAULT_SIZE 500
 #define DEFAULT_REWRITE_URL ""

 #define DEFAULT_ES_URL "http://localhost:9200"
+#define DEFAULT_BATCH_SIZE 100

 #define DEFAULT_BIND_ADDR "localhost"
 #define DEFAULT_PORT "4090"

+// NULL-terminated list of directories handed to find_file_in_paths() when
+// looking up the "<lang>.traineddata" file for the requested tesseract language.
+const char* TESS_DATAPATHS[] = {
+        "/usr/share/tessdata/",
+        "/usr/share/tesseract-ocr/tessdata/",
+        "./",
+        NULL
+};
+

 scan_args_t *scan_args_create() {
    scan_args_t *args = calloc(sizeof(scan_args_t), 1);
+
+    args->depth = -1;
+
    return args;
 }

-index_args_t *index_args_create() {
-    index_args_t *args = calloc(sizeof(index_args_t), 1);
-    return args;
+// Frees the name, path and output strings of a scan_args_t, then the struct itself.
+void scan_args_destroy(scan_args_t *args) {
+    // free(NULL) is a no-op, so members that were never set need no guard.
+    free(args->output);
+    free(args->path);
+    free(args->name);
+    free(args);
+}

-web_args_t *web_args_create() {
-    web_args_t *args = calloc(sizeof(web_args_t), 1);
-    return args;
+// Releases an index_args_t. Only the struct itself is freed for now; members
+// (e.g. the script buffer allocated in index_args_validate) are not released yet.
+void index_args_destroy(index_args_t *args) {
+    //todo
+    free(args);
+}
+
+// Releases a web_args_t. Only the struct itself is freed for now; its members
+// are left untouched.
+void web_args_destroy(web_args_t *args) {
+    //todo
+    free(args);
+}

 int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
@@ -35,7 +60,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {

    char *abs_path = abspath(argv[1]);
    if (abs_path == NULL) {
-        fprintf(stderr, "File not found: %s", argv[1]);
+        fprintf(stderr, "File not found: %s\n", argv[1]);
        return 1;
    } else {
        args->path = abs_path;
@@ -44,8 +69,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
    if (args->incremental != NULL) {
        abs_path = abspath(args->incremental);
        if (abs_path == NULL) {
-            fprintf(stderr, "File not found: %s", args->incremental);
-            return 1;
+            sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
+            args->incremental = NULL;
        }
    }

@@ -58,16 +83,13 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {

    if (args->size == 0) {
        args->size = DEFAULT_SIZE;
-    } else if (args->size <= 0) {
-        fprintf(stderr, "Invalid size: %d\n", args->size);
+    } else if (args->size > 0 && args->size < 32) {
+        // Report the rejected value itself (size, not content_size), on stderr
+        // like the other validation errors in this file.
+        fprintf(stderr, "Invalid size: %d\n", args->size);
        return 1;
    }

    if (args->content_size == 0) {
        args->content_size = DEFAULT_CONTENT_SIZE;
-    } else if (args->content_size <= 0) {
-        fprintf(stderr, "Invalid content-size: %d\n", args->content_size);
-        return 1;
    }

    if (args->threads == 0) {
@@ -90,6 +112,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
        return 1;
    }

+    if (args->depth < 0) {
+        args->depth = G_MAXINT32;
+    } else {
+        args->depth += 1;
+    }
+
    if (args->name == NULL) {
        args->name = g_path_get_basename(args->output);
    }
@@ -97,11 +125,84 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
    if (args->rewrite_url == NULL) {
        args->rewrite_url = DEFAULT_REWRITE_URL;
    }
+
+    // Translate the archive option string into its ARC_MODE_* value.
+    // An unset option behaves like "recurse"; any other unknown string is rejected.
+    if (args->archive == NULL || strcmp(args->archive, "recurse") == 0) {
+        args->archive_mode = ARC_MODE_RECURSE;
+    } else if (strcmp(args->archive, "list") == 0) {
+        args->archive_mode = ARC_MODE_LIST;
+    } else if (strcmp(args->archive, "shallow") == 0) {
+        args->archive_mode = ARC_MODE_SHALLOW;
+    } else if (strcmp(args->archive, "skip") == 0) {
+        args->archive_mode = ARC_MODE_SKIP;
+    } else {
+        // Newline-terminated like the other error messages in this file.
+        fprintf(stderr, "Archive mode must be one of (skip, list, shallow, recurse), got '%s'\n", args->archive);
+        return 1;
+    }
+
+    // Validate the requested OCR language: locate "<lang>.traineddata" in the
+    // known tessdata directories, then do a trial initialisation with it.
+    if (args->tesseract_lang != NULL) {
+        // The language string comes from the command line, so the write into the
+        // fixed-size buffer must be bounded; an over-long name is rejected.
+        char filename[128];
+        int written = snprintf(filename, sizeof(filename), "%s.traineddata", args->tesseract_lang);
+        if (written < 0 || (size_t) written >= sizeof(filename)) {
+            fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
+            return 1;
+        }
+
+        const char * path = find_file_in_paths(TESS_DATAPATHS, filename);
+        if (path == NULL) {
+            // NOTE(review): presumably LOG_FATAL terminates the process - confirm.
+            LOG_FATAL("cli.c", "Could not find tesseract language file!");
+        }
+
+        // The handle is created only once the path is known, so the early
+        // return above cannot leak it.
+        TessBaseAPI *api = TessBaseAPICreate();
+
+        ret = TessBaseAPIInit3(api, path, args->tesseract_lang);
+        if (ret != 0) {
+            fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
+            // Release the handle on the failure path as well.
+            TessBaseAPIDelete(api);
+            return 1;
+        }
+        TessBaseAPIEnd(api);
+        TessBaseAPIDelete(api);
+
+        args->tesseract_path = path;
+    }
+
+    // Compile and study the exclude pattern, and store both results in ScanCtx.
+    if (args->exclude_regex != NULL) {
+        const char *error = NULL;
+        int error_offset = 0;
+
+        // pcre_compile() reports failure through a NULL return value; the error
+        // string and offset are only meaningful in that case.
+        pcre *re = pcre_compile(args->exclude_regex, 0, &error, &error_offset, 0);
+        if (re == NULL) {
+            LOG_FATALF("cli.c", "pcre_compile returned error: %s (offset:%d)", error, error_offset)
+        }
+
+        // pcre_study() may return NULL without any error (nothing worth
+        // recording), so the error string is the failure indicator here.
+        error = NULL;
+        pcre_extra *re_extra = pcre_study(re, 0, &error);
+        if (error != NULL) {
+            LOG_FATALF("cli.c", "pcre_study returned error: %s", error)
+        }
+
+        ScanCtx.exclude = re;
+        ScanCtx.exclude_extra = re_extra;
+    } else {
+        // Reset both fields so no stale study data is left behind.
+        ScanCtx.exclude = NULL;
+        ScanCtx.exclude_extra = NULL;
+    }
+
+    LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
+    LOG_DEBUGF("cli.c", "arg size=%d", args->size)
+    LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
+    LOG_DEBUGF("cli.c", "arg threads=%d", args->threads)
+    LOG_DEBUGF("cli.c", "arg incremental=%s", args->incremental)
+    LOG_DEBUGF("cli.c", "arg output=%s", args->output)
+    LOG_DEBUGF("cli.c", "arg rewrite_url=%s", args->rewrite_url)
+    LOG_DEBUGF("cli.c", "arg name=%s", args->name)
+    LOG_DEBUGF("cli.c", "arg depth=%d", args->depth)
+    LOG_DEBUGF("cli.c", "arg path=%s", args->path)
+    LOG_DEBUGF("cli.c", "arg archive=%s", args->archive)
+    LOG_DEBUGF("cli.c", "arg tesseract_lang=%s", args->tesseract_lang)
+    LOG_DEBUGF("cli.c", "arg tesseract_path=%s", args->tesseract_path)
+    LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
+    LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
+
    return 0;
 }

 int index_args_validate(index_args_t *args, int argc, const char **argv) {

+    LogCtx.verbose = 1;
+
    if (argc < 2) {
        fprintf(stderr, "Required positional argument: PATH.\n");
        return 1;
@@ -109,20 +210,62 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {

    char *index_path = abspath(argv[1]);
    if (index_path == NULL) {
-        fprintf(stderr, "File not found: %s", argv[1]);
+        fprintf(stderr, "File not found: %s\n", argv[1]);
        return 1;
    } else {
        args->index_path = argv[1];
+        free(index_path);
    }

    if (args->es_url == NULL) {
        args->es_url = DEFAULT_ES_URL;
    }
+
+    if (args->script_path != NULL) {
+        struct stat info;
+        int res = stat(args->script_path, &info);
+
+        if (res == -1) {
+            fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
+            return 1;
+        }
+
+        int fd = open(args->script_path, O_RDONLY);
+        if (fd == -1) {
+            fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
+            return 1;
+        }
+
+        args->script = malloc(info.st_size + 1);
+        res = read(fd, args->script, info.st_size);
+        if (res < 0) {
+            fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
+            return 1;
+        }
+
+        *(args->script + info.st_size) = '\0';
+        close(fd);
+    }
+
+    if (args->batch_size == 0) {
+        args->batch_size = DEFAULT_BATCH_SIZE;
+    }
+
+    LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
+    LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
+    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
+    LOG_DEBUGF("cli.c", "arg script=%s", args->script)
+    LOG_DEBUGF("cli.c", "arg print=%d", args->print)
+    LOG_DEBUGF("cli.c", "arg batch_size=%d", args->batch_size)
+    LOG_DEBUGF("cli.c", "arg force_reset=%d", args->force_reset)
+
    return 0;
 }

 int web_args_validate(web_args_t *args, int argc, const char **argv) {

+    LogCtx.verbose = 1;
+
    if (argc < 2) {
        fprintf(stderr, "Required positional argument: PATH.\n");
        return 1;
@@ -140,16 +283,43 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
        args->port = DEFAULT_PORT;
    }

+    if (args->credentials != NULL) {
+        args->b64credentials = onion_base64_encode(args->credentials, (int) strlen(args->credentials));
+        //Remove trailing newline
+        *(args->b64credentials + strlen(args->b64credentials) - 1) = '\0';
+    }
+
    args->index_count = argc - 1;
    args->indices = argv + 1;

    for (int i = 0; i < args->index_count; i++) {
        char *abs_path = abspath(args->indices[i]);
        if (abs_path == NULL) {
-            fprintf(stderr, "File not found: %s", abs_path);
+            fprintf(stderr, "File not found: %s\n", args->indices[i]);
            return 1;
        }
    }
+
+    LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
+    LOG_DEBUGF("cli.c", "arg bind=%s", args->bind)
+    LOG_DEBUGF("cli.c", "arg port=%s", args->port)
+    LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
+    LOG_DEBUGF("cli.c", "arg b64credentials=%s", args->b64credentials)
+    LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
+    for (int i = 0; i < args->index_count; i++) {
+        LOG_DEBUGF("cli.c", "arg indices[%d]=%s", i, args->indices[i])
+    }
+
    return 0;
 }

+/**
+ * Allocate a zero-initialised index_args_t.
+ *
+ * Returns the new structure, or NULL when the allocation fails.
+ */
+index_args_t *index_args_create() {
+    // calloc() takes (element count, element size), in that order.
+    index_args_t *args = calloc(1, sizeof(index_args_t));
+    return args;
+}
+
+/**
+ * Allocate a zero-initialised web_args_t.
+ *
+ * Returns the new structure, or NULL when the allocation fails.
+ */
+web_args_t *web_args_create() {
+    // calloc() takes (element count, element size), in that order.
+    web_args_t *args = calloc(1, sizeof(web_args_t));
+    return args;
+}
+
--- a/src/cli.h
+++ b/src/cli.h
@@ -12,13 +12,29 @@ typedef struct scan_args {
    char *output;
    char *rewrite_url;
    char *name;
+    int depth;
    char *path;
+    char *archive;
+    archive_mode_t archive_mode;
+    char *tesseract_lang;
+    const char *tesseract_path;
+    char *exclude_regex;
+    int fast;
 } scan_args_t;

+scan_args_t *scan_args_create();
+
+void scan_args_destroy(scan_args_t *args);
+
+int scan_args_validate(scan_args_t *args, int argc, const char **argv);
+
+// Options of the index sub-command, filled from the command line and
+// completed by index_args_validate().
 typedef struct index_args {
+    // Elasticsearch base URL; index_args_validate() substitutes DEFAULT_ES_URL when NULL.
    char *es_url;
+    // First positional argument (argv[1]), stored exactly as given.
    const char *index_path;
+    // Optional path of a user script file; the script is only loaded when this is non-NULL.
+    const char *script_path;
+    // Contents of script_path, read by index_args_validate() into a NUL-terminated malloc'd buffer.
+    char *script;
    int print;
+    // Bulk batch size; index_args_validate() replaces 0 with DEFAULT_BATCH_SIZE.
+    int batch_size;
    int force_reset;
 } index_args_t;

@@ -26,16 +42,22 @@ typedef struct web_args {
    char *es_url;
    char *bind;
    char *port;
+    char *credentials;
+    char *b64credentials;
    int index_count;
    const char **indices;
 } web_args_t;

-scan_args_t *scan_args_create();
 index_args_t *index_args_create();
+
+void index_args_destroy(index_args_t *args);
+
 web_args_t *web_args_create();

-int scan_args_validate(scan_args_t *args, int argc, const char **argv);
+void web_args_destroy(web_args_t *args);
+
 int index_args_validate(index_args_t *args, int argc, const char **argv);
+
 int web_args_validate(web_args_t *args, int argc, const char **argv);

 #endif
--- a/src/ctx.h
+++ b/src/ctx.h
@@ -15,6 +15,10 @@ struct {
    int threads;
    int content_size;
    float tn_qscale;
+    int depth;
+    archive_mode_t archive_mode;
+    int verbose;
+    int very_verbose;

    size_t stat_tn_size;
    size_t stat_index_size;
@@ -23,16 +27,28 @@ struct {
    GHashTable *copy_table;

    pthread_mutex_t mupdf_mu;
+    char * tesseract_lang;
+    const char * tesseract_path;
+    pcre *exclude;
+    pcre_extra *exclude_extra;
+    int fast;
 } ScanCtx;

+// Logging switches read by the logging macros and helpers.
+struct {
+    // Set to 1 at the start of index_args_validate() and web_args_validate().
+    int verbose;
+    // When non-zero, store_write() emits a debug line for every write.
+    int very_verbose;
+    // NOTE(review): presumably disables coloured log output — not used in the visible code, confirm.
+    int no_color;
+} LogCtx;

+// State of the index sub-command.
 struct {
+    // Elasticsearch base URL used to build the request URLs in elastic.c.
    char *es_url;
+    // elastic_index_line() flushes the bulk queue once this many lines are queued.
+    int batch_size;
 } IndexCtx;

+// State of the web sub-command.
 struct {
+    // Elasticsearch base URL used by elastic_get_document() and elastic_get_status().
    char *es_url;
    int index_count;
+    // NOTE(review): presumably the base64-encoded credentials computed in web_args_validate() — confirm where it is assigned.
+    char *b64credentials;
+    // Fixed capacity: at most 16 indices.
    struct index_t indices[16];
 } WebCtx;

--- a/src/index/elastic.c
+++ b/src/index/elastic.c
@@ -1,16 +1,8 @@
 #include "elastic.h"
 #include "src/ctx.h"

-#include <stdlib.h>
-#include "web.h"
-#include <stdio.h>
-#include <string.h>
-#include <cJSON/cJSON.h>
-#include <src/ctx.h>
-
 #include "static_generated.c"

-#define BULK_INDEX_SIZE 100

 typedef struct es_indexer {
    int queued;
@@ -22,6 +14,8 @@ typedef struct es_indexer {

 static es_indexer_t *Indexer;

+void delete_queue(int max);
+
 void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {

    cJSON *line = cJSON_CreateObject();
@@ -29,13 +23,14 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
    cJSON_AddStringToObject(line, "_id", uuid_str);
    cJSON_AddStringToObject(line, "_index", "sist2");
    cJSON_AddStringToObject(line, "_type", "_doc");
-    cJSON_AddItemToObject(line, "_source", document);
+    cJSON_AddItemReferenceToObject(line, "_source", document);

    char *json = cJSON_PrintUnformatted(line);

    printf("%s\n", json);

-    cJSON_free(line);
+    cJSON_free(json);
+    cJSON_Delete(line);
 }

 void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
@@ -54,23 +49,52 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
    elastic_index_line(bulk_line);
 }

-void elastic_flush() {
+/**
+ * Run a user-supplied painless script through the Elasticsearch
+ * _update_by_query endpoint of the sist2 index.
+ *
+ * script:   painless source, sent as script.source
+ * index_id: value matched by the term query on the "index" field
+ *
+ * The HTTP status code is logged. When the response contains an "error"
+ * member, it is pretty-printed to the error log.
+ */
+void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {

-    if (Indexer == NULL) {
-        Indexer = create_indexer(IndexCtx.es_url);
+    cJSON *body = cJSON_CreateObject();
+    cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
+    cJSON_AddStringToObject(script_obj, "lang", "painless");
+    cJSON_AddStringToObject(script_obj, "source", script);
+
+    cJSON *query = cJSON_AddObjectToObject(body, "query");
+    cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
+    cJSON_AddStringToObject(term_obj, "index", index_id);
+
+    char *str = cJSON_Print(body);
+
+    // Build the URL from IndexCtx.es_url, not Indexer->es_url: Indexer is only
+    // created lazily by elastic_flush() and may still be NULL here.
+    char bulk_url[4096];
+    snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", IndexCtx.es_url);
+    response_t *r = web_post(bulk_url, str, "Content-Type: application/json");
+    LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
+    // Parse before the response is released; resp stays valid after free_response().
+    cJSON *resp = cJSON_Parse(r->body);
+
+    cJSON_free(str);
+    cJSON_Delete(body);
+    free_response(r);
+
+    cJSON *error = cJSON_GetObjectItem(resp, "error");
+    if (error != NULL) {
+        char *error_str = cJSON_Print(error);
+
+        LOG_ERRORF("elastic.c", "User script error: \n%s", error_str);
+        cJSON_free(error_str);
    }

-    es_bulk_line_t *line = Indexer->line_head;
+    cJSON_Delete(resp);
+}

-    int count = 0;
+void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
+    es_bulk_line_t *line = Indexer->line_head;
+    *count = 0;

    size_t buf_size = 0;
    size_t buf_cur = 0;
    char *buf = malloc(1);

-    while (line != NULL) {
+    while (line != NULL && *count < max) {
        char action_str[512];
-        snprintf(action_str, 512, "{\"index\":{\"_id\": \"%s\"}}\n", line->uuid_str);
+        snprintf(action_str, 512,
+                 "{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
        size_t action_str_len = strlen(action_str);

        size_t line_len = strlen(line->line);
@@ -82,23 +106,103 @@ void elastic_flush() {
        memcpy(buf + buf_cur, line->line, line_len);
        buf_cur += line_len;

-        es_bulk_line_t *tmp = line;
        line = line->next;
-        free(tmp);
-        count++;
+        (*count)++;
    }
    buf = realloc(buf, buf_size + 1);
-    *(buf+buf_cur) = '\0';
+    *(buf + buf_cur) = '\0';

-    Indexer->line_head = NULL;
-    Indexer->line_tail = NULL;
-    Indexer->queued = 0;
+    *buf_len = buf_cur;
+    return buf;
+}
+
+/**
+ * Log the failures reported in an Elasticsearch response.
+ *
+ * For a _bulk result (a body with an "errors" member) every item whose
+ * index.status is missing or different from 201 is pretty-printed to the
+ * error log. A body without an "errors" member (for example a request-level
+ * error object, or a body that is not JSON) is logged verbatim.
+ *
+ * r: response to inspect; r->body may be NULL.
+ *
+ * Always returns NULL; the return type is kept for existing callers.
+ */
+void *print_errors(response_t *r) {
+    cJSON *ret_json = cJSON_Parse(r->body);
+    const cJSON *errors = cJSON_GetObjectItem(ret_json, "errors");
+
+    if (errors == NULL) {
+        // Not a bulk result: there is no per-item status to walk.
+        if (r->body != NULL) {
+            LOG_ERRORF("elastic.c", "%s\n", r->body);
+        }
+    } else if (errors->valueint != 0) {
+        cJSON *err;
+        cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
+            const cJSON *status = cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status");
+            if (status == NULL || status->valueint != 201) {
+                char *str = cJSON_Print(err);
+                LOG_ERRORF("elastic.c", "%s\n", str);
+                cJSON_free(str);
+            }
+        }
+    }
+    cJSON_Delete(ret_json);
+    return NULL;
+}
+
+/**
+ * Send up to `max` queued bulk lines to Elasticsearch and remove them from the queue.
+ *
+ * Behaviour by HTTP status:
+ *   0     - fatal log (could not connect)
+ *   413   - payload too large: retry with max / 2; when max <= 1 the head
+ *           document is dropped and the remainder is flushed
+ *   other - errors are printed and the whole queue is dropped
+ *   200   - errors are printed, `max` entries are removed and any remaining
+ *           entries are flushed through elastic_flush()
+ */
+void _elastic_flush(int max) {
+    size_t buf_len;
+    int count;
+    void *buf = create_bulk_buffer(max, &count, &buf_len);

    char bulk_url[4096];
-    snprintf(bulk_url, 4096, "%s/sist2/_bulk", Indexer->es_url);
+    snprintf(bulk_url, 4096, "%s/sist2/_bulk?pipeline=tie", Indexer->es_url);
    response_t *r = web_post(bulk_url, buf, "Content-Type: application/x-ndjson");
-    printf("Indexed %3d documents (%zukB) <%d>\n", count, buf_cur / 1024, r->status_code);
+
+    if (r->status_code == 0) {
+        LOG_FATALF("elastic.c", "Could not connect to %s, make sure that elasticsearch is running!\n", IndexCtx.es_url)
+    }
+
+    if (r->status_code == 413) {
+
+        if (max <= 1) {
+            // Cannot split any further: drop the head document and carry on with the rest.
+            LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->uuid_str)
+            free_response(r);
+            free(buf);
+            delete_queue(1);
+            if (Indexer->queued != 0) {
+                elastic_flush();
+            }
+            return;
+        }
+
+        LOG_WARNINGF("elastic.c", "Payload too large, retrying (%d documents)", count);
+
+        // The queue is untouched at this point; the retry rebuilds a smaller buffer from its head.
+        free_response(r);
+        free(buf);
+        _elastic_flush(max / 2);
+        return;
+
+    } else if (r->status_code != 200) {
+        print_errors(r);
+        delete_queue(Indexer->queued);
+
+    } else {
+
+        print_errors(r);
+        LOG_INFOF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
+        delete_queue(max);
+
+        // After a halved retry part of the queue is still pending.
+        if (Indexer->queued != 0) {
+            elastic_flush();
+        }
+    }
+
    free_response(r);
+    free(buf);
+}
+
+/**
+ * Unlink and free up to `max` entries from the head of the bulk queue,
+ * decrementing Indexer->queued for each one.
+ *
+ * max: number of entries to remove; the loop stops early when the queue
+ *      runs out.
+ */
+void delete_queue(int max) {
+    for (int i = 0; i < max; i++) {
+        es_bulk_line_t *tmp = Indexer->line_head;
+        if (tmp == NULL) {
+            // Queue exhausted before `max` entries were removed: resynchronise the bookkeeping.
+            Indexer->line_tail = NULL;
+            Indexer->queued = 0;
+            break;
+        }
+
+        Indexer->line_head = tmp->next;
+        if (Indexer->line_head == NULL) {
+            Indexer->line_tail = NULL;
+        }
+        // Every unlinked node is released, including the last one of the queue.
+        free(tmp);
+        Indexer->queued -= 1;
+    }
+}
+
+/**
+ * Flush every line currently queued for bulk indexing.
+ */
+void elastic_flush() {
+    // The indexer is created on first use from the configured Elasticsearch URL.
+    if (!Indexer) {
+        Indexer = create_indexer(IndexCtx.es_url);
+    }
+
+    // Ask for the whole queue; _elastic_flush() handles splitting and retries.
+    _elastic_flush(Indexer->queued);
 }

 void elastic_index_line(es_bulk_line_t *line) {
@@ -117,15 +221,14 @@ void elastic_index_line(es_bulk_line_t *line) {

    Indexer->queued += 1;

-    if (Indexer->queued >= BULK_INDEX_SIZE) {
+    if (Indexer->queued >= IndexCtx.batch_size) {
        elastic_flush();
    }
 }

 es_indexer_t *create_indexer(const char *url) {

-    size_t url_len = strlen(url);
-    char *es_url = malloc(url_len);
+    char *es_url = malloc(strlen(url) + 1);
    strcpy(es_url, url);

    es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
@@ -138,18 +241,27 @@ es_indexer_t *create_indexer(const char *url) {
    return indexer;
 }

-void destroy_indexer() {
+void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {

    char url[4096];

    snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
    response_t *r = web_post(url, "", NULL);
-    printf("Refresh index <%d>\n", r->status_code);
+    LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
+    free_response(r);
+
+    if (script != NULL) {
+        execute_update_script(script, index_id);
+    }
+
+    snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
+    r = web_post(url, "", NULL);
+    LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
    free_response(r);

    snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
    r = web_post(url, "", NULL);
-    printf("Merge index <%d>\n", r->status_code);
+    LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
    free_response(r);

    if (Indexer != NULL) {
@@ -169,40 +281,75 @@ void elastic_init(int force_reset) {

    if (!index_exists || force_reset) {
        r = web_delete(url);
-        printf("Delete index <%d>\n", r->status_code);
+        LOG_INFOF("elastic.c", "Delete index <%d>", r->status_code);
        free_response(r);

        snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
        r = web_put(url, "", NULL);
-        printf("Create index <%d>\n", r->status_code);
+        LOG_INFOF("elastic.c", "Create index <%d>", r->status_code);
        free_response(r);

        snprintf(url, 4096, "%s/sist2/_close", IndexCtx.es_url);
        r = web_post(url, "", NULL);
-        printf("Close index <%d>\n", r->status_code);
+        LOG_INFOF("elastic.c", "Close index <%d>", r->status_code);
+        free_response(r);
+
+        snprintf(url, 4096, "%s/_ingest/pipeline/tie", IndexCtx.es_url);
+        r = web_put(url, pipeline_json, "Content-Type: application/json");
+        LOG_INFOF("elastic.c", "Create pipeline <%d>", r->status_code);
        free_response(r);

        snprintf(url, 4096, "%s/sist2/_settings", IndexCtx.es_url);
        r = web_put(url, settings_json, "Content-Type: application/json");
-        printf("Update settings <%d>\n", r->status_code);
+        LOG_INFOF("elastic.c", "Update settings <%d>", r->status_code);
        free_response(r);

-        snprintf(url, 4096, "%s/sist2/_mappings", IndexCtx.es_url);
+        snprintf(url, 4096, "%s/sist2/_mappings/_doc?include_type_name=true", IndexCtx.es_url);
        r = web_put(url, mappings_json, "Content-Type: application/json");
-        printf("Update mappings <%d>\n", r->status_code);
+        LOG_INFOF("elastic.c", "Update mappings <%d>", r->status_code);
        free_response(r);

        snprintf(url, 4096, "%s/sist2/_open", IndexCtx.es_url);
        r = web_post(url, "", NULL);
-        printf("Open index <%d>\n", r->status_code);
+        LOG_INFOF("elastic.c", "Open index <%d>", r->status_code);
        free_response(r);
    }
 }

+/**
+ * Fetch one document from the sist2 index by id.
+ *
+ * uuid_str: document id, appended to the /sist2/_doc/ URL
+ *
+ * Returns the parsed response body, or NULL when the HTTP status is not 200
+ * or the body cannot be parsed. Nothing here frees the returned tree, so the
+ * caller is expected to release it with cJSON_Delete().
+ */
 cJSON *elastic_get_document(const char *uuid_str) {
    char url[4096];
-    snprintf(url, 4096, "%s/sist2/_source/%s", WebCtx.es_url, uuid_str);
+    snprintf(url, 4096, "%s/sist2/_doc/%s", WebCtx.es_url, uuid_str);

    response_t *r = web_get(url);
-    return cJSON_Parse(r->body);
+    cJSON *json = NULL;
+    if (r->status_code == 200) {
+        json = cJSON_Parse(r->body);
+    }
+    // The response is released on every path; json does not reference r->body.
+    free_response(r);
+    return json;
+}
+
+/**
+ * Query the cluster state for the state of the sist2 index.
+ *
+ * Returns a malloc'd, NUL-terminated string owned by the caller. It holds the
+ * value of metadata.indices.sist2.state, truncated to 127 characters, or is
+ * empty when the request does not return 200 or the field is missing or not
+ * a string. Returns NULL only when the buffer cannot be allocated.
+ */
+char *elastic_get_status() {
+    char url[4096];
+    snprintf(url, 4096,
+             "%s/_cluster/state/metadata/sist2?filter_path=metadata.indices.*.state", WebCtx.es_url);
+
+    // Allocate before the request so a failed allocation leaks nothing.
+    const size_t status_size = 128;
+    char *status = malloc(status_size);
+    if (status == NULL) {
+        return NULL;
+    }
+    status[0] = '\0';
+
+    response_t *r = web_get(url);
+    cJSON *json = NULL;
+
+    if (r->status_code == 200) {
+        json = cJSON_Parse(r->body);
+        // cJSON_GetObjectItem() returns NULL for a NULL object, so the chain is safe
+        // even when an intermediate member is missing.
+        const cJSON *metadata = cJSON_GetObjectItem(json, "metadata");
+        const cJSON *indices = cJSON_GetObjectItem(metadata, "indices");
+        const cJSON *sist2 = cJSON_GetObjectItem(indices, "sist2");
+        const cJSON *state = cJSON_GetObjectItem(sist2, "state");
+        if (cJSON_IsString(state) && state->valuestring != NULL) {
+            // Bounded copy: the value comes from the network.
+            snprintf(status, status_size, "%s", state->valuestring);
+        }
+    }
+    free_response(r);
+    cJSON_Delete(json);
+    return status;
 }
--- a/src/index/elastic.h
+++ b/src/index/elastic.h
@@ -24,10 +24,12 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);

 es_indexer_t *create_indexer(const char* es_url);

-void destroy_indexer();
+void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);

 void elastic_init(int force_reset);

 cJSON *elastic_get_document(const char *uuid_str);

+char *elastic_get_status();
+
 #endif
--- a/src/index/static_generated.c
+++ b/src/index/static_generated.c
--- a/src/index/web.c
+++ b/src/index/web.c
@@ -49,18 +49,19 @@ response_t *web_post(const char *url, const char *data, const char *header) {
    curl_easy_setopt(curl, CURLOPT_POST, 1);
    curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");

+    struct curl_slist *headers = NULL;
    if (header != NULL) {
-        struct curl_slist *headers = NULL;
        headers = curl_slist_append(headers, header);
        curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
    }

    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);

-    int r1 = curl_easy_perform(curl);
+    curl_easy_perform(curl);
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);

    curl_easy_cleanup(curl);
+    curl_slist_free_all(headers);

    resp->body = buffer.buf;
    resp->size = buffer.cur;
--- a/src/io/serialize.c
+++ b/src/io/serialize.c
@@ -1,7 +1,7 @@
 #include "src/ctx.h"
 #include "serialize.h"

-static __thread int IndexFd = -1;
+static __thread int index_fd = -1;

 typedef struct {
    unsigned char uuid[16];
@@ -34,12 +34,13 @@ void write_index_descriptor(char *path, index_descriptor_t *desc) {
    cJSON_AddStringToObject(json, "version", desc->version);
    cJSON_AddStringToObject(json, "root", desc->root);
    cJSON_AddStringToObject(json, "name", desc->name);
+    cJSON_AddStringToObject(json, "type", desc->type);
    cJSON_AddStringToObject(json, "rewrite_url", desc->rewrite_url);
    cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);

    int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
-    if (fd == -1) {
-        perror(path);
+    if (fd < 0) {
+        LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
    }
    char *str = cJSON_Print(json);
    write(fd, str, strlen(str));
@@ -54,6 +55,11 @@ index_descriptor_t read_index_descriptor(char *path) {
    struct stat info;
    stat(path, &info);
    int fd = open(path, O_RDONLY);
+
+    if (fd == -1) {
+        LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path ,strerror(errno))
+    }
+
    char *buf = malloc(info.st_size + 1);
    read(fd, buf, info.st_size);
    *(buf + info.st_size) = '\0';
@@ -66,9 +72,14 @@ index_descriptor_t read_index_descriptor(char *path) {
    strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring);
    strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring);
    strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
-    descriptor.root_len = (short)strlen(descriptor.root);
+    descriptor.root_len = (short) strlen(descriptor.root);
    strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
    strcpy(descriptor.uuid, cJSON_GetObjectItem(json, "uuid")->valuestring);
+    if (cJSON_GetObjectItem(json, "type") == NULL) {
+        strcpy(descriptor.type, INDEX_TYPE_BIN);
+    } else {
+        strcpy(descriptor.type, cJSON_GetObjectItem(json, "type")->valuestring);
+    }

    cJSON_Delete(json);
    free(buf);
@@ -105,6 +116,26 @@ char *get_meta_key_text(enum metakey meta_key) {
            return "title";
        case MetaFontName:
            return "font_name";
+        case MetaParent:
+            return "parent";
+        case MetaExifMake:
+            return "exif_make";
+        case MetaExifSoftware:
+            return "exif_software";
+        case MetaExifExposureTime:
+            return "exif_exposure_time";
+        case MetaExifFNumber:
+            return "exif_fnumber";
+        case MetaExifFocalLength:
+            return "exif_focal_length";
+        case MetaExifUserComment:
+            return "exif_user_comment";
+        case MetaExifIsoSpeedRatings:
+            return "exif_iso_speed_ratings";
+        case MetaExifModel:
+            return "exif_model";
+        case MetaExifDateTime:
+            return "exif_datetime";
        default:
            return NULL;
    }
@@ -113,13 +144,13 @@ char *get_meta_key_text(enum metakey meta_key) {

 void write_document(document_t *doc) {

-    if (IndexFd == -1) {
+    if (index_fd == -1) {
        char dstfile[PATH_MAX];
-        pid_t tid = syscall(SYS_gettid);
-        snprintf(dstfile, PATH_MAX, "%s_index_%d", ScanCtx.index.path, tid);
-        IndexFd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
+        pthread_t self = pthread_self();
+        snprintf(dstfile, PATH_MAX, "%s_index_%lu", ScanCtx.index.path, self);
+        index_fd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);

-        if (IndexFd == -1) {
+        if (index_fd == -1) {
            perror("open");
        }
    }
@@ -152,17 +183,22 @@ void write_document(document_t *doc) {
    }
    dyn_buffer_write_char(&buf, '\n');

-    write(IndexFd, buf.buf, buf.cur);
+    int res = write(index_fd, buf.buf, buf.cur);
+    if (res == -1) {
+        LOG_FATALF("serialize.c", "Could not write document: %s", strerror(errno))
+    }
    ScanCtx.stat_index_size += buf.cur;
    dyn_buffer_destroy(&buf);
 }

-void serializer_cleanup() {
-    close(IndexFd);
+/**
+ * Per-thread teardown: closes this thread's index file descriptor
+ * (index_fd is thread-local and is opened lazily by write_document()),
+ * then calls cleanup_parse() and cleanup_font().
+ * NOTE(review): the two cleanup helpers are defined elsewhere — presumably they
+ * release per-thread parser and font state; confirm.
+ */
+void thread_cleanup() {
+    close(index_fd);
+    cleanup_parse();
+    cleanup_font();
 }

-void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func func) {

+void read_index_bin(const char *path, const char *index_id, index_func func) {
    line_t line;
    dyn_buffer_t buf = dyn_buffer_create();

@@ -180,8 +216,13 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
        char uuid_str[UUID_STR_LEN];
        uuid_unparse(line.uuid, uuid_str);

-        cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
-        cJSON_AddNumberToObject(document, "size", (double)line.size);
+        const char* mime_text = mime_get_mime_text(line.mime);
+        if (mime_text == NULL) {
+            cJSON_AddNullToObject(document, "mime");
+        } else {
+            cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
+        }
+        cJSON_AddNumberToObject(document, "size", (double) line.size);
        cJSON_AddNumberToObject(document, "mtime", line.mtime);

        int c;
@@ -197,21 +238,30 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
            *(buf.buf + line.ext) = '\0';
        }
        cJSON_AddStringToObject(document, "name", buf.buf + line.base);
-        *(buf.buf + line.base - 1) = '\0';
-        cJSON_AddStringToObject(document, "path", buf.buf);
+        if (line.base > 0) {
+            *(buf.buf + line.base - 1) = '\0';
+            cJSON_AddStringToObject(document, "path", buf.buf);
+        } else {
+            cJSON_AddStringToObject(document, "path", "");
+        }

        enum metakey key = getc(file);
        while (key != '\n') {
            switch (key) {
                case MetaWidth:
-                case MetaHeight:
-                case MetaMediaDuration:
-                case MetaMediaBitrate: {
+                case MetaHeight: {
                    int value;
                    fread(&value, sizeof(int), 1, file);
                    cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
                    break;
                }
+                case MetaMediaDuration:
+                case MetaMediaBitrate: {
+                    long value;
+                    fread(&value, sizeof(long), 1, file);
+                    cJSON_AddNumberToObject(document, get_meta_key_text(key), (double) value);
+                    break;
+                }
                case MetaMediaAudioCodec:
                case MetaMediaVideoCodec: {
                    int value;
@@ -229,26 +279,124 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
                case MetaAlbumArtist:
                case MetaGenre:
                case MetaFontName:
+                case MetaParent:
+                case MetaExifMake:
+                case MetaExifSoftware:
+                case MetaExifExposureTime:
+                case MetaExifFNumber:
+                case MetaExifFocalLength:
+                case MetaExifUserComment:
+                case MetaExifIsoSpeedRatings:
+                case MetaExifDateTime:
+                case MetaExifModel:
                case MetaTitle: {
                    buf.cur = 0;
                    while ((c = getc(file)) != 0) {
-                        dyn_buffer_write_char(&buf, (char) c);
+                        if (SHOULD_KEEP_CHAR(c) || c == ' ') {
+                            dyn_buffer_write_char(&buf, (char) c);
+                        }
                    }
                    dyn_buffer_write_char(&buf, '\0');
                    cJSON_AddStringToObject(document, get_meta_key_text(key), buf.buf);
                    break;
                }
+                default:
+                LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key)
            }

            key = getc(file);
        }

        func(document, uuid_str);
-        cJSON_free(document);
+        cJSON_Delete(document);
+    }
+    dyn_buffer_destroy(&buf);
+    fclose(file);
+}
+
+/**
+ * Top-level members of a JSON index line that read_index_json() copies
+ * as-is into the outgoing document.
+ *
+ * Each name must be listed only once: every entry found in the input is
+ * added to the document, so a repeated name produces a duplicate key.
+ */
+const char *json_type_copy_fields[] = {
+        "mime", "name", "path", "extension", "index", "size", "mtime", "parent",
+
+        // Meta
+        "title", "content", "width", "height", "duration", "audioc", "videoc",
+        "bitrate", "artist", "album", "album_artist", "genre", "font_name",
+
+        // Special
+        "tag", "_url"
+};
+
+// Array-valued members handled by read_index_json(): every element must be an
+// object with string members "k" and "v", and is added to the document as the
+// field "<array name>.<k>" with value v.
+const char *json_type_array_fields[] = {
+        "_keyword", "_text"
+};
+
+/**
+ * Read a JSON-lines index file and hand every document to `func`.
+ *
+ * Each line must be a JSON object with a string "_id". The members named in
+ * json_type_copy_fields are referenced as-is; each {k, v} element of the
+ * arrays named in json_type_array_fields becomes a "<array>.<k>" string field.
+ *
+ * path:     index file to read
+ * index_id: unused for this index type
+ * func:     callback receiving the built document and its id; the document
+ *           is deleted right after the call returns
+ *
+ * A file that cannot be opened, a line that is not valid JSON, a missing or
+ * non-string "_id", and a malformed array member are reported with LOG_FATALF.
+ */
+void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) {
+
+    FILE *file = fopen(path, "r");
+    if (file == NULL) {
+        LOG_FATALF("serialize.c", "Could not open index file %s: %s", path, strerror(errno))
+    }
+
+    // getline() allocates this buffer on first use and reuses/grows it afterwards.
+    char *line = NULL;
+    size_t len = 0;
+
+    // getline() returns -1 at end of file or on error. The result is tested
+    // directly: stored in a size_t it could never compare below zero.
+    while (getline(&line, &len, file) >= 0) {
+
+        cJSON *input = cJSON_Parse(line);
+        if (input == NULL) {
+            LOG_FATALF("serialize.c", "Could not parse JSON line: \n%s", line)
+        }
+
+        const cJSON *id = cJSON_GetObjectItem(input, "_id");
+        if (!cJSON_IsString(id)) {
+            LOG_FATALF("serialize.c", "Missing or invalid _id field: \n%s", line)
+        }
+        // Points into `input`, which outlives the func() call below.
+        const char *uuid_str = id->valuestring;
+
+        cJSON *document = cJSON_CreateObject();
+
+        for (size_t i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) {
+            cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]);
+            if (value != NULL) {
+                // Reference only: `input` keeps ownership of the value.
+                cJSON_AddItemReferenceToObject(document, json_type_copy_fields[i], value);
+            }
+        }
+
+        for (size_t i = 0; i < (sizeof(json_type_array_fields) / sizeof(json_type_array_fields[0])); i++) {
+            cJSON *arr = cJSON_GetObjectItem(input, json_type_array_fields[i]);
+            if (arr != NULL) {
+                cJSON *obj;
+                cJSON_ArrayForEach(obj, arr) {
+                    char key[1024];
+                    cJSON *k = cJSON_GetObjectItem(obj, "k");
+                    cJSON *v = cJSON_GetObjectItem(obj, "v");
+                    if (k == NULL || v == NULL || !cJSON_IsString(k) || !cJSON_IsString(v)) {
+                        char *str = cJSON_Print(obj);
+                        LOG_FATALF("serialize.c", "Invalid %s member: must contain .k and .v string fields: \n%s",
+                                   json_type_array_fields[i], str)
+                    }
+                    snprintf(key, sizeof(key), "%s.%s", json_type_array_fields[i], k->valuestring);
+                    cJSON_AddStringToObject(document, key, v->valuestring);
+                }
+            }
+        }
+
+        func(document, uuid_str);
+        cJSON_Delete(document);
+        cJSON_Delete(input);
+
    }
+    // free(NULL) is a no-op, so this is safe even when no line was ever read.
+    free(line);
    fclose(file);
 }

+/**
+ * Read one index file and pass every document to `func`, dispatching on the
+ * index type recorded in the index descriptor.
+ *
+ * path:     index file to read
+ * index_id: id of the index, forwarded to the reader
+ * type:     INDEX_TYPE_BIN or INDEX_TYPE_JSON
+ * func:     callback invoked once per document
+ *
+ * Any other type is reported with LOG_FATALF instead of being ignored.
+ */
+void read_index(const char *path, const char index_id[UUID_STR_LEN], const char *type, index_func func) {
+
+    if (strcmp(type, INDEX_TYPE_BIN) == 0) {
+        read_index_bin(path, index_id, func);
+    } else if (strcmp(type, INDEX_TYPE_JSON) == 0) {
+        read_index_json(path, index_id, func);
+    } else {
+        LOG_FATALF("serialize.c", "Unknown index type (corrupt descriptor?): %s", type)
+    }
+}
+
 void incremental_read(GHashTable *table, const char *filepath) {
    FILE *file = fopen(filepath, "rb");
    line_t line;
@@ -289,6 +437,7 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
            size_t buf_len;
            char *buf = store_read(store, (char *) line.uuid, 16, &buf_len);
            store_write(dst_store, (char *) line.uuid, 16, buf, buf_len);
+            free(buf);

            char c;
            while ((c = (char) getc(file))) {
--- a/src/io/serialize.h
+++ b/src/io/serialize.h
@@ -11,14 +11,14 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,

 void write_document(document_t *doc);

-void read_index(const char *path, const char[UUID_STR_LEN], index_func);
+void read_index(const char *path, const char[UUID_STR_LEN], const char *type, index_func);

 void incremental_read(GHashTable *table, const char *filepath);

 /**
 * Must be called after write_document
 */
-void serializer_cleanup();
+void thread_cleanup();

 void write_index_descriptor(char *path, index_descriptor_t *desc);

--- a/src/io/store.c
+++ b/src/io/store.c
@@ -9,14 +9,13 @@ store_t *store_create(char *path) {
    mdb_env_create(&store->env);

    int open_ret = mdb_env_open(store->env,
-                 path,
-                 MDB_WRITEMAP | MDB_MAPASYNC,
-                 S_IRUSR | S_IWUSR
+                                path,
+                                MDB_WRITEMAP | MDB_MAPASYNC,
+                                S_IRUSR | S_IWUSR
    );

    if (open_ret != 0) {
-        fprintf(stderr, "Error while opening store: %s", mdb_strerror(open_ret));
-        exit(1);
+        LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
    }

    store->size = (size_t) 1024 * 1024 * 5;
@@ -42,6 +41,12 @@ void store_destroy(store_t *store) {

 void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {

+    if (LogCtx.very_verbose) {
+        char uuid_str[UUID_STR_LEN];
+        uuid_unparse((unsigned char *) key, uuid_str);
+        LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
+    }
+
    MDB_val mdb_key;
    mdb_key.mv_data = key;
    mdb_key.mv_size = key_len;
@@ -64,17 +69,19 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
        // Cannot resize when there is a opened transaction.
        //  Resize take effect on the next commit.
        pthread_rwlock_wrlock(&store->lock);
-        store->size += 1024 * 1024 * 5;
+        store->size += 1024 * 1024 * 50;
        mdb_env_set_mapsize(store->env, store->size);
        mdb_txn_begin(store->env, NULL, 0, &txn);
        put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
+
+        LOG_INFOF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
    }

    mdb_txn_commit(txn);
    pthread_rwlock_unlock(&store->lock);

    if (put_ret != 0) {
-        printf("%s\n", mdb_strerror(put_ret));
+        LOG_ERROR("store.c", mdb_strerror(put_ret))
    }
 }

--- a/src/io/walk.c
+++ b/src/io/walk.c
@@ -1,28 +1,46 @@
 #include "walk.h"
 #include "src/ctx.h"

-parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int base) {
+__always_inline
+parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
    int len = (int) strlen(filepath);
-
    parse_job_t *job = malloc(sizeof(parse_job_t) + len);

-    memcpy(&(job->filepath), filepath, len + 1);
+    strcpy(job->filepath, filepath);
    job->base = base;
    char *p = strrchr(filepath + base, '.');
    if (p != NULL) {
-        job->ext = (int)(p - filepath + 1);
+        job->ext = (int) (p - filepath + 1);
    } else {
        job->ext = len;
    }

-    memcpy(&(job->info), info, sizeof(struct stat));
+    job->info = *info;
+
+    memset(job->parent, 0, 16);
+
+    job->vfile.filepath = job->filepath;
+    job->vfile.read = fs_read;
+    job->vfile.close = fs_close;
+    job->vfile.fd = -1;
+    job->vfile.is_fs_file = TRUE;

    return job;
 }

+int sub_strings[30]; // pcre_exec() output vector; its size is passed as an element count (multiple of 3), not bytes
+#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, (str), (int) strlen(str), 0, 0, sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
+
 int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
-    if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
-        parse_job_t *job = create_parse_job(filepath, info, ftw->base);
+
+    if (typeflag == FTW_F && S_ISREG(info->st_mode) && ftw->level <= ScanCtx.depth) {
+
+        if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
+            LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
+            return 0;
+        }
+
+        parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
        tpool_add_work(ScanCtx.pool, parse, job);
    }

--- a/src/log.c
+++ b/src/log.c
@@ -0,0 +1,99 @@
+#include "log.h"
+
+const char *log_colors[] = {
+        "\033[34m", "\033[01;34m", "\033[0m",
+        "\033[01;33m", "\033[31m", "\033[01;31m"
+};
+
+const char *log_levels[] = {
+        "DEBUG", "INFO", "WARNING", "ERROR", "FATAL"
+};
+
+void sist_logf(char *filepath, int level, char *format, ...) {
+    // Builds "[thread] [time] [LEVEL filepath] <formatted message>" and emits it to stderr with one write().
+    static int is_tty = -1;
+    if (is_tty == -1) {
+        is_tty = isatty(STDERR_FILENO);
+    }
+
+    char log_str[LOG_MAX_LENGTH];
+    // Keep 6 bytes free for the "\033[0m\n" suffix and its NUL so the tail can never overflow log_str.
+    const int max_body = (int) sizeof(log_str) - 6;
+    unsigned long long pid = (unsigned long long) pthread_self();
+    char datetime[32];
+    time_t t = time(NULL);
+    struct tm result;
+    localtime_r(&t, &result);
+    strftime(datetime, sizeof(datetime), "%Y-%m-%d %H:%M:%S", &result);
+
+    int log_len;
+    if (is_tty) {
+        log_len = snprintf(
+                log_str, sizeof(log_str),
+                "\033[%dm[%04X]%s [%s] [%s %s] ",
+                31 + ((unsigned int) (pid)) % 7, (unsigned int) pid, log_colors[level],
+                datetime, log_levels[level], filepath
+        );
+    } else {
+        log_len = snprintf(
+                log_str, sizeof(log_str),
+                "[%04X] [%s] [%s %s] ",
+                (unsigned int) pid, datetime, log_levels[level], filepath
+        );
+    }
+    log_len = log_len < 0 ? 0 : (log_len > max_body ? max_body : log_len);
+    va_list ap;
+    va_start(ap, format);
+    int room = max_body - log_len;
+    int msg_len = vsnprintf(log_str + log_len, (size_t) room + 1, format, ap);
+    va_end(ap);
+    // vsnprintf returns the untruncated length; clamp so log_len stays inside log_str.
+    log_len += msg_len < 0 ? 0 : (msg_len > room ? room : msg_len);
+    if (is_tty) {
+        log_len += sprintf(log_str + log_len, "\033[0m\n");
+    } else {
+        *(log_str + log_len) = '\n';
+        log_len += 1;
+    }
+
+    write(STDERR_FILENO, log_str, log_len);
+}
+
+void sist_log(char *filepath, int level, char *str) {
+    // Emits "[thread] [time] [LEVEL filepath] str" to stderr; output longer than log_str is truncated.
+    static int is_tty = -1;
+    if (is_tty == -1) {
+        is_tty = isatty(STDERR_FILENO);
+    }
+
+    char log_str[LOG_MAX_LENGTH];
+    unsigned long long pid = (unsigned long long) pthread_self();
+
+    char datetime[32];
+    time_t t = time(NULL);
+    struct tm result;
+    localtime_r(&t, &result);
+    strftime(datetime, sizeof(datetime), "%Y-%m-%d %H:%M:%S", &result);
+
+    int log_len;
+    if (is_tty) {
+        log_len = snprintf(
+                log_str, sizeof(log_str),
+                "\033[%dm[%04X]%s [%s] [%s %s] %s \033[0m\n",
+                31 + ((unsigned int) (pid)) % 7, (unsigned int) pid, log_colors[level],
+                datetime, log_levels[level], filepath,
+                str
+        );
+    } else {
+        log_len = snprintf(
+                log_str, sizeof(log_str),
+                "[%04X] [%s] [%s %s] %s \n",
+                (unsigned int) pid, datetime, log_levels[level], filepath,
+                str
+        );
+    }
+    // snprintf returns the untruncated length; never hand write() more bytes than log_str holds.
+    if (log_len < 0) { log_len = 0; }
+    if (log_len >= (int) sizeof(log_str)) { log_len = (int) sizeof(log_str) - 1; log_str[log_len - 1] = '\n'; }
+    write(STDERR_FILENO, log_str, log_len);
+}
--- a/src/log.h
+++ b/src/log.h
@@ -0,0 +1,45 @@
+#ifndef SIST2_LOG_H
+#define SIST2_LOG_H
+
+#define LOG_MAX_LENGTH 8192
+
+#define SIST_DEBUG 0
+#define SIST_INFO 1
+#define SIST_WARNING 2
+#define SIST_ERROR 3
+#define SIST_FATAL 4
+// Logging macros. Each expands to a complete statement, so call sites may omit the trailing ';'.
+#define LOG_DEBUGF(filepath, fmt, ...) \
+    if (LogCtx.very_verbose) {sist_logf(filepath, SIST_DEBUG, fmt, __VA_ARGS__);}
+#define LOG_DEBUG(filepath, str) \
+    if (LogCtx.very_verbose) {sist_log(filepath, SIST_DEBUG, str);}
+// DEBUG is gated on LogCtx.very_verbose; INFO, WARNING and ERROR below are gated on LogCtx.verbose.
+#define LOG_INFOF(filepath, fmt, ...) \
+    if (LogCtx.verbose) {sist_logf(filepath, SIST_INFO, fmt, __VA_ARGS__);}
+#define LOG_INFO(filepath, str) \
+    if (LogCtx.verbose) {sist_log(filepath, SIST_INFO, str);}
+
+#define LOG_WARNINGF(filepath, fmt, ...) \
+    if (LogCtx.verbose) {sist_logf(filepath, SIST_WARNING, fmt, __VA_ARGS__);}
+#define LOG_WARNING(filepath, str) \
+    if (LogCtx.verbose) {sist_log(filepath, SIST_WARNING, str);}
+
+#define LOG_ERRORF(filepath, fmt, ...) \
+    if (LogCtx.verbose) {sist_logf(filepath, SIST_ERROR, fmt, __VA_ARGS__);}
+#define LOG_ERROR(filepath, str) \
+    if (LogCtx.verbose) {sist_log(filepath, SIST_ERROR, str);}
+// FATAL is unconditional and expands to two statements (log, then exit(-1)); wrap uses in braces.
+#define LOG_FATALF(filepath, fmt, ...) \
+    sist_logf(filepath, SIST_FATAL, fmt, __VA_ARGS__);\
+    exit(-1);
+#define LOG_FATAL(filepath, str) \
+    sist_log(filepath, SIST_FATAL, str);\
+    exit(-1);
+
+#include "src/sist.h"
+
+void sist_logf(char *filepath, int level, char *format, ...);
+
+void sist_log(char *filepath, int level, char *str);
+
+#endif
--- a/src/main.c
+++ b/src/main.c
@@ -2,10 +2,11 @@
 #include "ctx.h"

 #define DESCRIPTION "Lightning-fast file system indexer and search tool."
+
 #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"


-static const char *const Version = "1.0.0";
+static const char *const Version = "1.3.2";
 static const char *const usage[] = {
        "sist2 scan [OPTION]... PATH",
        "sist2 index [OPTION]... INDEX",
@@ -23,21 +24,17 @@ void init_dir(const char *dirpath) {
    snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);

    uuid_t uuid;
-    uuid_generate_time_safe(uuid);
+    uuid_generate(uuid);
    uuid_unparse(uuid, ScanCtx.index.desc.uuid);
    time(&ScanCtx.index.desc.timestamp);
    strcpy(ScanCtx.index.desc.version, Version);
+    strcpy(ScanCtx.index.desc.type, INDEX_TYPE_BIN);

    write_index_descriptor(path, &ScanCtx.index.desc);
 }

 void scan_print_header() {
-    printf("sist2 V%s\n", Version);
-    printf("---------------------\n");
-    printf("threads\t\t%d\n", ScanCtx.threads);
-    printf("tn_qscale\t%.1f/31.0\n", ScanCtx.tn_qscale);
-    printf("tn_size\t\t%dpx\n", ScanCtx.tn_size);
-    printf("output\t\t%s\n", ScanCtx.index.path);
+    LOG_INFOF("main.c", "sist2 v%s", Version)
 }

 void sist2_scan(scan_args_t *args) {
@@ -45,18 +42,25 @@ void sist2_scan(scan_args_t *args) {
    ScanCtx.tn_qscale = args->quality;
    ScanCtx.tn_size = args->size;
    ScanCtx.content_size = args->content_size;
-    ScanCtx.pool = tpool_create(args->threads, serializer_cleanup);
    ScanCtx.threads = args->threads;
+    ScanCtx.depth = args->depth;
+    ScanCtx.archive_mode = args->archive_mode;
    strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
    strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
-    strcpy(ScanCtx.index.desc.root, args->path);
+    strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
+    strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
    ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
+    ScanCtx.tesseract_lang = args->tesseract_lang;
+    ScanCtx.tesseract_path = args->tesseract_path;
+    ScanCtx.fast = args->fast;

    init_dir(ScanCtx.index.path);

    ScanCtx.mime_table = mime_get_mime_table();
    ScanCtx.ext_table = mime_get_ext_table();

+    cbr_init();
+
    char store_path[PATH_MAX];
    snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
    mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
@@ -70,9 +74,18 @@ void sist2_scan(scan_args_t *args) {

        DIR *dir = opendir(args->incremental);
        if (dir == NULL) {
-            perror("opendir");
-            return;
+            LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
        }
+
+        char descriptor_path[PATH_MAX];
+        snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
+        index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
+
+        if (strcmp(original_desc.version, Version) != 0) {
+            LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", original_desc.version,
+                       Version, INDEX_VERSION_EXTERNAL)
+        }
+
        struct dirent *de;
        while ((de = readdir(dir)) != NULL) {
            if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
@@ -83,9 +96,11 @@ void sist2_scan(scan_args_t *args) {
        }
        closedir(dir);

-        printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
+        LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
    }

+    ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
+    tpool_start(ScanCtx.pool);
    walk_directory_tree(ScanCtx.index.desc.root);
    tpool_wait(ScanCtx.pool);
    tpool_destroy(ScanCtx.pool);
@@ -119,6 +134,7 @@ void sist2_scan(scan_args_t *args) {
 void sist2_index(index_args_t *args) {

    IndexCtx.es_url = args->es_url;
+    IndexCtx.batch_size = args->batch_size;

    if (!args->print) {
        elastic_init(args->force_reset);
@@ -128,15 +144,17 @@ void sist2_index(index_args_t *args) {
    snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);

    index_descriptor_t desc = read_index_descriptor(descriptor_path);
-    if (strcmp(desc.version, Version) != 0) {
-        fprintf(stderr, "Version mismatch! Index is v%s but executable is v%s\n", desc.version, Version);
-        return;
+
+    LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
+
+    if (strcmp(desc.version, Version) != 0 && strcmp(desc.version, INDEX_VERSION_EXTERNAL) != 0) {
+        LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s/%s", desc.version, Version,
+                   INDEX_VERSION_EXTERNAL)
    }

    DIR *dir = opendir(args->index_path);
    if (dir == NULL) {
-        perror("opendir");
-        return;
+        LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
    }

    index_func f;
@@ -151,13 +169,14 @@ void sist2_index(index_args_t *args) {
        if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
            char file_path[PATH_MAX];
            snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
-            read_index(file_path, desc.uuid, f);
+            read_index(file_path, desc.uuid, desc.type, f);
        }
    }
+    closedir(dir);

    if (!args->print) {
        elastic_flush();
-        destroy_indexer();
+        destroy_indexer(args->script, desc.uuid);
    }
 }

@@ -165,6 +184,7 @@ void sist2_web(web_args_t *args) {

    WebCtx.es_url = args->es_url;
    WebCtx.index_count = args->index_count;
+    WebCtx.b64credentials = args->b64credentials;

    for (int i = 0; i < args->index_count; i++) {
        char *abs_path = abspath(args->indices[i]);
@@ -196,34 +216,53 @@ int main(int argc, const char *argv[]) {
    index_args_t *index_args = index_args_create();
    web_args_t *web_args = web_args_create();

-    char * common_es_url;
+    int arg_version = 0;
+
+    char *common_es_url = NULL;

    struct argparse_option options[] = {
            OPT_HELP(),

+            OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
+            OPT_BOOLEAN(0, "verbose", &LogCtx.verbose, "Turn on logging"),
+            OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),
+
            OPT_GROUP("Scan options"),
            OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
            OPT_FLOAT('q', "quality", &scan_args->quality,
-                      "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=15"),
-            OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=200"),
+                      "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
+            OPT_INTEGER(0, "size", &scan_args->size,
+                        "Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500"),
            OPT_INTEGER(0, "content-size", &scan_args->content_size,
-                        "Number of bytes to be extracted from text documents. DEFAULT=4096"),
+                        "Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768"),
            OPT_STRING(0, "incremental", &scan_args->incremental,
                       "Reuse an existing index and only scan modified files."),
            OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
            OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
            OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
+            OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
+                                                       "Use 0 to only scan files in PATH. DEFAULT: -1"),
+            OPT_STRING(0, "archive", &scan_args->archive, "Archive file mode (skip|list|shallow|recurse). "
+                                                          "skip: Don't parse, list: only get file names as text, "
+                                                          "shallow: Don't parse archives inside archives. DEFAULT: recurse"),
+            OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
+                                                             "which are installed on your machine)"),
+            OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
+            OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),

            OPT_GROUP("Index options"),
-            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
+            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
            OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
+            OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
+            OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
            OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
-                                                              "(You must use this option the first time you use the index command)"),
+                                                                      "(You must use this option the first time you use the index command)"),

            OPT_GROUP("Web options"),
            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
            OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"),
            OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"),
+            OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),

            OPT_END(),
    };
@@ -233,17 +272,26 @@ int main(int argc, const char *argv[]) {
    argparse_describe(&argparse, DESCRIPTION, EPILOG);
    argc = argparse_parse(&argparse, argc, argv);

+    if (arg_version) {
+        printf(Version);
+        goto end;
+    }
+
+    if (LogCtx.very_verbose != 0) {
+        LogCtx.verbose = 1;
+    }
+
    web_args->es_url = common_es_url;
    index_args->es_url = common_es_url;

    if (argc == 0) {
        argparse_usage(&argparse);
-        return 1;
+        goto end;
    } else if (strcmp(argv[0], "scan") == 0) {

        int err = scan_args_validate(scan_args, argc, argv);
        if (err != 0) {
-            return err;
+            goto end;
        }
        sist2_scan(scan_args);

@@ -251,7 +299,7 @@ int main(int argc, const char *argv[]) {

        int err = index_args_validate(index_args, argc, argv);
        if (err != 0) {
-            return err;
+            goto end;
        }
        sist2_index(index_args);

@@ -259,15 +307,21 @@ int main(int argc, const char *argv[]) {

        int err = web_args_validate(web_args, argc, argv);
        if (err != 0) {
-            return err;
+            goto end;
        }
        sist2_web(web_args);

    } else {
        fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
        argparse_usage(&argparse);
-        return 1;
+        goto end;
    }
    printf("\n");
+
+    end:
+    scan_args_destroy(scan_args);
+    index_args_destroy(index_args);
+    web_args_destroy(web_args);
+
    return 0;
 }
--- a/src/parsing/arc.c
+++ b/src/parsing/arc.c
@@ -0,0 +1,155 @@
+#include "arc.h"
+#include "src/ctx.h"
+
+int should_parse_filtered_file(const char *filepath, int ext) {
+    // TRUE when the first ext - 1 characters of filepath end in ".tar" (e.g. "a.tar" out of
+    // "a.tar.gz" when ext is the offset just past the final '.'); FALSE otherwise.
+    char tmp[PATH_MAX * 2];
+
+    // Reject offsets that would underflow the copy length or write past tmp.
+    if (ext <= 0 || (size_t) ext > sizeof(tmp)) {
+        return FALSE;
+    }
+
+    memcpy(tmp, filepath, ext - 1);
+    *(tmp + ext - 1) = '\0';
+
+    char *idx = strrchr(tmp, '.');
+    if (idx == NULL) {
+        return FALSE;
+    }
+    if (strcmp(idx, ".tar") == 0) {
+        return TRUE;
+    }
+    return FALSE;
+}
+
+int arc_read(struct vfile *f, void *buf, size_t size) { // vfile read hook backed by the libarchive handle f->arc
+    return archive_read_data(f->arc, buf, size);
+}
+
+typedef struct arc_data { // user_data handed to the vfile_*_callback functions
+    vfile_t *f; // source the callbacks read from
+    char buf[ARC_BUF_SIZE]; // staging buffer that vfile_read_callback fills and exposes to libarchive
+} arc_data_f;
+
+int vfile_open_callback(struct archive *a, void *user_data) {
+    arc_data_f *data = user_data;
+    // Opens the file by path only for filesystem-backed vfiles that have no descriptor yet (fd == -1).
+    if (data->f->is_fs_file && data->f->fd == -1) {
+        data->f->fd = open(data->f->filepath, O_RDONLY);
+    }
+    // NOTE(review): ARCHIVE_OK is returned even if open() failed; confirm the read path reports it.
+    return ARCHIVE_OK;
+}
+
+long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
+    arc_data_f *data = user_data;
+    // Point libarchive at the staging buffer, then fill it through the vfile's read hook.
+    *buf = data->buf;
+    return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
+}
+
+int vfile_close_callback(struct archive *a, void *user_data) {
+    arc_data_f *data = user_data;
+    // The close hook is optional (parse_archive sets it to NULL for archive entries).
+    if (data->f->close != NULL) {
+        data->f->close(data->f);
+    }
+
+    return ARCHIVE_OK;
+}
+
+void parse_archive(vfile_t *f, document_t *doc) {
+    // Opens f as an archive, then either lists its entry names (ARC_MODE_LIST) or parses each entry.
+    struct archive *a;
+    struct archive_entry *entry;
+
+    // data is handed to the vfile_* callbacks as their user_data.
+    arc_data_f data;
+    data.f = f;
+
+    int ret = 0;
+    if (data.f->is_fs_file) {
+        // Filesystem-backed file: libarchive opens it by path.
+        a = archive_read_new();
+        archive_read_support_filter_all(a);
+        archive_read_support_format_all(a);
+
+        ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
+    } else if (ScanCtx.archive_mode == ARC_MODE_RECURSE) {
+        // Not filesystem-backed: only handled in recurse mode, streamed through the vfile callbacks.
+        a = archive_read_new();
+        archive_read_support_filter_all(a);
+        archive_read_support_format_all(a);
+
+        ret = archive_read_open(
+                a, &data,
+                vfile_open_callback,
+                vfile_read_callback,
+                vfile_close_callback
+        );
+    } else {
+        return;
+    }
+
+    if (ret != ARCHIVE_OK) {
+        LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
+        archive_read_free(a);
+        return;
+    }
+
+    if (ScanCtx.archive_mode == ARC_MODE_LIST) {
+        // List mode: the document content is the newline-separated names of the regular entries.
+        dyn_buffer_t buf = dyn_buffer_create();
+
+        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
+            if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
+
+                char *path = (char *) archive_entry_pathname(entry);
+
+                dyn_buffer_append_string(&buf, path);
+                dyn_buffer_write_char(&buf, '\n');
+            }
+        }
+        dyn_buffer_write_char(&buf, '\0');
+
+        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
+        meta_list->key = MetaContent;
+        strcpy(meta_list->strval, buf.buf);
+        APPEND_META(doc, meta_list);
+        dyn_buffer_destroy(&buf);
+
+    } else {
+        // One job struct is reused for every entry; parse() is invoked directly on each regular file.
+        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + PATH_MAX * 2);
+
+        sub_job->vfile.close = NULL;
+        sub_job->vfile.read = arc_read;
+        sub_job->vfile.arc = a;
+        sub_job->vfile.filepath = sub_job->filepath;
+        sub_job->vfile.is_fs_file = FALSE;
+        memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));
+
+        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
+            sub_job->info = *archive_entry_stat(entry);
+            if (S_ISREG(sub_job->info.st_mode)) {
+                snprintf(sub_job->filepath, PATH_MAX * 2, "%s#/%s", f->filepath, archive_entry_pathname(entry));
+                sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;
+                // Entry names come from the archive, so the path above is bounded to the space allocated.
+                char *p = strrchr(sub_job->filepath, '.');
+                if (p != NULL) {
+                    sub_job->ext = (int) (p - sub_job->filepath + 1);
+                } else {
+                    sub_job->ext = (int) strlen(sub_job->filepath);
+                }
+
+                parse(sub_job);
+            }
+        }
+
+        free(sub_job);
+    }
+
+    archive_read_free(a);
+}
--- a/src/parsing/arc.h
+++ b/src/parsing/arc.h
@@ -0,0 +1,13 @@
+#ifndef SIST2_ARC_H
+#define SIST2_ARC_H
+
+#include "src/sist.h"
+#define ARC_BUF_SIZE 8192
+
+int should_parse_filtered_file(const char *filepath, int ext);
+
+void parse_archive(vfile_t *f, document_t *doc);
+
+int arc_read(struct vfile * f, void *buf, size_t size);
+
+#endif
--- a/src/parsing/cbr.c
+++ b/src/parsing/cbr.c
@@ -0,0 +1,52 @@
+#include "cbr.h"
+#include "src/ctx.h"
+
+unsigned int cbr_mime;
+unsigned int cbz_mime;
+
+void cbr_init() { // caches the mime ids for cbr/cbz, looked up in ScanCtx.mime_table
+    cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
+    cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
+}
+
+int is_cbr(unsigned int mime) { // non-zero when mime equals the id cached for "application/x-cbr"
+    return mime == cbr_mime;
+}
+
+void parse_cbr(void *buf, size_t buf_len, document_t *doc) {
+    // Repacks the RAR data in buf as an in-memory ZIP and passes it to parse_pdf() with the cbz mime.
+    size_t out_buf_used = 0;
+    char *out_buf = malloc(buf_len * 2);
+    if (out_buf == NULL) {
+        return; // nothing else is allocated yet, and the archive writer must not get a NULL buffer
+    }
+    struct archive *rar_in = archive_read_new();
+    archive_read_support_filter_none(rar_in);
+    archive_read_support_format_rar(rar_in);
+    archive_read_open_memory(rar_in, buf, buf_len);
+    struct archive *zip_out = archive_write_new();
+    archive_write_set_format_zip(zip_out);
+    archive_write_open_memory(zip_out, out_buf, buf_len * 2, &out_buf_used);
+
+    struct archive_entry *entry;
+    while (archive_read_next_header(rar_in, &entry) == ARCHIVE_OK) {
+        archive_write_header(zip_out, entry);
+        char arc_buf[ARC_BUF_SIZE];
+        int len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE);
+        while (len > 0) {
+            archive_write_data(zip_out, arc_buf, len);
+            len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE);
+        }
+    }
+
+    archive_write_close(zip_out);
+    archive_write_free(zip_out);
+
+    archive_read_close(rar_in);
+    archive_read_free(rar_in);
+    // parse_pdf() sees the cbz mime for the repacked data; the document gets its cbr mime back after.
+    doc->mime = cbz_mime;
+    parse_pdf(out_buf, out_buf_used, doc);
+    doc->mime = cbr_mime;
+    free(out_buf);
+}
--- a/src/parsing/cbr.h
+++ b/src/parsing/cbr.h
@@ -0,0 +1,12 @@
+#ifndef SIST2_CBR_H
+#define SIST2_CBR_H
+
+#include "src/sist.h"
+
+void cbr_init();
+
+int is_cbr(unsigned int mime);
+
+void parse_cbr(void *buf, size_t buf_len, document_t *doc);
+
+#endif
--- a/src/parsing/doc.c
+++ b/src/parsing/doc.c
@@ -0,0 +1,140 @@
+#include "doc.h"
+#include "src/ctx.h"
+
+
+#define STR_STARTS_WITH(x, y) (strncmp(y, x, sizeof(y) - 1) == 0)
+
+__always_inline
+static int should_read_part(const char *part) {
+    // TRUE for the Word, PowerPoint and Excel archive parts listed below; FALSE for anything else.
+    if (part == NULL) {
+        return FALSE;
+    }
+    // Logged only after the NULL check: passing NULL to "%s" is undefined behavior.
+    LOG_DEBUGF("doc.c", "Got part : %s", part)
+
+    if (    // Word
+            STR_STARTS_WITH(part, "word/document.xml")
+            || STR_STARTS_WITH(part, "word/footnotes.xml")
+            || STR_STARTS_WITH(part, "word/endnotes.xml")
+            || STR_STARTS_WITH(part, "word/footer")
+            || STR_STARTS_WITH(part, "word/header")
+            // PowerPoint
+            || STR_STARTS_WITH(part, "ppt/slides/slide")
+            || STR_STARTS_WITH(part, "ppt/notesSlides/slide")
+            // Excel
+            || STR_STARTS_WITH(part, "xl/worksheets/sheet")
+            || STR_STARTS_WITH(part, "xl/sharedStrings.xml")
+            || STR_STARTS_WITH(part, "xl/workbook.xml")
+            ) {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+int extract_text(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
+    // Appends the text of every element named "t" among node, its siblings and their descendants
+    // to buf. Returns -1 if libxml2's last recorded error is fatal, 0 otherwise.
+    //TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't
+    xmlErrorPtr err = xmlGetLastError();
+    if (err != NULL) {
+        if (err->level == XML_ERR_FATAL) {
+            LOG_ERRORF("doc.c", "Got fatal XML error while parsing document: %s", err->message)
+            return -1;
+        } else {
+            LOG_ERRORF("doc.c", "Got recoverable XML error while parsing document: %s", err->message)
+        }
+    }
+    for (xmlNode *child = node; child; child = child->next) {
+        if (*child->name == 't' && *(child->name + 1) == '\0') {
+            xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1);
+            if (text) {
+                text_buffer_append_string0(buf, (char *) text);
+                text_buffer_append_char(buf, ' ');
+                xmlFree(text);
+            }
+        }
+        extract_text(xml, child->children, buf);
+    }
+    return 0;
+}
+
+int xml_io_read(void *context, char *buffer, int len) { // read callback given to xmlReadIO; context is the struct archive *
+    struct archive *a = context;
+    return archive_read_data(a, buffer, len);
+}
+
+int xml_io_close(UNUSED(void *context)) {
+    // Nothing to release here: parse_doc() closes and frees the archive itself.
+    return 0;
+}
+
+__always_inline
+static int read_part(struct archive *a, text_buffer_t *buf, document_t *doc) {
+    // Parses the current archive entry as XML and appends its text to buf; returns 0, or -1 on failure.
+    xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL, XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
+
+    if (xml == NULL) {
+        LOG_ERROR(doc->filepath, "Could not parse XML")
+        return -1;
+    }
+
+    xmlNode *root = xmlDocGetRootElement(xml);
+    if (root == NULL) {
+        LOG_ERROR(doc->filepath, "Empty document")
+        xmlFreeDoc(xml);
+        return -1;
+    }
+    // The return value of extract_text() is not checked here.
+    extract_text(xml, root, buf);
+    xmlFreeDoc(xml);
+
+    return 0;
+}
+
+void parse_doc(void *mem, size_t mem_len, document_t *doc) {
+    // Reads the in-memory ZIP container mem and stores the text of its document parts as MetaContent.
+    if (mem == NULL) {
+        return;
+    }
+
+    struct archive *a = archive_read_new();
+    archive_read_support_format_zip(a);
+
+    int ret = archive_read_open_memory(a, mem, mem_len);
+    if (ret != ARCHIVE_OK) {
+        LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a))
+        archive_read_free(a);
+        return;
+    }
+    // Text is accumulated in a buffer created with the configured ScanCtx.content_size.
+    text_buffer_t buf = text_buffer_create(ScanCtx.content_size);
+
+    struct archive_entry *entry;
+    while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
+        if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
+            const char *path = archive_entry_pathname(entry);
+
+            if (should_read_part(path)) {
+                ret = read_part(a, &buf, doc);
+                if (ret != 0) {
+                    break;
+                }
+            }
+        }
+    }
+    // Emit a content meta line only when some text was extracted.
+    if (buf.dyn_buffer.cur > 0) {
+        text_buffer_terminate_string(&buf);
+
+        meta_line_t *meta = malloc(sizeof(meta_line_t) + buf.dyn_buffer.cur);
+        meta->key = MetaContent;
+        strcpy(meta->strval, buf.dyn_buffer.buf);
+        APPEND_META(doc, meta)
+    }
+
+    archive_read_close(a);
+    archive_read_free(a);
+    text_buffer_destroy(&buf);
+}
--- a/src/parsing/doc.h
+++ b/src/parsing/doc.h
@@ -0,0 +1,8 @@
+#ifndef SIST2_DOC_H
+#define SIST2_DOC_H
+
+#include "src/sist.h"
+
+void parse_doc(void *buf, size_t buf_len, document_t *doc);
+
+#endif
--- a/src/parsing/font.c
+++ b/src/parsing/font.c
@@ -1,11 +1,9 @@
 #include "font.h"

-#include "ft2build.h"
-#include "freetype/freetype.h"

 #include "src/ctx.h"

-__thread FT_Library library = NULL;
+__thread FT_Library ft_lib = NULL;


 typedef struct text_dimensions {
@@ -15,12 +13,12 @@ typedef struct text_dimensions {
 } text_dimensions_t;

 typedef struct glyph {
-    unsigned int top;
-    unsigned int height;
-    unsigned int width;
-    unsigned int descent;
-    unsigned int ascent;
-    unsigned int advance_width;
+    int top;
+    int height;
+    int width;
+    int descent;
+    int ascent;
+    int advance_width;
    unsigned char *pixmap;
 } glyph_t;

@@ -39,10 +37,10 @@ glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {

    glyph.pixmap = slot->bitmap.buffer;

-    glyph.width = slot->bitmap.width;
-    glyph.height = slot->bitmap.rows;
+    glyph.width = (int) slot->bitmap.width;
+    glyph.height = (int) slot->bitmap.rows;
    glyph.top = slot->bitmap_top;
-    glyph.advance_width = slot->advance.x / 64;
+    glyph.advance_width = (int) slot->advance.x / 64;

    glyph.descent = MAX(0, glyph.height - glyph.top);
    glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent);
@@ -50,10 +48,6 @@ glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) {
    return glyph;
 }

-__always_inline
-glyph_t get_glyph(char character, FT_Face face) {
-}
-
 text_dimensions_t text_dimension(char *text, FT_Face face) {
    text_dimensions_t dimensions;

@@ -62,7 +56,7 @@ text_dimensions_t text_dimension(char *text, FT_Face face) {
    int num_chars = (int) strlen(text);

    unsigned int max_ascent = 0;
-    unsigned int max_descent = 0;
+    int max_descent = 0;

    char pc = 0;
    for (int i = 0; i < num_chars; i++) {
@@ -72,7 +66,7 @@ text_dimensions_t text_dimension(char *text, FT_Face face) {
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        max_descent = MAX(max_descent, glyph.descent);
-        max_ascent = MAX(max_ascent, glyph.ascent);
+        max_ascent = MAX(max_ascent, MAX(glyph.height, glyph.ascent));

        int kerning_x = kerning_offset(c, pc, face);
        dimensions.width += MAX(glyph.advance_width, glyph.width) + kerning_x;
@@ -143,20 +137,29 @@ void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned
 }

 void parse_font(const char *buf, size_t buf_len, document_t *doc) {
-    if (library == NULL) {
-        FT_Init_FreeType(&library);
+    if (ft_lib == NULL) {
+        FT_Init_FreeType(&ft_lib);
+    }
+
+    if (buf == NULL) {
+        return;
    }

    FT_Face face;
-    FT_Error err = FT_New_Memory_Face(library, (unsigned char *) buf, buf_len, 0, &face);
+    FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
    if (err != 0) {
+        LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, ft_error_string(err));
        return;
    }

    char font_name[1024];

    if (face->style_name == NULL || *(face->style_name) == '?') {
-        strcpy(font_name, face->family_name);
+        if (face->family_name == NULL) {
+            strcpy(font_name, "(null)");
+        } else {
+            strcpy(font_name, face->family_name);
+        }
    } else {
        snprintf(font_name, sizeof(font_name), "%s %s", face->family_name, face->style_name);
    }
@@ -166,11 +169,18 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
    strcpy(meta_name->strval, font_name);
    APPEND_META(doc, meta_name)

+    if (ScanCtx.tn_size <= 0) {
+        FT_Done_Face(face);
+        return;
+    }
+
    int pixel = 64;
    int num_chars = (int) strlen(font_name);

    err = FT_Set_Pixel_Sizes(face, 0, pixel);
    if (err != 0) {
+        LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err))
+        FT_Done_Face(face);
        return;
    }

@@ -186,11 +196,19 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {

        err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
        if (err != 0) {
-            continue;
+            c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
+            err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
+            if (err != 0) {
+                LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, ft_error_string(err));
+                continue;
+            }
        }
        glyph_t glyph = ft_glyph_to_glyph(face->glyph);

        pen.x += kerning_offset(c, pc, face);
+        if (pen.x <= 0) {
+            pen.x = ABS(glyph.advance_width - glyph.width);
+        }
        pen.y = dimensions.height - glyph.ascent - dimensions.baseline;

        draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap);
@@ -209,3 +227,7 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {

    FT_Done_Face(face);
 }
+
+/**
+ * Release this thread's FreeType library handle, if one was created.
+ *
+ * ft_lib is reset to NULL afterwards: parse_font() only initialises the
+ * library when ft_lib == NULL, so leaving the freed handle in place would
+ * make a later parse_font() call on this thread use a dangling handle.
+ * Resetting it also makes repeated calls to cleanup_font() harmless.
+ */
+void cleanup_font() {
+    if (ft_lib != NULL) {
+        FT_Done_FreeType(ft_lib);
+        ft_lib = NULL;
+    }
+}
--- a/src/parsing/font.h
+++ b/src/parsing/font.h
@@ -5,5 +5,6 @@


 void parse_font(const char * buf, size_t buf_len, document_t *doc);
+void cleanup_font();

 #endif
--- a/src/parsing/media.c
+++ b/src/parsing/media.c
@@ -1,7 +1,11 @@
 #include "src/sist.h"
 #include "src/ctx.h"

-AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
+#define MIN_SIZE 32
+#define AVIO_BUF_SIZE 8192
+
+__always_inline
+static AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {

    AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
    AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
@@ -22,8 +26,8 @@ AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
    return jpeg;
 }

+__always_inline
 AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
-    AVFrame *scaled_frame = av_frame_alloc();

    int dstW;
    int dstH;
@@ -41,16 +45,22 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si
        }
    }

+    if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
+        return NULL;
+    }
+
+    AVFrame *scaled_frame = av_frame_alloc();
+
    struct SwsContext *ctx = sws_getContext(
            decoder->width, decoder->height, decoder->pix_fmt,
            dstW, dstH, AV_PIX_FMT_YUVJ420P,
            SWS_FAST_BILINEAR, 0, 0, 0
    );

-    int dst_buf_len = avpicture_get_size(AV_PIX_FMT_YUVJ420P, dstW, dstH);
+    int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
    uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);

-    avpicture_fill((AVPicture *) scaled_frame, dst_buf, AV_PIX_FMT_YUVJ420P, dstW, dstH);
+    av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);

    sws_scale(ctx,
              (const uint8_t *const *) frame->data, frame->linesize,
@@ -67,7 +77,8 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si
    return scaled_frame;
 }

-AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx) {
+__always_inline
+static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
    AVFrame *frame = av_frame_alloc();

    AVPacket avPacket;
@@ -81,7 +92,10 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st

            if (read_frame_ret != 0) {
                if (read_frame_ret != AVERROR_EOF) {
-                    fprintf(stderr, "Error reading frame: %s\n", av_err2str(read_frame_ret));
+                    LOG_WARNINGF(doc->filepath,
+                                 "(media.c) avcodec_read_frame() returned error code [%d] %s",
+                                 read_frame_ret, av_err2str(read_frame_ret)
+                    )
                }
                av_frame_free(&frame);
                av_packet_unref(&avPacket);
@@ -99,7 +113,13 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
        // Feed it to decoder
        int decode_ret = avcodec_send_packet(decoder, &avPacket);
        if (decode_ret != 0) {
-            printf("Error decoding frame: %s\n", av_err2str(decode_ret));
+            LOG_ERRORF(doc->filepath,
+                         "(media.c) avcodec_send_packet() returned error code [%d] %s",
+                         decode_ret, av_err2str(decode_ret)
+            )
+            av_frame_free(&frame);
+            av_packet_unref(&avPacket);
+            return NULL;
        }
        av_packet_unref(&avPacket);
        receive_ret = avcodec_receive_frame(decoder, frame);
@@ -107,61 +127,102 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
    return frame;
 }

-void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
+/*
+ * Append the value of the AVDictionaryEntry `tag_` to `doc` as a string meta
+ * line stored under `keyname`. The value is first copied through a temporary
+ * text_buffer_t, then into a freshly malloc'ed meta_line_t.
+ *
+ * The expansion is wrapped in its own block so the `tex` / `meta_tag`
+ * temporaries stay out of the caller's scope (the macro can then be used more
+ * than once in the same scope). Call sites use it as a complete statement,
+ * without a trailing semicolon.
+ */
+#define APPEND_TAG_META(doc, tag_, keyname) { \
+    text_buffer_t tex = text_buffer_create(-1); \
+    text_buffer_append_string0(&tex, (tag_)->value); \
+    text_buffer_terminate_string(&tex); \
+    meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
+    meta_tag->key = keyname; \
+    strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
+    APPEND_META(doc, meta_tag) \
+    text_buffer_destroy(&tex); \
+}
+
+/**
+ * Copy the artist / genre / title / album_artist / album tags found in
+ * pFormatCtx->metadata into doc's meta lines.
+ *
+ * Tag names are matched case-insensitively by comparing a lowercased local
+ * copy of each key; the dictionary entry itself is not modified.
+ * Keys longer than the local buffer are truncated before comparison.
+ */
+__always_inline
+static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {

    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
-        char *key = tag->key;
-        for (; *key; ++key) *key = (char) tolower(*key);
+        char key[256];
+        // snprintf always NUL-terminates; strncpy does not when the key fills the buffer
+        snprintf(key, sizeof(key), "%s", tag->key);

-        if (strcmp(tag->key, "artist") == 0) {
-            size_t len = strlen(tag->value);
-            meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
-            meta_tag->key = MetaArtist;
-            memcpy(meta_tag->strval, tag->value, len);
-            APPEND_META(doc, meta_tag)
-        } else if (strcmp(tag->key, "genre") == 0) {
-            size_t len = strlen(tag->value);
-            meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
-            meta_tag->key = MetaGenre;
-            memcpy(meta_tag->strval, tag->value, len);
-            APPEND_META(doc, meta_tag)
-        } else if (strcmp(tag->key, "title") == 0) {
-            size_t len = strlen(tag->value);
-            meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
-            meta_tag->key = MetaTitle;
-            memcpy(meta_tag->strval, tag->value, len);
-            APPEND_META(doc, meta_tag)
-        } else if (strcmp(tag->key, "album_artist") == 0) {
-            size_t len = strlen(tag->value);
-            meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
-            meta_tag->key = MetaAlbumArtist;
-            memcpy(meta_tag->strval, tag->value, len);
-            APPEND_META(doc, meta_tag)
-        } else if (strcmp(tag->key, "album") == 0) {
-            size_t len = strlen(tag->value);
-            meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + len);
-            meta_tag->key = MetaAlbum;
-            memcpy(meta_tag->strval, tag->value, len);
-            APPEND_META(doc, meta_tag)
+        char *ptr = key;
+        // tolower() is only defined for values representable as unsigned char (or EOF)
+        for (; *ptr; ++ptr) *ptr = (char) tolower((unsigned char) *ptr);
+
+        if (strcmp(key, "artist") == 0) {
+            APPEND_TAG_META(doc, tag, MetaArtist)
+        } else if (strcmp(key, "genre") == 0) {
+            APPEND_TAG_META(doc, tag, MetaGenre)
+        } else if (strcmp(key, "title") == 0) {
+            APPEND_TAG_META(doc, tag, MetaTitle)
+        } else if (strcmp(key, "album_artist") == 0) {
+            APPEND_TAG_META(doc, tag, MetaAlbumArtist)
+        } else if (strcmp(key, "album") == 0) {
+            APPEND_TAG_META(doc, tag, MetaAlbum)
        }
    }
 }

-void parse_media(const char *filepath, document_t *doc) {
+/**
+ * Attach metadata for the selected video/image stream to doc.
+ *
+ * When is_video is non-zero: pFormatCtx->duration divided by AV_TIME_BASE, the
+ * container bit rate, and the container "comment" tag are recorded; "title"
+ * and "artist" are recorded only if include_audio_tags is non-zero.
+ * Otherwise the EXIF-style entries found in frame->metadata are recorded
+ * ("Artist" only if include_audio_tags is non-zero).
+ */
+__always_inline
+static void
+append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) {
+
+    AVDictionaryEntry *entry = NULL;
+
+    if (!is_video) {
+        // EXIF metadata
+        while ((entry = av_dict_get(frame->metadata, "", entry, AV_DICT_IGNORE_SUFFIX)) != NULL) {
+            const char *name = entry->key;
+
+            if (include_audio_tags && !strcmp(name, "Artist")) {
+                APPEND_TAG_META(doc, entry, MetaArtist)
+            } else if (!strcmp(name, "ImageDescription")) {
+                APPEND_TAG_META(doc, entry, MetaContent)
+            } else if (!strcmp(name, "Make")) {
+                APPEND_TAG_META(doc, entry, MetaExifMake)
+            } else if (!strcmp(name, "Model")) {
+                APPEND_TAG_META(doc, entry, MetaExifModel)
+            } else if (!strcmp(name, "Software")) {
+                APPEND_TAG_META(doc, entry, MetaExifSoftware)
+            } else if (!strcmp(name, "FNumber")) {
+                APPEND_TAG_META(doc, entry, MetaExifFNumber)
+            } else if (!strcmp(name, "FocalLength")) {
+                APPEND_TAG_META(doc, entry, MetaExifFocalLength)
+            } else if (!strcmp(name, "UserComment")) {
+                APPEND_TAG_META(doc, entry, MetaExifUserComment)
+            } else if (!strcmp(name, "ISOSpeedRatings")) {
+                APPEND_TAG_META(doc, entry, MetaExifIsoSpeedRatings)
+            } else if (!strcmp(name, "ExposureTime")) {
+                APPEND_TAG_META(doc, entry, MetaExifExposureTime)
+            } else if (!strcmp(name, "DateTime")) {
+                APPEND_TAG_META(doc, entry, MetaExifDateTime)
+            }
+        }
+        return;
+    }
+
+    // Video: container-level duration and bit rate first, then selected tags
+    meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
+    meta_duration->key = MetaMediaDuration;
+    meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
+    APPEND_META(doc, meta_duration)
+
+    meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
+    meta_bitrate->key = MetaMediaBitrate;
+    meta_bitrate->longval = pFormatCtx->bit_rate;
+    APPEND_META(doc, meta_bitrate)
+
+    while ((entry = av_dict_get(pFormatCtx->metadata, "", entry, AV_DICT_IGNORE_SUFFIX)) != NULL) {
+        const char *name = entry->key;
+
+        if (include_audio_tags && !strcmp(name, "title")) {
+            APPEND_TAG_META(doc, entry, MetaTitle)
+        } else if (!strcmp(name, "comment")) {
+            APPEND_TAG_META(doc, entry, MetaContent)
+        } else if (include_audio_tags && !strcmp(name, "artist")) {
+            APPEND_TAG_META(doc, entry, MetaArtist)
+        }
+    }
+}
+
+void parse_media(AVFormatContext *pFormatCtx, document_t *doc) {

    int video_stream = -1;
-
-    AVFormatContext *pFormatCtx = avformat_alloc_context();
-    if (pFormatCtx == NULL) {
-        fprintf(stderr, "Could not allocate AVFormatContext! %s \n", filepath);
-        return;
-    }
-    int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
-    if (res < 0) {
-        printf("ERR%s %s\n", filepath, av_err2str(res));
-        return;
-    }
+    int audio_stream = -1;

    avformat_find_stream_info(pFormatCtx, NULL);

@@ -169,51 +230,42 @@ void parse_media(const char *filepath, document_t *doc) {
        AVStream *stream = pFormatCtx->streams[i];

        if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-            meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
-            meta_audio->key = MetaMediaAudioCodec;
-            meta_audio->intval = stream->codecpar->codec_id;
-            APPEND_META(doc, meta_audio)
-
-            append_audio_meta(pFormatCtx, doc);
+            if (audio_stream == -1) {
+                meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
+                meta_audio->key = MetaMediaAudioCodec;
+                meta_audio->intval = stream->codecpar->codec_id;
+                APPEND_META(doc, meta_audio)

+                append_audio_meta(pFormatCtx, doc);
+                audio_stream = i;
+            }
        } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {

-            meta_line_t *meta_vid = malloc(sizeof(meta_line_t));
-            meta_vid->key = MetaMediaVideoCodec;
-            meta_vid->intval = stream->codecpar->codec_id;
-            APPEND_META(doc, meta_vid)
+            if (video_stream == -1) {
+                meta_line_t *meta_vid = malloc(sizeof(meta_line_t));
+                meta_vid->key = MetaMediaVideoCodec;
+                meta_vid->intval = stream->codecpar->codec_id;
+                APPEND_META(doc, meta_vid)

-            meta_line_t *meta_w = malloc(sizeof(meta_line_t));
-            meta_w->key = MetaWidth;
-            meta_w->intval = stream->codecpar->width;
-            APPEND_META(doc, meta_w)
+                meta_line_t *meta_w = malloc(sizeof(meta_line_t));
+                meta_w->key = MetaWidth;
+                meta_w->intval = stream->codecpar->width;
+                APPEND_META(doc, meta_w)

-            meta_line_t *meta_h = malloc(sizeof(meta_line_t));
-            meta_h->key = MetaHeight;
-            meta_h->intval = stream->codecpar->height;
-            APPEND_META(doc, meta_h)
+                meta_line_t *meta_h = malloc(sizeof(meta_line_t));
+                meta_h->key = MetaHeight;
+                meta_h->intval = stream->codecpar->height;
+                APPEND_META(doc, meta_h)

-            video_stream = i;
+                video_stream = i;
+            }
        }
    }

-    if (video_stream != -1) {
+    if (video_stream != -1 && ScanCtx.tn_size > 0) {
        AVStream *stream = pFormatCtx->streams[video_stream];

-        if (stream->nb_frames > 1) {
-            //This is a video (not a still image)
-            meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
-            meta_duration->key = MetaMediaDuration;
-            meta_duration->longval = pFormatCtx->duration / AV_TIME_BASE;
-            APPEND_META(doc, meta_duration)
-
-            meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
-            meta_bitrate->key = MetaMediaBitrate;
-            meta_bitrate->intval = pFormatCtx->bit_rate;
-            APPEND_META(doc, meta_bitrate)
-        }
-
-        if (stream->codecpar->width <= 20 || stream->codecpar->height <= 20) {
+        if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
            avformat_close_input(&pFormatCtx);
            avformat_free_context(pFormatCtx);
            return;
@@ -237,7 +289,7 @@ void parse_media(const char *filepath, document_t *doc) {
            }
        }

-        AVFrame *frame = read_frame(pFormatCtx, decoder, video_stream);
+        AVFrame *frame = read_frame(pFormatCtx, decoder, video_stream, doc);
        if (frame == NULL) {
            avcodec_free_context(&decoder);
            avformat_close_input(&pFormatCtx);
@@ -245,9 +297,19 @@ void parse_media(const char *filepath, document_t *doc) {
            return;
        }

+        append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1);
+
        // Scale frame
        AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);

+        if (scaled_frame == NULL) {
+            av_frame_free(&frame);
+            avcodec_free_context(&decoder);
+            avformat_close_input(&pFormatCtx);
+            avformat_free_context(pFormatCtx);
+            return;
+        }
+
        // Encode frame to jpeg
        AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
        avcodec_send_frame(jpeg_encoder, scaled_frame);
@@ -257,7 +319,8 @@ void parse_media(const char *filepath, document_t *doc) {
        avcodec_receive_packet(jpeg_encoder, &jpeg_packet);

        // Save thumbnail
-        store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, jpeg_packet.size);
+        store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data,
+                    jpeg_packet.size);

        av_packet_unref(&jpeg_packet);
        av_frame_free(&frame);
@@ -271,3 +334,69 @@ void parse_media(const char *filepath, document_t *doc) {
    avformat_free_context(pFormatCtx);
 }

+/**
+ * Open the media file at filepath with libavformat and pass the resulting
+ * format context to parse_media(). Allocation and open failures are logged
+ * against doc->filepath and the function returns without parsing.
+ */
+void parse_media_filename(const char *filepath, document_t *doc) {
+
+    AVFormatContext *fmt_ctx = avformat_alloc_context();
+    if (!fmt_ctx) {
+        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
+        return;
+    }
+
+    const int open_ret = avformat_open_input(&fmt_ctx, filepath, NULL, NULL);
+    if (open_ret >= 0) {
+        // Opened successfully: hand the context over to the shared parser
+        parse_media(fmt_ctx, doc);
+        return;
+    }
+
+    LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", open_ret, av_err2str(open_ret))
+    avformat_close_input(&fmt_ctx);
+    avformat_free_context(fmt_ctx);
+}
+
+
+/**
+ * Read callback handed to avio_alloc_context() by parse_media_vfile().
+ *
+ * ptr is the struct vfile registered as the opaque pointer. The call is
+ * forwarded to the vfile's own read function; its return value is passed
+ * through unchanged, except that 0 is reported as AVERROR_EOF.
+ */
+int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
+    struct vfile *vf = ptr;
+    const int n_read = vf->read(vf, buf, buf_size);
+
+    return n_read == 0 ? AVERROR_EOF : n_read;
+}
+
+/**
+ * Parse media that is read through a struct vfile instead of a path.
+ *
+ * A custom AVIOContext is created around vfile_read() (read-only, no seek
+ * callback) and attached to a fresh AVFormatContext, which is then handed to
+ * parse_media(). The I/O buffer and AVIOContext are owned by this function
+ * and released on every path.
+ *
+ * Media that requires seeking cannot be opened this way; avformat_open_input()
+ * then fails with AVERROR(EIO) (-5), which is treated as expected and not logged.
+ */
+void parse_media_vfile(struct vfile *f, document_t *doc) {
+
+    AVFormatContext *pFormatCtx = avformat_alloc_context();
+    if (pFormatCtx == NULL) {
+        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
+        return;
+    }
+
+    unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
+    if (buffer == NULL) {
+        LOG_ERROR(doc->filepath, "(media.c) Could not allocate I/O buffer with av_malloc()")
+        avformat_free_context(pFormatCtx);
+        return;
+    }
+
+    AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
+    if (io_ctx == NULL) {
+        // Without this check the cleanup below would dereference a NULL io_ctx
+        LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avio_alloc_context()")
+        av_free(buffer);
+        avformat_free_context(pFormatCtx);
+        return;
+    }
+
+    pFormatCtx->pb = io_ctx;
+    pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;
+
+    int res = avformat_open_input(&pFormatCtx, "", NULL, NULL);
+    if (res < 0) {
+        // AVERROR(EIO): tried to parse media that requires seek -- expected, stay quiet
+        if (res != AVERROR(EIO)) {
+            LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
+        }
+        // io_ctx->buffer (not `buffer`) is freed: libavformat may have replaced the buffer
+        av_free(io_ctx->buffer);
+        avio_context_free(&io_ctx);
+        avformat_close_input(&pFormatCtx);
+        avformat_free_context(pFormatCtx);
+        return;
+    }
+
+    parse_media(pFormatCtx, doc);
+    av_free(io_ctx->buffer);
+    avio_context_free(&io_ctx);
+}
+
--- a/src/parsing/media.h
+++ b/src/parsing/media.h
@@ -5,7 +5,10 @@
 #include "src/sist.h"

 #define MIN_VIDEO_SIZE 1024 * 64
+#define MIN_IMAGE_SIZE (1024 * 2) // parenthesised so the macro is safe inside larger expressions

-void parse_media(const char * filepath, document_t *doc);
+void parse_media_filename(const char * filepath, document_t *doc);
+
+void parse_media_vfile(struct vfile *f, document_t *doc);

 #endif
--- a/src/parsing/mime.c
+++ b/src/parsing/mime.c
@@ -1,10 +1,12 @@
 #include "mime.h"

 unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) {
-    char lower[64];
+    char lower[8];
    char *p = lower;
-    while ((*ext)) {
+    int cnt = 0;
+    while ((*ext) != '\0' && cnt + 1 < sizeof(lower)) {
        *p++ = (char)tolower(*ext++);
+        cnt++;
    }
    *p = '\0';
    return (size_t) g_hash_table_lookup(ext_table, lower);
--- a/src/parsing/mime.h
+++ b/src/parsing/mime.h
@@ -8,7 +8,7 @@
 #define MIME_EMPTY 1

 #define DONT_PARSE 0x80000000
-#define SHOULD_PARSE(mime_id) (mime_id & DONT_PARSE) != DONT_PARSE
+// True when fast mode is off, the DONT_PARSE bit is clear and the mime id is non-zero.
+// The argument is parenthesised so that expression arguments expand correctly.
+#define SHOULD_PARSE(mime_id) (ScanCtx.fast == 0 && ((mime_id) & DONT_PARSE) != DONT_PARSE && (mime_id) != 0)

 #define PDF_MASK 0x40000000
 #define IS_PDF(mime_id) (mime_id & PDF_MASK) == PDF_MASK
@@ -16,6 +16,15 @@
 #define FONT_MASK 0x20000000
 #define IS_FONT(mime_id) (mime_id & FONT_MASK) == FONT_MASK

+// Flag bits carried in a mime id, each with a predicate macro.
+// The predicates are fully parenthesised so they compose safely with
+// operators such as `!` at the call site.
+#define ARC_MASK 0x10000000
+#define IS_ARC(mime_id) (((mime_id) & ARC_MASK) == ARC_MASK)
+
+#define ARC_FILTER_MASK 0x08000000
+#define IS_ARC_FILTER(mime_id) (((mime_id) & ARC_FILTER_MASK) == ARC_FILTER_MASK)
+
+#define DOC_MASK 0x04000000
+#define IS_DOC(mime_id) (((mime_id) & DOC_MASK) == DOC_MASK)
+
 enum major_mime {
    MimeInvalid = 0,
    MimeModel = 1,
--- a/src/parsing/mime_generated.c
+++ b/src/parsing/mime_generated.c
--- a/src/parsing/parse.c
+++ b/src/parsing/parse.c
@@ -1,9 +1,30 @@
 #include "src/sist.h"
 #include "src/ctx.h"

-__thread magic_t Magic;
+__thread magic_t Magic = NULL;

-void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
+/**
+ * read() implementation for vfiles backed by the local filesystem.
+ *
+ * The descriptor is opened lazily (O_RDONLY on f->filepath) the first time
+ * the function is called while f->fd is -1. Returns the result of read(),
+ * or -1 if the file could not be opened (the failure is logged).
+ */
+int fs_read(struct vfile *f, void *buf, size_t size) {
+
+    // Open on demand; short-circuit keeps an already-open descriptor untouched
+    if (f->fd == -1 && (f->fd = open(f->filepath, O_RDONLY)) == -1) {
+        LOG_ERRORF(f->filepath, "open(): [%d] %s", errno, strerror(errno))
+        return -1;
+    }
+
+    return read(f->fd, buf, size);
+}
+
+// Invoke the vfile's close callback if it has one. Expands to a self-contained
+// block (argument parenthesised), used as a full statement without a trailing ';'.
+#define CLOSE_FILE(f) { if ((f).close != NULL) { (f).close(&(f)); } }
+
+/**
+ * close() implementation for vfiles backed by the local filesystem.
+ *
+ * Closes the descriptor if one is open and resets f->fd to -1, so that a
+ * second fs_close() cannot close a descriptor number that has since been
+ * reused, and so that a later fs_read() reopens the file instead of reading
+ * from a closed descriptor.
+ */
+void fs_close(struct vfile *f) {
+    if (f->fd != -1) {
+        close(f->fd);
+        f->fd = -1;
+    }
+}
+
+void *read_all(parse_job_t *job, const char *buf, int bytes_read) {

    void *full_buf;

@@ -11,20 +32,19 @@ void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
        full_buf = malloc(job->info.st_size);
        memcpy(full_buf, buf, job->info.st_size);
    } else {
-        if (*fd == -1) {
-            *fd = open(job->filepath, O_RDONLY);
-            if (*fd == -1) {
-                perror("open");
-                printf("%s\n", job->filepath);
-                free(job);
-                return NULL;
-            }
-        }
        full_buf = malloc(job->info.st_size);
        memcpy(full_buf, buf, bytes_read);
-        int ret = read(*fd, full_buf + bytes_read, job->info.st_size - bytes_read);
-        if (ret == -1) {
-            perror("read");
+
+        int ret = job->vfile.read(&job->vfile, full_buf + bytes_read, job->info.st_size - bytes_read);
+        if (ret < 0) {
+            free(full_buf);
+
+            if (job->vfile.is_fs_file) {
+                LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
+            } else {
+                LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", ret, archive_error_string(job->vfile.arc))
+            }
+            return NULL;
        }
    }

@@ -36,9 +56,9 @@ void parse(void *arg) {
    parse_job_t *job = arg;
    document_t doc;

-    if (incremental_get(ScanCtx.original_table, job->info.st_ino) == job->info.st_mtim.tv_sec) {
+    int inc_ts = incremental_get(ScanCtx.original_table, job->info.st_ino);
+    if (inc_ts != 0 && inc_ts == job->info.st_mtim.tv_sec) {
        incremental_mark_file_for_copy(ScanCtx.copy_table, job->info.st_ino);
-        free(job);
        return;
    }

@@ -57,70 +77,124 @@ void parse(void *arg) {
    doc.ino = job->info.st_ino;
    doc.mtime = job->info.st_mtim.tv_sec;

-    uuid_generate_time_safe(doc.uuid);
+    uuid_generate(doc.uuid);
    char *buf[PARSE_BUF_SIZE];

+    if (LogCtx.very_verbose) {
+        char uuid_str[UUID_STR_LEN];
+        uuid_unparse(doc.uuid, uuid_str);
+        LOG_DEBUGF(job->filepath, "Starting parse job {%s}", uuid_str)
+    }
+
    if (job->info.st_size == 0) {
        doc.mime = MIME_EMPTY;
-    } else if (*(job->filepath + job->ext) != '\0') {
+    } else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
        doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
    }

-    int fd = -1;
    int bytes_read = 0;

-    if (doc.mime == 0) {
+    if (doc.mime == 0 && !ScanCtx.fast) {
        // Get mime type with libmagic
-        fd = open(job->filepath, O_RDONLY);
-        if (fd == -1) {
-            perror("open");
-            free(job);
+        bytes_read = job->vfile.read(&job->vfile, buf, PARSE_BUF_SIZE);
+        if (bytes_read < 0) {
+
+            if (job->vfile.is_fs_file) {
+                LOG_ERRORF(job->filepath, "read(): [%d] %s", errno, strerror(errno))
+            } else {
+                LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
+            }
+
+            CLOSE_FILE(job->vfile)
            return;
        }

-        bytes_read = read(fd, buf, PARSE_BUF_SIZE);
-
        const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
        if (magic_mime_str != NULL) {
            doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
+
+            LOG_DEBUGF(job->filepath, "libmagic: %s", magic_mime_str);
+
            if (doc.mime == 0) {
-                fprintf(stderr, "Couldn't find mime %s, %s!\n", magic_mime_str, job->filepath + job->base);
+                LOG_WARNINGF(job->filepath, "Couldn't find mime %s", magic_mime_str);
            }
        }
+
+        magic_close(Magic);
+        Magic = NULL;
    }

    int mmime = MAJOR_MIME(doc.mime);

    if (!(SHOULD_PARSE(doc.mime))) {

-    } else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) || mmime == MimeAudio || mmime == MimeImage) {
-        parse_media(job->filepath, &doc);
+    } else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
+               (mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
+
+        if (job->vfile.is_fs_file) {
+            parse_media_filename(job->filepath, &doc);
+        } else {
+            parse_media_vfile(&job->vfile, &doc);
+        }

    } else if (IS_PDF(doc.mime)) {
-        void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd);
+        void *pdf_buf = read_all(job, (char *) buf, bytes_read);
        parse_pdf(pdf_buf, doc.size, &doc);

-        if (pdf_buf != buf) {
+        if (pdf_buf != buf && pdf_buf != NULL) {
            free(pdf_buf);
        }

    } else if (mmime == MimeText && ScanCtx.content_size > 0) {
-        parse_text(bytes_read, &fd, (char *) buf, &doc);
+        parse_text(bytes_read, &job->vfile, (char *) buf, &doc);

    } else if (IS_FONT(doc.mime)) {
-        void *font_buf = read_all(job, (char *) buf, bytes_read, &fd);
+        void *font_buf = read_all(job, (char *) buf, bytes_read);
        parse_font(font_buf, doc.size, &doc);

-        if (font_buf != buf) {
+        if (font_buf != buf && font_buf != NULL) {
            free(font_buf);
        }
+    } else if (
+            ScanCtx.archive_mode != ARC_MODE_SKIP && (
+                    IS_ARC(doc.mime) ||
+                    (IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
+            )) {
+        parse_archive(&job->vfile, &doc);
+    } else if (ScanCtx.content_size > 0 && IS_DOC(doc.mime)) {
+        void *doc_buf = read_all(job, (char *) buf, bytes_read);
+        parse_doc(doc_buf, doc.size, &doc);
+
+        if (doc_buf != buf && doc_buf != NULL) {
+            free(doc_buf);
+        }
+    } else if (is_cbr(doc.mime)) {
+        void *cbr_buf = read_all(job, (char *) buf, bytes_read);
+        parse_cbr(cbr_buf, doc.size, &doc);
+
+        if (cbr_buf != buf && cbr_buf != NULL) {
+            free(cbr_buf);
+        }
+    }
+
+    //Parent meta
+    if (!uuid_is_null(job->parent)) {
+        char tmp[UUID_STR_LEN];
+        uuid_unparse(job->parent, tmp);
+
+        meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
+        meta_parent->key = MetaParent;
+        strcpy(meta_parent->strval, tmp);
+        APPEND_META((&doc), meta_parent)
    }

    write_document(&doc);

-    if (fd != -1) {
-        close(fd);
-    }
-
-    free(job);
+    CLOSE_FILE(job->vfile)
+}
+
+/**
+ * Release this thread's libmagic handle, if one is open.
+ * Magic is reset to NULL so the handle cannot be closed or used again
+ * after it has been freed.
+ */
+void cleanup_parse() {
+    if (Magic != NULL) {
+        magic_close(Magic);
+        Magic = NULL;
+    }
 }
--- a/src/parsing/parse.h
+++ b/src/parsing/parse.h
@@ -5,6 +5,11 @@

 #define PARSE_BUF_SIZE 4096

+int fs_read(struct vfile *f, void *buf, size_t size);
+void fs_close(struct vfile *f);
+
 void parse(void *arg);

+void cleanup_parse();
+
 #endif
--- a/src/parsing/pdf.c
+++ b/src/parsing/pdf.c
@@ -1,10 +1,29 @@
-#include <src/ctx.h>
 #include "pdf.h"
 #include "src/ctx.h"

-fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
+#define MIN_OCR_SIZE 350
+#define MIN_OCR_LEN 10
+__thread text_buffer_t thread_buffer;
+
+
+int render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
+
+    int err = 0;
+    fz_page *cover = NULL;
+
+    fz_var(cover);
+    fz_var(err);
+    fz_try(ctx)
+        cover = fz_load_page(ctx, fzdoc, 0);
+    fz_catch(ctx)
+        err = 1;
+
+    if (err != 0) {
+        fz_drop_page(ctx, cover);
+        LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
+        return FALSE;
+    }

-    fz_page *cover = fz_load_page(ctx, fzdoc, 0);
    fz_rect bounds = fz_bound_page(ctx, cover);

    float scale;
@@ -24,32 +43,136 @@ fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
    fz_clear_pixmap_with_value(ctx, pixmap, 0xFF);
    fz_device *dev = fz_new_draw_device(ctx, m, pixmap);

-    pthread_mutex_lock(&ScanCtx.mupdf_mu);
+    fz_var(err);
    fz_try(ctx)
+    {
+        pthread_mutex_lock(&ScanCtx.mupdf_mu);
        fz_run_page(ctx, cover, dev, fz_identity, NULL);
+    }
    fz_always(ctx)
-            pthread_mutex_unlock(&ScanCtx.mupdf_mu);
+    {
+        fz_close_device(ctx, dev);
+        fz_drop_device(ctx, dev);
+        pthread_mutex_unlock(&ScanCtx.mupdf_mu);
+    }
    fz_catch(ctx)
-        fz_rethrow(ctx);
+        err = ctx->error.errcode;

-    fz_drop_device(ctx, dev);
+    if (err != 0) {
+        LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
+        fz_drop_page(ctx, cover);
+        fz_drop_pixmap(ctx, pixmap);
+        return FALSE;
+    }

-    fz_buffer *fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
-    unsigned char *tn_buf;
-    size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
+    fz_buffer *fzbuf = NULL;
+    fz_var(fzbuf);
+    fz_var(err);

-    store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
+    fz_try(ctx)
+        fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
+    fz_catch(ctx)
+        err = ctx->error.errcode;
+
+    if (err == 0) {
+        unsigned char *tn_buf;
+        size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
+        store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
+    }

-    fz_drop_pixmap(ctx, pixmap);
    fz_drop_buffer(ctx, fzbuf);
+    fz_drop_pixmap(ctx, pixmap);
+    fz_drop_page(ctx, cover);

-    return cover;
+    if (err != 0) {
+        LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err,
+                     ctx->error.message)
+        return FALSE;
+    }
+
+    return TRUE;
 }

-void fz_noop_callback(__attribute__((unused)) void *user, __attribute__((unused)) const char *message) {}
+// MuPDF warning/error print hook: when verbose logging is enabled, forwards
+// the message to the log, tagged with the path of the document_t in `user`.
+void fz_err_callback(void *user, UNUSED(const char *message)) {
+    if (!LogCtx.verbose) {
+        return;
+    }
+
+    document_t *current = (document_t *) user;
+    LOG_WARNINGF(current->filepath, "FZ: %s", message)
+}

+// Prepare a MuPDF context for parsing `doc`: disables ICC, registers the
+// document handlers, and routes MuPDF warnings and errors to fz_err_callback.
+__always_inline
+static void init_ctx(fz_context *ctx, document_t *doc) {
+    fz_disable_icc(ctx);
+    fz_register_document_handlers(ctx);

-void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
+    // Both hooks receive `doc` as their user pointer so that fz_err_callback
+    // can tag each message with the document's file path.
+    ctx->warn.print_user = doc;
+    ctx->warn.print = fz_err_callback;
+    ctx->error.print_user = doc;
+    ctx->error.print = fz_err_callback;
+}
+
+// Append every character of a structured-text block to `tex`.
+// Blocks that are not FZ_STEXT_BLOCK_TEXT are ignored. Returns TEXT_BUF_FULL
+// as soon as text_buffer_append_char() reports a full buffer, 0 otherwise.
+__always_inline
+static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
+    if (block->type == FZ_STEXT_BLOCK_TEXT) {
+        for (fz_stext_line *ln = block->u.t.first_line; ln != NULL; ln = ln->next) {
+            for (fz_stext_char *ch = ln->first_char; ch != NULL; ch = ch->next) {
+                if (text_buffer_append_char(tex, ch->c) == TEXT_BUF_FULL) {
+                    return TEXT_BUF_FULL;
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+// True when `d` is one of 1, 2, 4, 8, 16, 24 or 32. The argument is
+// parenthesised so that expression arguments expand safely; note that it is
+// still evaluated several times.
+#define IS_VALID_BPP(d) ((d)==1 || (d)==2 || (d)==4 || (d)==8 || (d)==16 || (d)==24 || (d)==32)
+
+// Device fill_image hook (installed by parse_pdf() when a Tesseract language
+// is configured): runs OCR on embedded images larger than MIN_OCR_SIZE in both
+// dimensions and appends recognised text of at least MIN_OCR_LEN bytes to the
+// per-thread `thread_buffer`.
+void fill_image(fz_context *ctx, UNUSED(fz_device *dev),
+                fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
+                UNUSED(fz_color_params color_params)) {
+
+    int l2factor = 0;
+
+    if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) {
+
+        fz_pixmap *pix = img->get_pixmap(ctx, img, NULL, img->w, img->h, &l2factor);
+
+        // NOTE(review): img->h is tested a second time here; pix->w may have
+        // been intended -- confirm before changing.
+        if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
+            TessBaseAPI *api = TessBaseAPICreate();
+
+            // Init3 returns non-zero on failure (e.g. missing language data);
+            // the engine must not be used in that case.
+            if (TessBaseAPIInit3(api, ScanCtx.tesseract_path, ScanCtx.tesseract_lang) == 0) {
+                TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
+                TessBaseAPISetSourceResolution(api, pix->xres);
+
+                // The returned string may be NULL and, when it is not, is owned
+                // by the caller: release it with TessDeleteText().
+                char *text = TessBaseAPIGetUTF8Text(api);
+                if (text != NULL) {
+                    size_t len = strlen(text);
+                    if (len >= MIN_OCR_LEN) {
+                        text_buffer_append_string(&thread_buffer, text, len - 1);
+                        LOG_DEBUGF(
+                                "pdf.c",
+                                "(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
+                                pix->w, pix->h, (int) len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
+                        )
+                    }
+                    TessDeleteText(text);
+                }
+                TessBaseAPIEnd(api);
+            } else {
+                LOG_DEBUGF("pdf.c", "(OCR) TessBaseAPIInit3() failed for lang '%s'", ScanCtx.tesseract_lang)
+            }
+            TessBaseAPIDelete(api);
+        }
+        fz_drop_pixmap(ctx, pix);
+    }
+}
+
+// Parse an in-memory document with MuPDF: records the title (MetaTitle),
+// renders a cover thumbnail when ScanCtx.tn_size > 0, and extracts page text
+// (plus OCR text when a Tesseract language is set) into MetaContent when
+// ScanCtx.content_size > 0. All MuPDF errors are logged and abort the
+// document; nothing is propagated to the caller.
+void parse_pdf(const void *buf, size_t buf_len, document_t *doc) {
+
+    if (buf == NULL) {
+        return;
+    }

    static int mu_is_initialized = 0;
    if (!mu_is_initialized) {
@@ -57,95 +180,152 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
        mu_is_initialized = 1;
    }
    fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
-    fz_stream *stream = NULL;
-    fz_document *fzdoc = NULL;

-    fz_var(stream);
+    init_ctx(ctx, doc);
+
+    int err = 0;
+
+    fz_document *fzdoc = NULL;
+    fz_stream *stream = NULL;
    fz_var(fzdoc);
+    fz_var(stream);
+    fz_var(err);

    fz_try(ctx)
    {
-        fz_disable_icc(ctx);
-        fz_register_document_handlers(ctx);
-
-        //disable warnings
-        ctx->warn.print = fz_noop_callback;
-        ctx->error.print = fz_noop_callback;
-
        stream = fz_open_memory(ctx, buf, buf_len);
        fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
+    }
+    fz_catch(ctx)
+        err = ctx->error.errcode;

-        int page_count = fz_count_pages(ctx, fzdoc);
+    if (err != 0) {
+        fz_drop_stream(ctx, stream);
+        fz_drop_document(ctx, fzdoc);
+        fz_drop_context(ctx);
+        return;
+    }

-        fz_page *cover = render_cover(ctx, doc, fzdoc);
+    char title[4096] = {'\0',};
+    fz_try(ctx)
+        fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
+    fz_catch(ctx)
+        ;

-        fz_stext_options opts;
+    if (strlen(title) > 0) {
+        meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title));
+        meta_content->key = MetaTitle;
+        strcpy(meta_content->strval, title);
+        APPEND_META(doc, meta_content)
+    }

-        text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
+    int page_count = -1;
+    fz_var(err);
+    fz_try(ctx)
+        page_count = fz_count_pages(ctx, fzdoc);
+    fz_catch(ctx)
+        err = ctx->error.errcode;
+
+    if (err) {
+        LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, ctx->error.message)
+        fz_drop_stream(ctx, stream);
+        fz_drop_document(ctx, fzdoc);
+        fz_drop_context(ctx);
+        return;
+    }
+
+    // render_cover() returns FALSE on failure (after logging the cause) and
+    // TRUE on success, so only a failed cover aborts the document.
+    if (ScanCtx.tn_size > 0 && render_cover(ctx, doc, fzdoc) == FALSE) {
+        fz_drop_stream(ctx, stream);
+        fz_drop_document(ctx, fzdoc);
+        fz_drop_context(ctx);
+        return;
+    }
+
+    if (ScanCtx.content_size > 0) {
+        fz_stext_options opts = {0};
+        thread_buffer = text_buffer_create(ScanCtx.content_size);
+        int buffer_full = FALSE;

        for (int current_page = 0; current_page < page_count; current_page++) {
-            fz_page *page;
-            if (current_page == 0) {
-                page = cover;
-            } else {
+            fz_page *page = NULL;
+            fz_var(err);
+            fz_try(ctx)
                page = fz_load_page(ctx, fzdoc, current_page);
+            fz_catch(ctx)
+                err = ctx->error.errcode;
+            if (err != 0) {
+                LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
+                text_buffer_destroy(&thread_buffer);
+                fz_drop_page(ctx, page);
+                fz_drop_stream(ctx, stream);
+                fz_drop_document(ctx, fzdoc);
+                fz_drop_context(ctx);
+                return;
            }

            fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
            fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
+            dev->stroke_path = NULL;
+            dev->stroke_text = NULL;
+            dev->clip_text = NULL;
+            dev->clip_stroke_path = NULL;
+            dev->clip_stroke_text = NULL;

-            pthread_mutex_lock(&ScanCtx.mupdf_mu);
+            if (ScanCtx.tesseract_lang != NULL) {
+                dev->fill_image = fill_image;
+            }
+
+            fz_var(err);
            fz_try(ctx)
-                fz_run_page_contents(ctx, page, dev, fz_identity, NULL);
+                fz_run_page(ctx, page, dev, fz_identity, NULL);
            fz_always(ctx)
-                pthread_mutex_unlock(&ScanCtx.mupdf_mu);
+            {
+                fz_close_device(ctx, dev);
+                fz_drop_device(ctx, dev);
+            }
            fz_catch(ctx)
-                fz_rethrow(ctx);
+                err = ctx->error.errcode;

-            fz_drop_device(ctx, dev);
+            if (err != 0) {
+                LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
+                text_buffer_destroy(&thread_buffer);
+                fz_drop_page(ctx, page);
+                fz_drop_stext_page(ctx, stext);
+                fz_drop_stream(ctx, stream);
+                fz_drop_document(ctx, fzdoc);
+                fz_drop_context(ctx);
+                return;
+            }

            fz_stext_block *block = stext->first_block;
            while (block != NULL) {
-
-                if (block->type != FZ_STEXT_BLOCK_TEXT) {
-                    block = block->next;
-                    continue;
-                }
-
-                fz_stext_line *line = block->u.t.first_line;
-                while (line != NULL) {
-                    fz_stext_char *c = line->first_char;
-                    while (c != NULL) {
-                        if (text_buffer_append_char(&text_buf, c->c) == TEXT_BUF_FULL) {
-                            fz_drop_page(ctx, page);
-                            fz_drop_stext_page(ctx, stext);
-                            goto write_loop_end;
-                        }
-                        c = c->next;
-                    }
-                    line = line->next;
+                int ret = read_stext_block(block, &thread_buffer);
+                if (ret == TEXT_BUF_FULL) {
+                    buffer_full = TRUE;
+                    break;
                }
                block = block->next;
            }
-            fz_drop_page(ctx, page);
            fz_drop_stext_page(ctx, stext);
+            fz_drop_page(ctx, page);
+
+            if (buffer_full) { // content limit reached: later pages cannot add text
+                break;
+            }
        }
-        write_loop_end:;
-        text_buffer_terminate_string(&text_buf);
+        text_buffer_terminate_string(&thread_buffer);

-        meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
+        meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur);
        meta_content->key = MetaContent;
-        memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
-        text_buffer_destroy(&text_buf);
+        memcpy(meta_content->strval, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur);
        APPEND_META(doc, meta_content)
-    }
-    fz_always(ctx)
-    {
-        fz_drop_stream(ctx, stream);
-        fz_drop_document(ctx, fzdoc);
-        fz_drop_context(ctx);
-    } fz_catch(ctx) {
-        fprintf(stderr, "Error %s %s\n", doc->filepath, ctx->error.message);
-    }
-}

+        text_buffer_destroy(&thread_buffer);
+    }
+
+    fz_drop_stream(ctx, stream);
+    fz_drop_document(ctx, fzdoc);
+    fz_drop_context(ctx);
+}
--- a/src/parsing/pdf.h
+++ b/src/parsing/pdf.h
@@ -4,6 +4,6 @@
 #include "src/sist.h"


-void parse_pdf(void *buf, size_t buf_len, document_t *doc);
+void parse_pdf(const void *buf, size_t buf_len, document_t *doc);

 #endif
--- a/src/parsing/text.c
+++ b/src/parsing/text.c
@@ -1,7 +1,7 @@
 #include "text.h"
 #include "src/ctx.h"

-void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
+void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc) {

    char *intermediate_buf;
    int intermediate_buf_len;
@@ -13,10 +13,6 @@ void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
        memcpy(intermediate_buf, buf, to_copy);

    } else {
-        if (*fd == -1) {
-            *fd = open(doc->filepath, O_RDONLY);
-        }
-
        int to_read = MIN(ScanCtx.content_size, doc->size) - bytes_read;

        intermediate_buf = malloc(to_read + bytes_read);
@@ -25,19 +21,17 @@ void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
            memcpy(intermediate_buf, buf, bytes_read);
        }

-        read(*fd, intermediate_buf + bytes_read, to_read);
+        f->read(f, intermediate_buf + bytes_read, to_read);
    }
+    text_buffer_t tex = text_buffer_create(ScanCtx.content_size);
+    text_buffer_append_string(&tex, intermediate_buf, intermediate_buf_len);
+    text_buffer_terminate_string(&tex);

-    text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
-    for (int i = 0; i < intermediate_buf_len; i++) {
-        text_buffer_append_char(&text_buf, *(intermediate_buf + i));
-    }
-    text_buffer_terminate_string(&text_buf);
-
-    meta_line_t *meta = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
+    meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
    meta->key = MetaContent;
-    strcpy(meta->strval, text_buf.dyn_buffer.buf);
-    text_buffer_destroy(&text_buf);
-    free(intermediate_buf);
+    strcpy(meta->strval, tex.dyn_buffer.buf);
    APPEND_META(doc, meta)
+
+    free(intermediate_buf);
+    text_buffer_destroy(&tex);
 }
--- a/src/parsing/text.h
+++ b/src/parsing/text.h
@@ -3,6 +3,6 @@

 #include "src/sist.h"

-void parse_text(int bytes_read, int *fd, char *buf, document_t *doc);
+void parse_text(int bytes_read, struct vfile *f, char *buf, document_t *doc);

 #endif
--- a/src/sist.h
+++ b/src/sist.h
@@ -2,6 +2,7 @@
 #define SIST_H

 #define UUID_STR_LEN 37
+#define UNUSED(x) __attribute__((__unused__))  x

 #include <glib-2.0/glib.h>
 #include <unistd.h>
@@ -12,10 +13,11 @@
 #include <ftw.h>
 #include <uuid.h>
 #include <magic.h>
-#include <libavformat/avformat.h>
-#include <libswscale/swscale.h>
-#include <libswresample/swresample.h>
-#include <libavcodec/avcodec.h>
+#include "libavformat/avformat.h"
+#include "libswscale/swscale.h"
+#include "libswresample/swresample.h"
+#include "libavcodec/avcodec.h"
+#include "libavutil/imgutils.h"
 #include <ctype.h>
 #include <mupdf/fitz.h>
 #include <mupdf/pdf.h>
@@ -25,19 +27,29 @@
 #include <pthread.h>
 #include <sys/stat.h>
 #include <wordexp.h>
+#include "ft2build.h"
+#include "freetype/freetype.h"
+#include <archive.h>
+#include <archive_entry.h>
+#include <libxml/xmlstring.h>
+#include <libxml/parser.h>
+#define BOOL int
+#include <tesseract/capi.h>
+#include <pcre.h>
+
 #include <onion/onion.h>
 #include <onion/handler.h>
 #include <onion/block.h>
 #include <onion/shortcuts.h>
+#include <onion/codecs.h>
 #include <curl/curl.h>

-
 #include "cJSON/cJSON.h"

 #include "types.h"
 #include "tpool.h"
+#include "utf8.h/utf8.h"
 #include "util.h"
-#include "src/index/elastic.h"
 #include "io/store.h"
 #include "io/serialize.h"
 #include "io/walk.h"
@@ -47,9 +59,16 @@
 #include "parsing/pdf.h"
 #include "parsing/media.h"
 #include "parsing/font.h"
+#include "parsing/arc.h"
+#include "parsing/doc.h"
+#include "parsing/cbr.h"
+#include "cli.h"
+#include "log.h"
+
+#include "src/index/elastic.h"
 #include "index/web.h"
 #include "web/serve.h"
-#include "cli.h"
+#include "web/auth_basic.h"

 ;

--- a/src/tpool.c
+++ b/src/tpool.c
@@ -25,6 +25,7 @@ typedef struct tpool {
    int done_cnt;

    int stop;
+
    void (*cleanup_func)();
 } tpool_t;

@@ -100,7 +101,7 @@ static void *tpool_worker(void *arg) {
    tpool_t *pool = arg;

    while (1) {
-        pthread_mutex_lock(&(pool->work_mutex));
+        pthread_mutex_lock(&pool->work_mutex);
        if (pool->stop) {
            break;
        }
@@ -113,14 +114,21 @@ static void *tpool_worker(void *arg) {
        pthread_mutex_unlock(&(pool->work_mutex));

        if (work != NULL) {
+            if (pool->stop) {
+                break;
+            }
+
            work->func(work->arg);
+            free(work->arg);
            free(work);
        }

        pthread_mutex_lock(&(pool->work_mutex));
-        pool->done_cnt++;
+        if (work != NULL) {
+            pool->done_cnt++;
+        }

-        progress_bar_print((double)pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
+        progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);

        if (pool->work_head == NULL) {
            pthread_cond_signal(&(pool->working_cond));
@@ -128,6 +136,7 @@ static void *tpool_worker(void *arg) {
        pthread_mutex_unlock(&(pool->work_mutex));
    }

+    LOG_INFO("tpool.c", "Executing cleaup function")
    pool->cleanup_func();

    pthread_cond_signal(&(pool->working_cond));
@@ -136,17 +145,24 @@ static void *tpool_worker(void *arg) {
 }

+// Block until done_cnt has caught up with work_cnt, then set pool->stop and
+// print a final (1.0) progress bar. work_mutex is held for the whole call
+// except while blocked in pthread_cond_wait().
 void tpool_wait(tpool_t *pool) {
+    LOG_INFO("tpool.c", "Waiting for worker threads to finish")
    pthread_mutex_lock(&(pool->work_mutex));
    while (1) {
        if (pool->done_cnt < pool->work_cnt) {
            pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
        } else {
-            pool->stop = 1;
-            break;
+            // NOTE(review): both sleeps below run with work_mutex held, so the
+            // visible worker code cannot update done_cnt meanwhile and the
+            // re-check looks redundant -- confirm the intended grace period.
+            usleep(500000);
+            if (pool->done_cnt == pool->work_cnt) {
+                pool->stop = 1;
+                usleep(1000000);
+                break;
+            }
        }
-        progress_bar_print(100.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
    }
+    progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
    pthread_mutex_unlock(&(pool->work_mutex));
+
+    LOG_INFO("tpool.c", "Worker threads finished")
 }

 void tpool_destroy(tpool_t *pool) {
@@ -154,6 +170,8 @@ void tpool_destroy(tpool_t *pool) {
        return;
    }

+    LOG_INFO("tpool.c", "Destroying thread pool")
+
    pthread_mutex_lock(&(pool->work_mutex));
    tpool_work_t *work = pool->work_head;
    while (work != NULL) {
@@ -167,9 +185,14 @@ void tpool_destroy(tpool_t *pool) {

    for (size_t i = 0; i < pool->thread_cnt; i++) {
        pthread_t thread = pool->threads[i];
-        pthread_cancel(thread);
+        if (thread != 0) {
+            void *_;
+            pthread_join(thread, &_);
+        }
    }

+    LOG_INFO("tpool.c", "Final cleanup")
+
    pthread_mutex_destroy(&(pool->work_mutex));
    pthread_cond_destroy(&(pool->has_work_cond));
    pthread_cond_destroy(&(pool->working_cond));
@@ -186,11 +209,11 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {

    tpool_t *pool = malloc(sizeof(tpool_t));
    pool->thread_cnt = thread_cnt;
-    pool->work_cnt =0;
-    pool->done_cnt =0;
+    pool->work_cnt = 0;
+    pool->done_cnt = 0;
    pool->stop = 0;
    pool->cleanup_func = cleanup_func;
-    pool->threads = malloc(sizeof(pthread_t) * thread_cnt);
+    pool->threads = calloc(sizeof(pthread_t), thread_cnt);

    pthread_mutex_init(&(pool->work_mutex), NULL);

@@ -200,11 +223,14 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
    pool->work_head = NULL;
    pool->work_tail = NULL;

-    for (size_t i = 0; i < thread_cnt; i++) {
-        pthread_t thread = pool->threads[i];
-        pthread_create(&thread, NULL, tpool_worker, pool);
-        pthread_detach(thread);
-    }
-
    return pool;
 }
+
+// Spawn pool->thread_cnt worker threads running tpool_worker(). A thread that
+// cannot be created is logged and its slot is reset to 0, which is the value
+// tpool_destroy() skips when joining.
+void tpool_start(tpool_t *pool) {
+
+    LOG_INFOF("tpool.c", "Starting thread pool with %d threads", pool->thread_cnt)
+
+    for (size_t i = 0; i < pool->thread_cnt; i++) {
+        int ret = pthread_create(&pool->threads[i], NULL, tpool_worker, pool);
+        if (ret != 0) {
+            // POSIX leaves *thread undefined when pthread_create() fails.
+            pool->threads[i] = 0;
+            LOG_WARNINGF("tpool.c", "pthread_create() failed for worker %d with error code [%d]", (int) i, ret)
+        }
+    }
+}
--- a/src/tpool.h
+++ b/src/tpool.h
@@ -9,6 +9,7 @@ typedef struct tpool tpool_t;
 typedef void (*thread_func_t)(void *arg);

 tpool_t *tpool_create(size_t num, void (*cleanup_func)());
+void tpool_start(tpool_t *pool);
 void tpool_destroy(tpool_t *tm);

 int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);
--- a/src/types.h
+++ b/src/types.h
@@ -2,13 +2,19 @@
 #define SIST2_TYPES_H


-#define META_INT_MASK 0xF0
-#define META_STR_MASK 0xE0
-#define META_LONG_MASK 0xD0
+#define META_INT_MASK 0x80
+#define META_STR_MASK 0x40
+#define META_LONG_MASK 0x20
 #define IS_META_INT(key) (key & META_INT_MASK) == META_INT_MASK
 #define IS_META_LONG(key) (key & META_LONG_MASK) == META_LONG_MASK
 #define IS_META_STR(meta) (meta->key & META_STR_MASK) == META_STR_MASK

+#define ARC_MODE_SKIP 0
+#define ARC_MODE_LIST 1
+#define ARC_MODE_SHALLOW 2
+#define ARC_MODE_RECURSE 3
+typedef int archive_mode_t;
+
 // This is written to file as a 8bit char!
 enum metakey {
    MetaContent = 1 | META_STR_MASK,
@@ -24,16 +30,32 @@ enum metakey {
    MetaGenre = 11 | META_STR_MASK,
    MetaTitle = 12 | META_STR_MASK,
    MetaFontName = 13 | META_STR_MASK,
+    MetaParent = 14 | META_STR_MASK,
+    MetaExifMake = 15 | META_STR_MASK,
+    MetaExifSoftware = 16 | META_STR_MASK,
+    MetaExifExposureTime = 17 | META_STR_MASK,
+    MetaExifFNumber = 18 | META_STR_MASK,
+    MetaExifFocalLength = 19 | META_STR_MASK,
+    MetaExifUserComment = 20 | META_STR_MASK,
+    MetaExifModel = 21 | META_STR_MASK,
+    MetaExifIsoSpeedRatings = 22 | META_STR_MASK,
+    MetaExifDateTime = 23 | META_STR_MASK,
+    //Note to self: this will break after 31 entries
 };

+#define INDEX_TYPE_BIN "binary"
+#define INDEX_TYPE_JSON "json"
+#define INDEX_VERSION_EXTERNAL "_external_v1"
+
 typedef struct index_descriptor {
    char uuid[UUID_STR_LEN];
-    char version[6];
+    char version[64];
    long timestamp;
    char root[PATH_MAX];
    char rewrite_url[8196];
    short root_len;
    char name[1024];
+    char type[64];
 } index_descriptor_t;

 typedef struct index_t {
@@ -66,10 +88,31 @@ typedef struct document {
    char *filepath;
 } document_t;

+// Forward declaration so the callback typedefs below can name the struct.
+typedef struct vfile vfile_t;
+
+// Read callback; parse_text() invokes it as f->read(f, dst, nbytes).
+// NOTE(review): the meaning of the int result is not visible here -- confirm.
+typedef int (*read_func_t)(struct vfile *, void *buf, size_t size);
+
+// Close callback for a vfile.
+typedef void (*close_func_t)(struct vfile *);
+
+// File handle that carries its own read/close callbacks.
+typedef struct vfile {
+    // fd and arc share storage; presumably is_fs_file tells which one is
+    // valid -- TODO confirm against the code that fills this struct.
+    union {
+        int fd;
+        struct archive *arc;
+    };
+
+    int is_fs_file;
+    char *filepath;
+
+    read_func_t read;
+    close_func_t close;
+} vfile_t;
+
 typedef struct parse_job_t {
    int base;
    int ext;
    struct stat info;
+    struct vfile vfile;
+    uuid_t parent;
    char filepath[1];
 } parse_job_t;

--- a/src/util.c
+++ b/src/util.c
@@ -1,20 +1,39 @@
-#define _GNU_SOURCE
 #include "util.h"
-
+#include "src/ctx.h"

 #define PBSTR "========================================"
 #define PBWIDTH 40

+// Copy `str` into a new dyn_buffer_t, replacing every '#' with "%23".
+// The result is NUL-terminated; release it with dyn_buffer_destroy().
+dyn_buffer_t url_escape(char *str) {
+    dyn_buffer_t text = dyn_buffer_create();
+
+    // Exactly one input byte is consumed per iteration, so a trailing '#'
+    // cannot step over the terminator and a repeated '#' is always escaped.
+    for (char *ptr = str; *ptr; ptr++) {
+        if (*ptr == '#') {
+            dyn_buffer_write(&text, "%23", 3);
+        } else {
+            dyn_buffer_write_char(&text, *ptr);
+        }
+    }
+    dyn_buffer_write_char(&text, '\0');
+    return text;
+}
+
 char *abspath(const char *path) {
    wordexp_t w;
    wordexp(path, &w, 0);

-    char *abs = canonicalize_file_name(w.we_wordv[0]);
+    char *abs = realpath(w.we_wordv[0], NULL);
    if (abs == NULL) {
        return NULL;
    }
-    abs = realloc(abs, strlen(abs) + 2);
-    strcat(abs, "/");
+    if (strlen(abs) > 1) {
+        abs = realloc(abs, strlen(abs) + 2);
+        strcat(abs, "/");
+    }

    wordfree(&w);
    return abs;
@@ -24,7 +43,7 @@ char *expandpath(const char *path) {
    wordexp_t w;
    wordexp(path, &w, 0);

-    char * expanded = malloc(strlen(w.we_wordv[0]) + 2);
+    char *expanded = malloc(strlen(w.we_wordv[0]) + 2);
    strcpy(expanded, w.we_wordv[0]);
    strcat(expanded, "/");

@@ -76,4 +95,29 @@ GHashTable *incremental_get_table() {
    return file_table;
 }

+// Return the first entry of the NULL-terminated `paths` array whose resolved
+// directory contains `filename` (checked with stat()), or NULL if none does.
+// The returned pointer is the original array entry, not the resolved path.
+const char *find_file_in_paths(const char *paths[], const char *filename) {
+    char candidate[PATH_MAX];
+    struct stat st;
+
+    for (const char **dir = paths; *dir != NULL; dir++) {
+        char *resolved = abspath(*dir);
+
+        if (resolved != NULL) {
+            snprintf(candidate, sizeof(candidate), "%s%s", resolved, filename);
+            LOG_DEBUGF("util.c", "Looking for '%s' in folder '%s'", filename, resolved)
+            free(resolved);
+
+            if (stat(candidate, &st) != -1) {
+                return *dir;
+            }
+        }
+    }
+
+    return NULL;
+}
+

--- a/src/util.h
+++ b/src/util.h
@@ -5,7 +5,10 @@

 #define TEXT_BUF_FULL -1
 #define INITIAL_BUF_SIZE 1024 * 16
-#define SHOULD_IGNORE_CHAR(c) c < '0' || c > 'z'
+
+// SHOULD_KEEP_CHAR: true for ' through ;, A through z, and values above 127.
+// SHOULD_IGNORE_CHAR is its negation. Arguments and expansions are fully
+// parenthesised; the argument is still evaluated more than once.
+#define SHOULD_IGNORE_CHAR(c) (!(SHOULD_KEEP_CHAR(c)))
+#define SHOULD_KEEP_CHAR(c) (((c) >= '\'' && (c) <= ';') || ((c) >= 'A' && (c) <= 'z') || ((c) > 127))
+

 typedef struct dyn_buffer {
    char *buf;
@@ -21,8 +24,67 @@ typedef struct text_buffer {
    dyn_buffer_t dyn_buffer;
 } text_buffer_t;

+char *abspath(const char *path);
+
+char *expandpath(const char *path);
+
+dyn_buffer_t url_escape(char *str);
+
+void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
+
+GHashTable *incremental_get_table();
+
+// Check whether the bytes at `s` begin with one well-formed UTF-8 sequence.
+// Returns TRUE for a single byte with the high bit clear. For a 2-, 3- or
+// 4-byte lead it returns TRUE only if every expected continuation byte has
+// the form 10xxxxxx, the byte after the sequence is not itself a continuation
+// byte, and the encoding is not overlong. Any other lead byte yields FALSE.
+// Reads one byte past the sequence, so `s` must be readable that far.
 __always_inline
-dyn_buffer_t dyn_buffer_create() {
+static int utf8_validchr2(const char *s) {
+    if (0x00 == (0x80 & *s)) {
+        return TRUE;
+    } else if (0xf0 == (0xf8 & *s)) {
+        // 4-byte lead (11110xxx): three continuation bytes must follow
+        if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) ||
+            (0x80 != (0xc0 & s[3]))) {
+            return FALSE;
+        }
+
+        // a further continuation byte would make the sequence too long
+        if (0x80 == (0xc0 & s[4])) {
+            return FALSE;
+        }
+
+        // all high payload bits zero: the value fits a shorter encoding
+        if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) {
+            return FALSE;
+        }
+    } else if (0xe0 == (0xf0 & *s)) {
+        // 3-byte lead (1110xxxx)
+        if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) {
+            return FALSE;
+        }
+
+        if (0x80 == (0xc0 & s[3])) {
+            return FALSE;
+        }
+
+        if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) {
+            return FALSE;
+        }
+    } else if (0xc0 == (0xe0 & *s)) {
+        // 2-byte lead (110xxxxx)
+        if (0x80 != (0xc0 & s[1])) {
+            return FALSE;
+        }
+
+        if (0x80 == (0xc0 & s[2])) {
+            return FALSE;
+        }
+
+        if (0 == (0x1e & s[0])) {
+            return FALSE;
+        }
+    } else {
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+__always_inline
+static dyn_buffer_t dyn_buffer_create() {
    dyn_buffer_t buf;

    buf.size = INITIAL_BUF_SIZE;
@@ -33,7 +95,7 @@ dyn_buffer_t dyn_buffer_create() {
 }

 __always_inline
-void grow_buffer(dyn_buffer_t *buf, size_t size) {
+static void grow_buffer(dyn_buffer_t *buf, size_t size) {
    if (buf->cur + size > buf->size) {
        do {
            buf->size *= 2;
@@ -44,7 +106,7 @@ void grow_buffer(dyn_buffer_t *buf, size_t size) {
 }

 __always_inline
-void grow_buffer_small(dyn_buffer_t *buf) {
+static void grow_buffer_small(dyn_buffer_t *buf) {
    if (buf->cur + sizeof(long) > buf->size) {
        buf->size *= 2;
        buf->buf = realloc(buf->buf, buf->size);
@@ -52,7 +114,7 @@ void grow_buffer_small(dyn_buffer_t *buf) {
 }

 __always_inline
-void dyn_buffer_write(dyn_buffer_t *buf, void *data, size_t size) {
+static void dyn_buffer_write(dyn_buffer_t *buf, void *data, size_t size) {
    grow_buffer(buf, size);

    memcpy(buf->buf + buf->cur, data, size);
@@ -60,7 +122,7 @@ void dyn_buffer_write(dyn_buffer_t *buf, void *data, size_t size) {
 }

 __always_inline
-void dyn_buffer_write_char(dyn_buffer_t *buf, char c) {
+static void dyn_buffer_write_char(dyn_buffer_t *buf, char c) {
    grow_buffer_small(buf);

    *(buf->buf + buf->cur) = c;
@@ -68,13 +130,18 @@ void dyn_buffer_write_char(dyn_buffer_t *buf, char c) {
 }

 __always_inline
-void dyn_buffer_write_str(dyn_buffer_t *buf, char *str) {
+static void dyn_buffer_write_str(dyn_buffer_t *buf, char *str) {
    dyn_buffer_write(buf, str, strlen(str));
    dyn_buffer_write_char(buf, '\0');
 }

 __always_inline
-void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
+static void dyn_buffer_append_string(dyn_buffer_t *buf, char *str) {
+    dyn_buffer_write(buf, str, strlen(str));
+}
+
+__always_inline
+static void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
    grow_buffer_small(buf);

    *(int *) (buf->buf + buf->cur) = d;
@@ -82,7 +149,7 @@ void dyn_buffer_write_int(dyn_buffer_t *buf, int d) {
 }

 __always_inline
-void dyn_buffer_write_short(dyn_buffer_t *buf, short s) {
+static void dyn_buffer_write_short(dyn_buffer_t *buf, short s) {
    grow_buffer_small(buf);

    *(short *) (buf->buf + buf->cur) = s;
@@ -90,7 +157,7 @@ void dyn_buffer_write_short(dyn_buffer_t *buf, short s) {
 }

 __always_inline
-void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) {
+static void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) {
    grow_buffer_small(buf);

    *(unsigned long *) (buf->buf + buf->cur) = l;
@@ -98,17 +165,17 @@ void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) {
 }

 __always_inline
-void dyn_buffer_destroy(dyn_buffer_t *buf) {
+static void dyn_buffer_destroy(dyn_buffer_t *buf) {
    free(buf->buf);
 }

 __always_inline
-void text_buffer_destroy(text_buffer_t *buf) {
+static void text_buffer_destroy(text_buffer_t *buf) {
    dyn_buffer_destroy(&buf->dyn_buffer);
 }

 __always_inline
-text_buffer_t text_buffer_create(int max_size) {
+static text_buffer_t text_buffer_create(int max_size) {
    text_buffer_t text_buf;

    text_buf.dyn_buffer = dyn_buffer_create();
@@ -119,27 +186,38 @@ text_buffer_t text_buffer_create(int max_size) {
 }

 __always_inline
-void text_buffer_terminate_string(text_buffer_t *buf) {
-    dyn_buffer_write_char(&buf->dyn_buffer, '\0');
-}
+static int text_buffer_append_char(text_buffer_t *buf, int c) {

-__always_inline
-int text_buffer_append_char(text_buffer_t *buf, int c) {
-
-    if (SHOULD_IGNORE_CHAR(c)) {
-        if (!buf->last_char_was_whitespace) {
+    if (SHOULD_IGNORE_CHAR(c) || c == ' ') {
+        if (!buf->last_char_was_whitespace && buf->dyn_buffer.cur != 0) {
            dyn_buffer_write_char(&buf->dyn_buffer, ' ');
            buf->last_char_was_whitespace = TRUE;

-            if (buf->dyn_buffer.cur >= buf->max_size) {
+            if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) {
                return TEXT_BUF_FULL;
            }
        }
    } else {
        buf->last_char_was_whitespace = FALSE;
-        dyn_buffer_write_char(&buf->dyn_buffer, (char) c);
+        grow_buffer_small(&buf->dyn_buffer);

-        if (buf->dyn_buffer.cur >= buf->max_size) {
+        if (0 == ((utf8_int32_t) 0xffffff80 & c)) {
+            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = (char) c;
+        } else if (0 == ((utf8_int32_t) 0xfffff800 & c)) {
+            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xc0 | (char) (c >> 6);
+            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
+        } else if (0 == ((utf8_int32_t) 0xffff0000 & c)) {
+            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xe0 | (char) (c >> 12);
+            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
+            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
+        } else {
+            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xf0 | (char) (c >> 18);
+            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 12) & 0x3f);
+            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
+            *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
+        }
+
+        if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) {
            return TEXT_BUF_FULL;
        }
    }
@@ -147,19 +225,48 @@ int text_buffer_append_char(text_buffer_t *buf, int c) {
    return 0;
 }

-char *abspath(const char * path);
-char *expandpath(const char *path);
-
-void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
-

 __always_inline
-void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
+static void text_buffer_terminate_string(text_buffer_t *buf) {
+    /*
+     * NUL-terminate the accumulated text. When the buffer is empty or its last
+     * byte is not a space, a terminator is appended after the existing bytes.
+     */
+    if (buf->dyn_buffer.cur == 0 || buf->dyn_buffer.buf[buf->dyn_buffer.cur - 1] != ' ') {
+        dyn_buffer_write_char(&buf->dyn_buffer, '\0');
+        return;
+    }
+
+    /* The last byte is a space: overwrite it in place, the cursor is left untouched */
+    buf->dyn_buffer.buf[buf->dyn_buffer.cur - 1] = '\0';
+}
+
+/*
+ * Append the UTF-8 text in `str` (at most `len` bytes) to `buf`, one code
+ * point at a time, through text_buffer_append_char().
+ *
+ * Nothing is appended when `str` is NULL or starts with a NUL byte, when
+ * `len` is 0, or when the lead byte of the first sequence announces more
+ * bytes (2, 3 or 4) than `len` provides.
+ *
+ * Always returns 0: the result of text_buffer_append_char() is discarded, so
+ * a TEXT_BUF_FULL condition is not reported to the caller.
+ */
+__always_inline
+static int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len) {
+
+    utf8_int32_t c;
+    if (str == NULL || len < 1 ||
+        (0xf0 == (0xf8 & str[0]) && len < 4) ||
+        (0xe0 == (0xf0 & str[0]) && len < 3) ||
+        (0xc0 == (0xe0 & str[0]) && len == 1) ||
+        *(str) == 0) {
+        return 0;
+    }
+
+    /*
+     * `c` holds the code point decoded by the previous utf8codepoint() call and
+     * `v` is that call's return value (presumably the address of the following
+     * sequence - confirm against utf8.h). The loop stops at a NUL code point or
+     * as soon as the offset of `v` plus 4 reaches `len`.
+     * NOTE(review): because of that `+ 4` bound the code point decoded last is
+     * never appended, and (assuming utf8codepoint() advances by at least one
+     * byte) inputs of 5 bytes or fewer append nothing - confirm this is intended.
+     * NOTE(review): utf8_validchr2() is given `v`, not the start of the code
+     * point being appended - verify against its definition.
+     */
+    for (void *v = utf8codepoint(str, &c); c != '\0' && ((char *) v - str + 4) < len; v = utf8codepoint(v, &c)) {
+        if (utf8_validchr2(v)) {
+            text_buffer_append_char(buf, c);
+        }
+    }
+    return 0;
+}
+
+/*
+ * Append the NUL-terminated string `str` to `buf`. The length is measured with
+ * strlen(), so `str` must not be NULL. Returns whatever
+ * text_buffer_append_string() returns.
+ */
+__always_inline
+static int text_buffer_append_string0(text_buffer_t *buf, char *str) {
+    return text_buffer_append_string(buf, str, strlen(str));
+}
+
+/*
+ * Store `mtime` for `inode_no` in `table`. The inode number itself is cast to
+ * the key pointer and the mtime is packed into the value pointer with
+ * GINT_TO_POINTER, so no memory is allocated for either.
+ */
+__always_inline
+static void incremental_put(GHashTable *table, unsigned long inode_no, int mtime) {
    g_hash_table_insert(table, (gpointer) inode_no, GINT_TO_POINTER(mtime));
 }

 __always_inline
-int incremental_get(GHashTable *table, unsigned long inode_no) {
+static int incremental_get(GHashTable *table, unsigned long inode_no) {
    if (table != NULL) {
        return GPOINTER_TO_INT(g_hash_table_lookup(table, (gpointer) inode_no));
    } else {
@@ -168,12 +275,11 @@ int incremental_get(GHashTable *table, unsigned long inode_no) {
 }

 __always_inline
-int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no) {
+static int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no) {
+    /* Flag `inode_no` in `table` by inserting it as a key whose value is the integer 1 */
    g_hash_table_insert(table, GINT_TO_POINTER(inode_no), GINT_TO_POINTER(1));
+    /*
+     * The function is declared to return int, so it must return a value: falling
+     * off the end is undefined behaviour as soon as a caller reads the result.
+     * 0 follows the "0 == success" convention of the text_buffer_* helpers.
+     */
+    return 0;
 }

-GHashTable *incremental_get_table();
-

+const char *find_file_in_paths(const char **paths, const char *filename);

 #endif
--- a/src/web/auth_basic.c
+++ b/src/web/auth_basic.c
@@ -0,0 +1,59 @@
+#include "auth_basic.h"
+
+#define UNAUTHORIZED_TEXT "Unauthorized"
+
+/* Private state attached to the Basic-auth wrapper handler. */
+typedef struct auth_basic_data {
+    /* Handler that receives the request once authentication succeeds; freed by auth_basic_free() */
+    onion_handler *inside;
+    /* Expected text after "Basic " in the Authorization header; NULL disables the check. Not freed by auth_basic_free() */
+    const char *b64credentials;
+} auth_basic_data_t;
+
+
+/**
+ * Check an Authorization header value against the expected credentials.
+ *
+ * @param expected    text that must follow the "Basic " prefix, or NULL when
+ *                    authentication is disabled
+ * @param credentials raw Authorization header value, may be NULL
+ * @return TRUE when `expected` is NULL or the header is "Basic " followed by
+ *         exactly `expected`, FALSE otherwise
+ */
+int authenticate(const char *expected, const char *credentials) {
+    static const char scheme[] = "Basic ";
+    const size_t scheme_len = sizeof(scheme) - 1;
+
+    /* No credentials configured: every request is accepted */
+    if (expected == NULL) {
+        return TRUE;
+    }
+
+    /* Missing header, or a scheme other than Basic */
+    if (credentials == NULL || strncmp(credentials, scheme, scheme_len) != 0) {
+        return FALSE;
+    }
+
+    return strcmp(credentials + scheme_len, expected) == 0 ? TRUE : FALSE;
+}
+
+/**
+ * Request handler that guards another handler with HTTP Basic authentication.
+ *
+ * When the Authorization header satisfies authenticate(), the request is
+ * forwarded to the wrapped handler and its result is returned. Otherwise a
+ * 401 response carrying a WWW-Authenticate challenge and a short plain body
+ * is produced and OCS_PROCESSED is returned.
+ *
+ * @param d   private data created by auth_basic()
+ * @param req incoming request
+ * @param res response being built
+ */
+int auth_basic_handler(auth_basic_data_t *d,
+                       onion_request *req,
+                       onion_response *res) {
+
+    const char *credentials = onion_request_get_header(req, "Authorization");
+
+    if (authenticate(d->b64credentials, credentials)) {
+        return onion_handler_handle(d->inside, req, res);
+    }
+
+    /* sizeof() counts the literal's terminating NUL, which must not be sent as part of the body */
+    const size_t body_len = sizeof(UNAUTHORIZED_TEXT) - 1;
+
+    onion_response_set_header(res, "WWW-Authenticate", "Basic realm=\"sist2\"");
+    onion_response_set_code(res, HTTP_UNAUTHORIZED);
+    /* Declare the length before the first body byte is written, so it cannot arrive after the headers */
+    onion_response_set_length(res, body_len);
+    onion_response_write(res, UNAUTHORIZED_TEXT, body_len);
+
+    return OCS_PROCESSED;
+}
+
+/*
+ * Destructor for the private data passed to onion_handler_new() in
+ * auth_basic(): releases the wrapped handler, then the wrapper struct itself.
+ * The b64credentials string is not freed here.
+ */
+void auth_basic_free(auth_basic_data_t *data) {
+    onion_handler_free(data->inside);
+    free(data);
+}
+
+/**
+ * Wrap `inside_level` in a handler that enforces HTTP Basic authentication.
+ *
+ * @param b64credentials expected text after "Basic " in the Authorization
+ *                       header (NULL accepts every request); the pointer is
+ *                       stored as is, not copied
+ * @param inside_level   handler invoked for authenticated requests; it is
+ *                       freed together with the returned handler
+ * @return the new handler created by onion_handler_new()
+ */
+onion_handler *auth_basic(const char *b64credentials, onion_handler *inside_level) {
+    /* NOTE(review): the allocation result is used without a NULL check */
+    auth_basic_data_t *state = malloc(sizeof(*state));
+
+    state->inside = inside_level;
+    state->b64credentials = b64credentials;
+
+    return onion_handler_new(
+            (onion_handler_handler) auth_basic_handler,
+            state,
+            (onion_handler_private_data_free) auth_basic_free);
+}
+
--- a/src/web/auth_basic.h
+++ b/src/web/auth_basic.h
@@ -0,0 +1,4 @@
+#include "src/sist.h"
+
+
+onion_handler *auth_basic(const char *b64credentials, onion_handler *inside_level);
--- a/src/web/serve.c
+++ b/src/web/serve.c
@@ -43,27 +43,40 @@ int javascript(void *p, onion_request *req, onion_response *res) {
    return OCS_PROCESSED;
 }

-int style(void *p, onion_request *req, onion_response *res) {
-    set_default_headers(res);
-    onion_response_set_header(res, "Content-Type", "text/css");
-    onion_response_set_length(res, sizeof(bundle_css));
-    onion_response_write(res, bundle_css, sizeof(bundle_css));
-    return OCS_PROCESSED;
+/* Tell whether the request carries a "sist" cookie whose value is exactly "dark". */
+int client_requested_dark_theme(onion_request *req) {
+    const char *theme = onion_request_get_cookie(req, "sist");
+
+    /* No cookie at all: the client did not ask for the dark theme */
+    return theme == NULL ? FALSE : strcmp(theme, "dark") == 0;
 }

-int bg_bars(void *p, onion_request *req, onion_response *res) {
+/*
+ * Serve the stylesheet bundle as text/css: bundle_dark_css when
+ * client_requested_dark_theme() reports the dark theme, bundle_css otherwise.
+ * The length is declared with sizeof() on the embedded array before writing.
+ */
+int style(void *p, onion_request *req, onion_response *res) {
    set_default_headers(res);
-    onion_response_set_header(res, "Content-Type", "image/png");
-    onion_response_set_length(res, sizeof(bg_bars_png));
-    onion_response_write(res, bg_bars_png, sizeof(bg_bars_png));
+
+    onion_response_set_header(res, "Content-Type", "text/css");
+
+    if (client_requested_dark_theme(req)) {
+        onion_response_set_length(res, sizeof(bundle_dark_css));
+        onion_response_write(res, bundle_dark_css, sizeof(bundle_dark_css));
+    } else {
+        onion_response_set_length(res, sizeof(bundle_css));
+        onion_response_write(res, bundle_css, sizeof(bundle_css));
+    }
    return OCS_PROCESSED;
 }

 int img_sprite_skin_flag(void *p, onion_request *req, onion_response *res) {
+    /* Serve the sprite sheet as image/png; the dark variant is sent when the client asked for the dark theme */
    set_default_headers(res);
    onion_response_set_header(res, "Content-Type", "image/png");
-    onion_response_set_length(res, sizeof(sprite_skin_flat_png));
-    onion_response_write(res, sprite_skin_flat_png, sizeof(sprite_skin_flat_png));
+    if (client_requested_dark_theme(req)) {
+        onion_response_set_length(res, sizeof(sprite_skin_flat_dark_png));
+        onion_response_write(res, sprite_skin_flat_dark_png, sizeof(sprite_skin_flat_dark_png));
+    } else {
+        onion_response_set_length(res, sizeof(sprite_skin_flat_png));
+        onion_response_write(res, sprite_skin_flat_png, sizeof(sprite_skin_flat_png));
+    }
    return OCS_PROCESSED;
 }

@@ -97,7 +110,7 @@ int thumbnail(void *p, onion_request *req, onion_response *res) {
    int written = onion_response_write(res, data, data_len);
    onion_response_flush(res);
    if (written != data_len || data_len == 0) {
-        printf("Couldn't write thumb\n");
+        LOG_DEBUG("serve.c", "Couldn't write thumbnail");
    }
    free(data);

@@ -168,7 +181,12 @@ int chunked_response_file(const char *filename, const char *mime,
        }
    }
    onion_response_set_length(res, length);
-    onion_response_set_header(res, "Content-Type", mime);
+    if (mime != NULL) {
+        onion_response_set_header(res, "Content-Type", mime);
+    } else {
+        onion_response_set_header(res, "Content-Type", "application/octet-stream");
+    }
+
    onion_response_write_headers(res);
    if ((onion_request_get_flags(request) & OR_HEAD) == OR_HEAD) {
        length = 0;
@@ -201,21 +219,13 @@ int chunked_response_file(const char *filename, const char *mime,
    return OCS_PROCESSED;
 }

-int search(void *p, onion_request *req, onion_response *res) {
+int search(UNUSED(void *p), onion_request *req, onion_response *res) {

    int flags = onion_request_get_flags(req);
    if ((flags & OR_METHODS) != OR_POST) {
        return OCS_NOT_PROCESSED;
    }

-    char *scroll_param;
-    const char *scroll = onion_request_get_query(req, "scroll");
-    if (scroll != NULL) {
-        scroll_param = "?scroll=3m";
-    } else {
-        scroll_param = "";
-    }
-
    const struct onion_block_t *block = onion_request_get_data(req);

    if (block == NULL) {
@@ -223,7 +233,7 @@ int search(void *p, onion_request *req, onion_response *res) {
    }

    char url[4096];
-    snprintf(url, 4096, "%s/sist2/_search%s", WebCtx.es_url, scroll_param);
+    snprintf(url, 4096, "%s/sist2/_search", WebCtx.es_url);
    response_t *r = web_post(url, onion_block_data(block), "Content-Type: application/json");

    set_default_headers(res);
@@ -232,6 +242,9 @@ int search(void *p, onion_request *req, onion_response *res) {

    if (r->status_code == 200) {
        onion_response_write(res, r->body, r->size);
+    } else {
+        sist_log("serve.c", SIST_WARNING, "ElasticSearch error during query");
+        onion_response_set_code(res, HTTP_INTERNAL_ERROR);
    }

    free_response(r);
@@ -239,43 +252,6 @@ int search(void *p, onion_request *req, onion_response *res) {
    return OCS_PROCESSED;
 }

-int scroll(void *p, onion_request *req, onion_response *res) {
-
-    int flags = onion_request_get_flags(req);
-    if ((flags & OR_METHODS) != OR_GET) {
-        return OCS_NOT_PROCESSED;
-    }
-
-    char url[4096];
-    snprintf(url, 4096, "%s/_search/scroll", WebCtx.es_url);
-
-    const char *scroll_id = onion_request_get_query(req, "scroll_id");
-
-    cJSON *json = cJSON_CreateObject();
-    cJSON_AddStringToObject(json, "scroll_id", scroll_id);
-    cJSON_AddStringToObject(json, "scroll", "3m");
-
-    char *json_str = cJSON_PrintUnformatted(json);
-    response_t *r = web_post(url, json_str, "Content-Type: application/json");
-
-    cJSON_Delete(json);
-    cJSON_free(json_str);
-
-    if (r->status_code != 200) {
-        free_response(r);
-        return OCS_NOT_PROCESSED;
-    }
-
-    set_default_headers(res);
-    onion_response_set_header(res, "Content-Type", "application/json");
-    onion_response_set_header(res, "Content-Disposition", "application/json");
-    onion_response_set_length(res, r->size);
-    onion_response_write(res, r->body, r->size);
-    free_response(r);
-
-    return OCS_PROCESSED;
-}
-
 int serve_file_from_url(cJSON *json, index_t *idx, onion_request *req, onion_response *res) {

    const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
@@ -287,7 +263,10 @@ int serve_file_from_url(cJSON *json, index_t *idx, onion_request *req, onion_res
             "%s%s/%s%s%s",
             idx->desc.rewrite_url, path, name, strlen(ext) == 0 ? "" : ".", ext);

-    return onion_shortcut_redirect(url, req, res);
+    dyn_buffer_t encoded = url_escape(url);
+    int ret = onion_shortcut_redirect(encoded.buf, req, res);
+    dyn_buffer_destroy(&encoded);
+    return ret;
 }

 int serve_file_from_disk(cJSON *json, index_t *idx, onion_request *req, onion_response *res) {
@@ -301,10 +280,15 @@ int serve_file_from_disk(cJSON *json, index_t *idx, onion_request *req, onion_re
    snprintf(full_path, PATH_MAX, "%s%s/%s%s%s",
             idx->desc.root, path, name, strlen(ext) == 0 ? "" : ".", ext);

+    char disposition[8196];
+    snprintf(disposition, sizeof(disposition), "inline; filename=\"%s%s%s\"",
+             name, strlen(ext) == 0 ? "" : ".", ext);
+    onion_response_set_header(res, "Content-Disposition", disposition);
+
    return chunked_response_file(full_path, mime, 1, req, res);
 }

-int index_info(void *p, onion_request *req, onion_response *res) {
+int index_info(UNUSED(void *p), onion_request *req, onion_response *res) {
    cJSON *json = cJSON_CreateObject();
    cJSON *arr = cJSON_AddArrayToObject(json, "indices");

@@ -318,7 +302,7 @@ int index_info(void *p, onion_request *req, onion_response *res) {
        cJSON_AddStringToObject(idx_json, "name", idx->desc.name);
        cJSON_AddStringToObject(idx_json, "version", idx->desc.version);
        cJSON_AddStringToObject(idx_json, "id", idx->desc.uuid);
-        cJSON_AddNumberToObject(idx_json, "timestamp", (double)idx->desc.timestamp);
+        cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
        cJSON_AddItemToArray(arr, idx_json);
    }

@@ -330,35 +314,99 @@ int index_info(void *p, onion_request *req, onion_response *res) {
    return OCS_PROCESSED;
 }

-int file(void *p, onion_request *req, onion_response *res) {
+
+/*
+ * Return the "_source" object of one document as JSON.
+ *
+ * The document id is read from query argument "1" (presumably the capture
+ * group of the route regex - confirm against the route table). When it is
+ * missing, OCS_PROCESSED is returned without writing a body.
+ * The document is fetched with elastic_get_document(); if its "_source" has
+ * no "index" field, or get_index_by_id() does not know that index, the
+ * document is released and OCS_NOT_PROCESSED is returned. Otherwise the
+ * "_source" object is serialised unformatted and written to the response.
+ */
+int document_info(UNUSED(void *p), onion_request *req, onion_response *res) {

    const char *arg_uuid = onion_request_get_query(req, "1");
    if (arg_uuid == NULL) {
        return OCS_PROCESSED;
    }

-    cJSON *source = elastic_get_document(arg_uuid);
+    cJSON *doc = elastic_get_document(arg_uuid);
+    cJSON *source = cJSON_GetObjectItem(doc, "_source");

-    const char *index_id = cJSON_GetObjectItem(source, "index")->valuestring;
-
-    index_t *idx = get_index_by_id(index_id);
-
-    if (idx == NULL) {
+    cJSON *index_id = cJSON_GetObjectItem(source, "index");
+    if (index_id == NULL) {
+        cJSON_Delete(doc);
        return OCS_NOT_PROCESSED;
    }

-    const char *name = cJSON_GetObjectItem(source, "name")->valuestring;
-    const char *ext = cJSON_GetObjectItem(source, "extension")->valuestring;
-    char disposition[8196];
-    snprintf(disposition, sizeof(disposition), "inline; filename=\"%s%s%s\"",
-             name, strlen(ext) == 0 ? "" : ".", ext);
-    onion_response_set_header(res, "Content-Disposition", disposition);
-
-    if (strlen(idx->desc.rewrite_url) == 0) {
-        return serve_file_from_disk(source, idx, req, res);
-    } else {
-        return serve_file_from_url(source, idx, req, res);
+    index_t *idx = get_index_by_id(index_id->valuestring);
+    if (idx == NULL) {
+        cJSON_Delete(doc);
+        return OCS_NOT_PROCESSED;
    }
+
+    onion_response_set_header(res, "Content-Type", "application/json");
+
+    char *json_str = cJSON_PrintUnformatted(source);
+    onion_response_write0(res, json_str);
+    free(json_str);
+    cJSON_Delete(doc);
+
+    return OCS_PROCESSED;
+}
+
+/**
+ * Serve the file behind a document id.
+ *
+ * The id is read from query argument "1"; when it is missing OCS_PROCESSED is
+ * returned without writing a body. Starting from that document, the "parent"
+ * field of each "_source" is followed until a document without a parent is
+ * reached; that document is the one served. A document whose "_source" has no
+ * "index" field, or whose index is unknown to get_index_by_id(), yields
+ * OCS_NOT_PROCESSED.
+ *
+ * The file is served from disk when the index has an empty rewrite_url and
+ * through a redirect otherwise; the result of that call is returned.
+ */
+int file(UNUSED(void *p), onion_request *req, onion_response *res) {
+
+    const char *arg_uuid = onion_request_get_query(req, "1");
+    if (arg_uuid == NULL) {
+        return OCS_PROCESSED;
+    }
+
+    const char *next = arg_uuid;
+    cJSON *doc = NULL;
+    cJSON *index_id = NULL;
+    cJSON *source = NULL;
+
+    while (true) {
+        cJSON *fetched = elastic_get_document(next);
+
+        /*
+         * Release the previous document now that the lookup is done. `next` may
+         * point into it, so it must stay alive until elastic_get_document()
+         * has returned. cJSON_Delete(NULL) is a no-op on the first pass.
+         */
+        cJSON_Delete(doc);
+        doc = fetched;
+
+        source = cJSON_GetObjectItem(doc, "_source");
+        index_id = cJSON_GetObjectItem(source, "index");
+        if (index_id == NULL) {
+            cJSON_Delete(doc);
+            return OCS_NOT_PROCESSED;
+        }
+        cJSON *parent = cJSON_GetObjectItem(source, "parent");
+        if (parent == NULL) {
+            break;
+        }
+        next = parent->valuestring;
+    }
+
+    index_t *idx = get_index_by_id(index_id->valuestring);
+
+    if (idx == NULL) {
+        cJSON_Delete(doc);
+        return OCS_NOT_PROCESSED;
+    }
+
+    int ret;
+    if (strlen(idx->desc.rewrite_url) == 0) {
+        ret = serve_file_from_disk(source, idx, req, res);
+    } else {
+        ret = serve_file_from_url(source, idx, req, res);
+    }
+    /* `source` belongs to `doc`, so the document is released only after it has been served */
+    cJSON_Delete(doc);
+
+    return ret;
+}
+
+/*
+ * Health endpoint: answers 204 when elastic_get_status() reports "open" and
+ * 500 for any other value. No body is written.
+ */
+int status(UNUSED(void *p), UNUSED(onion_request *req), onion_response *res) {
+    set_default_headers(res);
+    onion_response_set_header(res, "Content-Type", "application/x-empty");
+
+    /* The returned string is owned by this function and released right after the comparison */
+    char *index_state = elastic_get_status();
+    const int is_open = strcmp(index_state, "open") == 0;
+    free(index_state);
+
+    onion_response_set_code(res, is_open ? 204 : 500);
+
+    return OCS_PROCESSED;
+}

 void serve(const char *hostname, const char *port) {
@@ -368,17 +416,18 @@ void serve(const char *hostname, const char *port) {
    onion_set_hostname(o, hostname);
    onion_set_port(o, port);

-    onion_url *urls = onion_root_url(o);
+    onion_url *urls = onion_url_new();

    // Static paths
+    onion_set_root_handler(o, auth_basic(WebCtx.b64credentials, onion_url_to_handler(urls)));
+
    onion_url_add(urls, "", search_index);
    onion_url_add(urls, "css", style);
    onion_url_add(urls, "js", javascript);
-    onion_url_add(urls, "img/bg-bars.png", bg_bars);
    onion_url_add(urls, "img/sprite-skin-flat.png", img_sprite_skin_flag);

    onion_url_add(urls, "es", search);
-    onion_url_add(urls, "scroll", scroll);
+    onion_url_add(urls, "status", status);
    onion_url_add(
            urls,
            "^t/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})/"
@@ -386,8 +435,10 @@ void serve(const char *hostname, const char *port) {
            thumbnail
    );
    onion_url_add(urls, "^f/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})$", file);
+    onion_url_add(urls, "^d/([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12})$", document_info);
    onion_url_add(urls, "i", index_info);

+
    printf("Starting web server @ http://%s:%s\n", hostname, port);

    onion_listen(o);
--- a/src/web/static_generated.c
+++ b/src/web/static_generated.c
--- a/utf8.h
+++ b/utf8.h
--- a/web/css/auto-complete.css
+++ b/web/css/auto-complete.css
@@ -1,9 +0,0 @@
-.autocomplete-suggestions {
-    text-align: left; cursor: default; border: 1px solid #ccc; border-top: 0; background: #fff; box-shadow: -1px 1px 3px rgba(0,0,0,.1);
-
-    /* core styles should not be changed */
-    position: absolute; display: none; z-index: 9999; max-height: 254px; overflow: hidden; overflow-y: auto; box-sizing: border-box;
-}
-.autocomplete-suggestion { position: relative; padding: 0 .6em; line-height: 23px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; font-size: 1.02em; color: #333; }
-.autocomplete-suggestion b { font-weight: normal; color: #1f8dd6; }
-.autocomplete-suggestion.selected { background: #f0f0f0; }
--- a/web/css/dark.css
+++ b/web/css/dark.css
@@ -0,0 +1,459 @@
+*:focus {
+    outline: 0;
+}
+
+.info-icon {
+    width: 1rem;
+    margin-right: 0.2rem;
+    cursor: pointer;
+    color: #757575;
+    line-height: 1rem;
+    height: 1.1rem;
+}
+
+.info-icon:hover {
+    color: inherit;
+}
+
+.modal-title {
+    max-width: calc(100% - 2rem);
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+
+
+.path-row {
+    display: -ms-flexbox;
+    display: flex;
+    -ms-flex-align: start;
+    align-items: flex-start;
+}
+
+.tag-container {
+    margin-left: 0.3rem;
+}
+
+.path-line {
+    color: #BBB;
+    text-overflow: ellipsis;
+    overflow: hidden;
+}
+
+a {
+    color: #00BCD4;
+}
+
+body {
+    overflow-y: scroll;
+    background: black;
+}
+
+.progress {
+    margin-top: 1em;
+}
+
+.card, .modal-content {
+    margin-top: 1em;
+    background: #212121;
+    color: #e0e0e0;
+    border-radius: 1px;
+    border: none;
+}
+
+.table {
+    color: #e0e0e0;
+}
+
+.table td, .table th {
+    border: none;
+}
+
+.table thead th {
+    border-bottom: 1px solid #646464;
+}
+
+.modal-header .close {
+    color: #e0e0e0;
+    text-shadow: none;
+}
+
+.modal-header {
+    border-bottom: 1px solid #646464;
+}
+
+.sub-document {
+    background: #37474F !important;
+}
+
+.list-group-item.sub-document {
+    border-top: 1px solid #646464 !important;
+}
+
+.sub-document .text-muted {
+    color: #8a949c !important;
+}
+
+
+.list-group-item {
+    background: #212121;
+    color: #e0e0e0;
+
+    border-top: 1px solid #424242;
+    border-bottom: none;
+    border-left: none;
+    border-right: none;
+    padding: .25rem 0.5rem;
+}
+
+.list-group-item:first-child {
+    border-top: none;
+}
+
+.navbar-brand {
+    font-size: 1.75rem;
+    padding: 0;
+    color: #f5f5f5;
+}
+
+.navbar {
+    background: #546b7a;
+}
+
+.navbar a:hover {
+    color: #fff;
+}
+
+.navbar span {
+    color: #eee;
+}
+
+.document {
+    padding: 0.5rem;
+}
+
+.document p {
+    margin-bottom: 0;
+}
+
+.document:hover p {
+    text-decoration: underline;
+}
+
+.badge-video {
+    color: #FFFFFF;
+    background-color: #F27761;
+}
+
+.badge-image {
+    color: #FFFFFF;
+    background-color: #AA99C9;
+}
+
+.badge-audio {
+    color: #FFFFFF;
+    background-color: #00ADEF;
+}
+
+.badge-resolution {
+    color: #212529;
+    background-color: #B0BEC5;
+}
+
+.badge-text {
+    color: #FFFFFF;
+    background-color: #FAAB3C;
+}
+
+.card-img-overlay {
+    pointer-events: none;
+    padding: 0.75rem;
+
+    bottom: unset;
+    top: 0;
+    left: unset;
+    right: unset;
+}
+
+.file-title {
+    width: 100%;
+    line-height: 1rem;
+    height: 1.1rem;
+    font-size: 10pt;
+    white-space: nowrap;
+    text-overflow: ellipsis;
+    overflow: hidden;
+    color: #00BCD4;
+}
+
+.badge {
+    margin-right: 3px;
+}
+
+.badge-user {
+    color: #212529;
+    background-color: #e0e0e0;
+}
+
+.fit {
+    display: block;
+    min-width: 64px;
+    max-width: 100%;
+    max-height: 175px;
+    margin: 0 auto 0;
+    padding: 3px 3px 0;
+    width: auto;
+    height: auto;
+}
+
+.fit-sm {
+    display: block;
+    max-width: 64px;
+    max-height: 64px;
+    margin: 0 auto;
+    width: auto;
+    height: auto;
+}
+
+.audio-fit {
+    height: 39px;
+    vertical-align: bottom;
+    display: inline;
+    width: 100%;
+}
+
+@media (min-width: 1200px) {
+    .card-columns {
+        column-count: 4;
+    }
+}
+
+@media (min-width: 1500px) {
+    .container {
+        max-width: 1440px;
+    }
+
+    .card-columns {
+        column-count: 5;
+    }
+}
+
+@media (min-width: 1800px) {
+    .container {
+        max-width: 1550px;
+    }
+}
+
+mark {
+    background: rgba(251, 191, 41, 0.25);
+    border-radius: 0;
+    padding: 1px 0;
+    color: inherit;
+}
+
+.content-div mark {
+    background: rgba(251, 191, 41, 0.40);
+    color: white;
+}
+
+
+.content-div {
+    font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
+    font-size: 13px;
+    padding: 1em;
+    background-color: #37474F;
+    border: 1px solid #616161;
+    border-radius: 4px;
+    margin: 3px;
+    white-space: normal;
+    color: rgb(224, 224, 224);
+}
+
+.irs-single, .irs-from, .irs-to {
+    font-size: 13px;
+    background-color: #00BCD4;
+}
+
+.irs-slider {
+    cursor: col-resize;
+}
+
+.irs {
+    margin-top: 1em;
+    margin-bottom: 1em;
+}
+
+.custom-select {
+    overflow: auto;
+    background-color: #37474F;
+    border: 1px solid #616161;
+    color: #bdbdbd;
+}
+
+.custom-select:focus {
+    border-color: #757575;
+    outline: 0;
+    box-shadow: 0 0 0 .2rem rgba(0, 123, 255, .25);
+}
+
+option {
+    outline: none;
+}
+
+.form-control {
+    background-color: #37474F;
+    border: 1px solid #616161;
+    color: #fff;
+}
+
+.form-control:focus {
+    background-color: #546E7A;
+    color: #fff;
+}
+
+.input-group-text {
+    background: #263238;
+    border: 1px solid #616161;
+    color: #dbdbdb;
+}
+
+::placeholder {
+    color: #BDBDBD !important;
+    opacity: 1;
+}
+
+
+.inspire-tree .selected > .wholerow, .inspire-tree .selected > .title-wrap:hover + .wholerow {
+    background: none;
+}
+
+.inspire-tree .icon-expand::before, .inspire-tree .icon-collapse::before {
+    background-color: black;
+}
+
+.inspire-tree .title {
+    color: #eee;
+}
+
+.inspire-tree {
+    font-weight: 400;
+    font-size: 14px;
+    /* "Helvetica Neue" is one family name: it has to be quoted, not split into two list entries */
+    font-family: "Helvetica Neue", Helvetica, Verdana, sans-serif;
+    max-height: 350px;
+    overflow: auto;
+}
+
+.page-indicator {
+    line-height: 1rem;
+    padding: 0.5rem;
+    background: #212121;
+    color: #eee;
+    margin-top: 1em;
+}
+
+.btn-xs {
+    padding: .1rem .3rem;
+    font-size: .875rem;
+    border-radius: .2rem;
+}
+
+.btn {
+    color: #eee;
+}
+
+.nav-tabs .nav-link {
+    color: #e0e0e0;
+}
+
+.nav-tabs .nav-item.show .nav-link, .nav-tabs .nav-link.active {
+    background-color: #212121;
+    border-color: #616161 #616161 #212121;
+    color: #e0e0e0;
+}
+
+/* NOTE(review): the :focus/:hover rule that follows has the same specificity and overrides both values */
+.nav-tabs .nav-link:focus {
+    border-color: #616161 #616161 #212121;
+    color: #e0e0e0;
+}
+
+.nav-tabs .nav-link:focus, .nav-tabs .nav-link:hover {
+    border-color: #e0e0e0 #e0e0e0 #212121;
+    color: #e0e0e0;
+}
+
+.nav-tabs {
+    /*
+     * Recolour the bottom border only. The `border-bottom` shorthand with just a
+     * colour resets the style to `none`, which hides the border altogether.
+     */
+    border-bottom-color: #616161;
+}
+
+.nav {
+    margin-top: 0.5rem;
+}
+
+@media (max-width: 800px) {
+    #treeTabs {
+        flex-basis: inherit;
+        flex-grow: inherit;
+    }
+}
+
+.list-group {
+    margin-top: 1em;
+}
+
+.wrapper-sm {
+    min-width: 64px;
+}
+
+.media-expanded {
+    display: inherit;
+}
+
+.media-expanded .fit {
+    max-height: 250px;
+}
+
+@media (max-width: 600px) {
+    .media-expanded .fit {
+        max-height: none;
+    }
+
+    .tagline {
+        display: none;
+    }
+}
+
+.version {
+    color: #00BCD4;
+    margin-left: -18px;
+    margin-top: -14px;
+    font-size: 11px;
+}
+
+@media (min-width: 800px) {
+    .small-btn {
+        display: none;
+    }
+    .large-btn {
+        display: inherit;
+    }
+}
+
+@media (max-width: 801px) {
+    .small-btn {
+        display: inherit;
+    }
+    .large-btn {
+        display: none;
+    }
+}
+
+#searchBar {
+    border-right: none;
+}
+
+#pathTree .title {
+    cursor: pointer;
+}
+
+svg {
+    fill: white;
+}
--- a/web/css/jquery.toast.min.css
+++ b/web/css/jquery.toast.min.css
@@ -0,0 +1 @@
+.jq-toast-wrap,.jq-toast-wrap *{margin:0;padding:0}.jq-toast-wrap{display:block;position:fixed;width:250px;pointer-events:none!important;letter-spacing:normal;z-index:9000!important}.jq-toast-wrap.bottom-left{bottom:20px;left:20px}.jq-toast-wrap.bottom-right{bottom:20px;right:40px}.jq-toast-wrap.top-left{top:20px;left:20px}.jq-toast-wrap.top-right{top:20px;right:40px}.jq-toast-single{display:block;width:100%;padding:10px;margin:0 0 5px;border-radius:4px;font-size:12px;font-family:arial,sans-serif;line-height:17px;position:relative;pointer-events:all!important;background-color:#444;color:#fff}.jq-toast-single h2{font-family:arial,sans-serif;font-size:14px;margin:0 0 7px;background:0 0;color:inherit;line-height:inherit;letter-spacing:normal}.jq-toast-single a{color:#eee;text-decoration:none;font-weight:700;border-bottom:1px solid #fff;padding-bottom:3px;font-size:12px}.jq-toast-single ul{margin:0 0 0 15px;background:0 0;padding:0}.jq-toast-single ul li{list-style-type:disc!important;line-height:17px;background:0 0;margin:0;padding:0;letter-spacing:normal}.close-jq-toast-single{position:absolute;top:3px;right:7px;font-size:14px;cursor:pointer}.jq-toast-loader{display:block;position:absolute;top:-2px;height:5px;width:0;left:0;border-radius:5px;background:red}.jq-toast-loaded{width:100%}.jq-has-icon{padding:10px 10px 10px 
50px;background-repeat:no-repeat;background-position:10px}.jq-icon-info{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGwSURBVEhLtZa9SgNBEMc9sUxxRcoUKSzSWIhXpFMhhYWFhaBg4yPYiWCXZxBLERsLRS3EQkEfwCKdjWJAwSKCgoKCcudv4O5YLrt7EzgXhiU3/4+b2ckmwVjJSpKkQ6wAi4gwhT+z3wRBcEz0yjSseUTrcRyfsHsXmD0AmbHOC9Ii8VImnuXBPglHpQ5wwSVM7sNnTG7Za4JwDdCjxyAiH3nyA2mtaTJufiDZ5dCaqlItILh1NHatfN5skvjx9Z38m69CgzuXmZgVrPIGE763Jx9qKsRozWYw6xOHdER+nn2KkO+Bb+UV5CBN6WC6QtBgbRVozrahAbmm6HtUsgtPC19tFdxXZYBOfkbmFJ1VaHA1VAHjd0pp70oTZzvR+EVrx2Ygfdsq6eu55BHYR8hlcki+n+kERUFG8BrA0BwjeAv2M8WLQBtcy+SD6fNsmnB3AlBLrgTtVW1c2QN4bVWLATaIS60J2Du5y1TiJgjSBvFVZgTmwCU+dAZFoPxGEEs8nyHC9Bwe2GvEJv2WXZb0vjdyFT4Cxk3e/kIqlOGoVLwwPevpYHT+00T+hWwXDf4AJAOUqWcDhbwAAAAASUVORK5CYII=);background-color:#31708f;color:#d9edf7;border-color:#bce8f1}.jq-icon-warning{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGYSURBVEhL5ZSvTsNQFMbXZGICMYGYmJhAQIJAICYQPAACiSDB8AiICQQJT4CqQEwgJvYASAQCiZiYmJhAIBATCARJy+9rTsldd8sKu1M0+dLb057v6/lbq/2rK0mS/TRNj9cWNAKPYIJII7gIxCcQ51cvqID+GIEX8ASG4B1bK5gIZFeQfoJdEXOfgX4QAQg7kH2A65yQ87lyxb27sggkAzAuFhbbg1K2kgCkB1bVwyIR9m2L7PRPIhDUIXgGtyKw575yz3lTNs6X4JXnjV+LKM/m3MydnTbtOKIjtz6VhCBq4vSm3ncdrD2lk0VgUXSVKjVDJXJzijW1RQdsU7F77He8u68koNZTz8Oz5yGa6J3H3lZ0xYgXBK2QymlWWA+RWnYhskLBv2vmE+hBMCtbA7KX5drWyRT/2JsqZ2IvfB9Y4bWDNMFbJRFmC9E74SoS0CqulwjkC0+5bpcV1CZ8NMej4pjy0U+doDQsGyo1hzVJttIjhQ7GnBtRFN1UarUlH8F3xict+HY07rEzoUGPlWcjRFRr4/gChZgc3ZL2d8oAAAAASUVORK5CYII=);background-color:#8a6d3b;color:#fcf8e3;border-color:#faebcc}.jq-icon-error{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAHOSURBVEhLrZa/SgNBEMZzh0WKCClSCKaIYOED+AAKeQQLG8HWztLCImBrYadgIdY+gIKNYkBFSwu7CAoqCgkkoGBI/E28PdbLZmeDLgzZzcx83/zZ2SSXC1j
9fr+I1Hq93g2yxH4iwM1vkoBWAdxCmpzTxfkN2RcyZNaHFIkSo10+8kgxkXIURV5HGxTmFuc75B2RfQkpxHG8aAgaAFa0tAHqYFfQ7Iwe2yhODk8+J4C7yAoRTWI3w/4klGRgR4lO7Rpn9+gvMyWp+uxFh8+H+ARlgN1nJuJuQAYvNkEnwGFck18Er4q3egEc/oO+mhLdKgRyhdNFiacC0rlOCbhNVz4H9FnAYgDBvU3QIioZlJFLJtsoHYRDfiZoUyIxqCtRpVlANq0EU4dApjrtgezPFad5S19Wgjkc0hNVnuF4HjVA6C7QrSIbylB+oZe3aHgBsqlNqKYH48jXyJKMuAbiyVJ8KzaB3eRc0pg9VwQ4niFryI68qiOi3AbjwdsfnAtk0bCjTLJKr6mrD9g8iq/S/B81hguOMlQTnVyG40wAcjnmgsCNESDrjme7wfftP4P7SP4N3CJZdvzoNyGq2c/HWOXJGsvVg+RA/k2MC/wN6I2YA2Pt8GkAAAAASUVORK5CYII=);background-color:#a94442;color:#f2dede;border-color:#ebccd1}.jq-icon-success{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAADsSURBVEhLY2AYBfQMgf///3P8+/evAIgvA/FsIF+BavYDDWMBGroaSMMBiE8VC7AZDrIFaMFnii3AZTjUgsUUWUDA8OdAH6iQbQEhw4HyGsPEcKBXBIC4ARhex4G4BsjmweU1soIFaGg/WtoFZRIZdEvIMhxkCCjXIVsATV6gFGACs4Rsw0EGgIIH3QJYJgHSARQZDrWAB+jawzgs+Q2UO49D7jnRSRGoEFRILcdmEMWGI0cm0JJ2QpYA1RDvcmzJEWhABhD/pqrL0S0CWuABKgnRki9lLseS7g2AlqwHWQSKH4oKLrILpRGhEQCw2LiRUIa4lwAAAABJRU5ErkJggg==);color:#dff0d8;background-color:#3c763d;border-color:#d6e9c6}
--- a/web/css/light.css
+++ b/web/css/light.css
@@ -0,0 +1,316 @@
+*:focus {
+    outline: 0;
+}
+
+.info-icon {
+    width: 1rem;
+    margin-right: 0.2rem;
+    cursor: pointer;
+    color: #757575;
+    line-height: 1rem;
+    height: 1rem;
+}
+
+.info-icon:hover {
+    color: inherit;
+}
+
+.modal-title {
+    max-width: calc(100% - 2rem);
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
+
+.path-row {
+    display: -ms-flexbox;
+    display: flex;
+    -ms-flex-align: start;
+    align-items: flex-start;
+}
+
+.tag-container {
+    margin-left: 0.3rem;
+}
+
+.path-line {
+    color: #444;
+    text-overflow: ellipsis;
+    overflow: hidden;
+}
+
+body {
+    overflow-y: scroll;
+}
+
+.progress {
+    margin-top: 1em;
+}
+
+.card {
+    margin-top: 1em;
+    box-shadow: 0 .125rem .25rem rgba(0, 0, 0, .075) !important;
+}
+
+.sub-document {
+    background: #AB47BC1F !important;
+}
+
+.navbar-brand {
+    font-size: 1.75rem;
+    padding: 0;
+}
+
+.navbar {
+    background: #F7F7F7;
+    border-bottom: solid 1px #dfdfdf;
+}
+
+.document {
+    padding: 0.5rem;
+}
+
+.document p {
+    margin-bottom: 0;
+}
+
+.document:hover p {
+    text-decoration: underline;
+}
+
+.badge-video {
+    color: #FFFFFF;
+    background-color: #F27761;
+}
+
+.badge-image {
+    color: #FFFFFF;
+    background-color: #AA99C9;
+}
+
+.badge-audio {
+    color: #FFFFFF;
+    background-color: #00ADEF;
+}
+
+.badge-resolution {
+    color: #212529;
+    background-color: #FFC107;
+}
+
+.badge-user {
+    color: #212529;
+    background-color: #e0e0e0;
+}
+
+.badge-text {
+    color: #FFFFFF;
+    background-color: #FAAB3C;
+}
+
+.card-img-overlay {
+    pointer-events: none;
+    padding: 0.75rem;
+
+    bottom: unset;
+    top: 0;
+    left: unset;
+    right: unset;
+}
+
+.file-title {
+    width: 100%;
+    line-height: 1rem;
+    height: 1.1rem;
+    font-size: 10pt;
+    white-space: nowrap;
+    text-overflow: ellipsis;
+    overflow: hidden;
+}
+
+.badge {
+    margin-right: 3px;
+}
+
+.fit {
+    display: block;
+    min-width: 64px;
+    max-width: 100%;
+    max-height: 175px;
+    margin: 0 auto 0;
+    padding: 3px 3px 0 3px;
+    width: auto;
+    height: auto;
+}
+
+.fit-sm {
+    display: block;
+    max-width: 64px;
+    max-height: 64px;
+    margin: 0 auto 0;
+    width: auto;
+    height: auto;
+}
+
+.audio-fit {
+    height: 39px;
+    vertical-align: bottom;
+    display: inline;
+    width: 100%;
+}
+
+@media (min-width: 1200px) {
+    .card-columns {
+        column-count: 4;
+    }
+}
+
+@media (min-width: 1500px) {
+    .container {
+        max-width: 1440px;
+    }
+
+    .card-columns {
+        column-count: 5;
+    }
+}
+
+@media (min-width: 1800px) {
+    .container {
+        max-width: 1550px;
+    }
+}
+
+mark {
+    background: #fff217;
+    border-radius: 0;
+    padding: 1px 0;
+    color: inherit;
+}
+
+.content-div {
+    font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
+    font-size: 13px;
+    padding: 1em;
+    background-color: #f5f5f5;
+    border: 1px solid #ccc;
+    border-radius: 4px;
+    margin: 3px;
+    white-space: normal;
+    color: #000;
+}
+
+.irs-single, .irs-from, .irs-to {
+    font-size: 13px;
+}
+
+.irs-slider {
+    cursor: col-resize;
+}
+
+.custom-select {
+    overflow: auto;
+}
+
+.irs {
+    margin-top: 1em;
+    margin-bottom: 1em;
+}
+
+.inspire-tree .selected > .wholerow, .inspire-tree .selected > .title-wrap:hover + .wholerow {
+    background: none;
+}
+
+.inspire-tree {
+    font-weight: 400;
+    font-size: 14px;
+    font-family: "Helvetica Neue", Helvetica, Verdana, sans-serif; /* was "Helvetica, Nueue": misspelt, no such font */
+    max-height: 350px;
+    overflow: auto;
+}
+
+.page-indicator {
+    line-height: 1rem;
+    padding: 0.5rem;
+    background: #f8f9fa;
+    margin-top: 1em;
+}
+
+.btn-xs {
+    padding: .1rem .3rem;
+    font-size: .875rem;
+    border-radius: .2rem;
+}
+
+.nav {
+    margin-top: 0.5rem;
+}
+
+@media (max-width: 800px) {
+    #treeTabs {
+        flex-basis: inherit;
+        flex-grow: inherit;
+    }
+}
+
+.list-group {
+    margin-top: 1em;
+}
+
+.list-group-item {
+    padding: .25rem 0.5rem;
+}
+
+.wrapper-sm {
+    min-width: 64px;
+}
+
+.media-expanded {
+    display: inherit;
+}
+
+.media-expanded .fit {
+    max-height: 250px;
+}
+
+@media (max-width: 600px) {
+    .media-expanded .fit {
+        max-height: none;
+    }
+
+    .tagline {
+        display: none;
+    }
+}
+
+.version {
+    color: #007bff;
+    margin-left: -18px;
+    margin-top: -14px;
+    font-size: 11px;
+}
+
+@media (min-width: 800px) {
+    .small-btn {
+        display: none;
+    }
+    .large-btn {
+        display: inherit;
+    }
+}
+
+@media (max-width: 801px) {
+    .small-btn {
+        display: inherit;
+    }
+    .large-btn {
+        display: none;
+    }
+}
+
+#searchBar {
+    border-right: none;
+}
+
+#pathTree .title {
+    cursor: pointer;
+}
--- a/web/css/main.css
+++ b/web/css/main.css
@@ -1,163 +0,0 @@
-body {overflow-y:scroll;}
-.card {
-    margin-top: 1em;
-}
-.navbar-brand {
-    font-size: 1.75rem;
-    padding: 0;
-}
-.navbar {
-    background: #F7F7F7; border-bottom: solid 1px #dfdfdf;
-}
-.document {
-    padding: 0.5rem;
-}
-
-.document p {
-    margin-bottom: 0;
-}
-
-.document:hover p {
-    text-decoration: underline;
-}
-
-.badge-video {
-    color: #FFFFFF;
-    background-color: #F27761;
-}
-
-.badge-image {
-    color: #FFFFFF;
-    background-color: #AA99C9;
-}
-
-.badge-audio {
-    color: #FFFFFF;
-    background-color: #00ADEF;
-}
-
-.badge-resolution {
-    color: #212529;
-    background-color: #FFC107;
-}
-
-.badge-text {
-    color: #FFFFFF;
-    background-color: #FAAB3C;
-}
-
-.card-img-overlay {
-    pointer-events: none;
-    padding: 0.75rem;
-
-    bottom: unset;
-    top: 0;
-    left: unset;
-    right: unset;
-}
-
-.file-title {
-    font-size: 10pt;
-    white-space: nowrap;
-    text-overflow: ellipsis;
-    overflow: hidden;
-}
-
-.badge {
-    margin-right: 3px;
-}
-
-.fit {
-    display: block;
-    min-width: 64px;
-    max-width: 100%;
-    max-height: 175px;
-    margin: 0 auto 0;
-    padding: 3px 3px 0 3px;
-    width: auto;
-    height: auto;
-}
-
-.audio-fit {
-    height: 39px;
-    vertical-align: bottom;
-    display: inline;
-}
-
-@media (min-width: 1200px) {
-    .card-columns {
-        column-count: 4;
-    }
-}
-
-@media (min-width: 1500px) {
-    .container  {
-        max-width: 1440px;
-    }
-    .card-columns {
-        column-count: 5;
-    }
-}
-
-@media (min-width: 1800px) {
-    .container  {
-        max-width: 1550px;
-    }
-}
-
-mark {
-    background: #fff217;
-    border-radius: 0;
-    padding: 1px 0;
-}
-
-.content-div {
-    font-family: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;
-    font-size: 13px;
-    padding: 1em;
-    background-color: #f5f5f5;
-    border: 1px solid #ccc;
-    border-radius: 4px;
-    margin: 3px;
-}
-
-.irs-single, .irs-from, .irs-to {
-    font-size: 13px;
-}
-
-.irs-slider {
-    cursor: col-resize;
-}
-
-.custom-select {
-    overflow: auto;
-}
-
-.irs {
-    margin-top: 1em;
-    margin-bottom: 1em;
-}
-
-.inspire-tree .selected > .wholerow, .inspire-tree .selected > .title-wrap:hover + .wholerow
-{
-    background: none;
-}
-
-.inspire-tree {
-    font-weight: 400;
-    font-size: 14px;
-    font-family: Helvetica, Nueue, Verdana, sans-serif;
-    max-height: 350px;
-    overflow: auto;
-}
-
-.page-indicator {
-    line-height: 1rem;
-    padding: 0.5rem;
-}
-
-.btn-xs {
-    padding: .1rem .3rem;
-    font-size: .875rem;
-    border-radius: .2rem;
-}
--- a/web/css/smartphoto.min.css
+++ b/web/css/smartphoto.min.css
--- a/web/img/bg-bars.png
+++ b/web/img/bg-bars.png
--- a/web/img/sprite-skin-flat-dark.png
+++ b/web/img/sprite-skin-flat-dark.png
--- a/web/js/1_popper.min.js
+++ b/web/js/1_popper.min.js
--- a/web/js/5_inspire-tree.min.js
+++ b/web/js/5_inspire-tree.min.js
--- a/web/js/7_jquery.toast.min.js
+++ b/web/js/7_jquery.toast.min.js
--- a/web/js/auto-complete.min.js
+++ b/web/js/auto-complete.min.js
--- a/web/js/dom.js
+++ b/web/js/dom.js
@@ -75,39 +75,157 @@ function shouldPlayVideo(hit) {
    return videoc !== "hevc" && videoc !== "mpeg2video" && videoc !== "wmv3";
 }

-/**
- *
- * @param hit
- * @returns {Element}
- */
-function createDocCard(hit) {
-    let docCard = document.createElement("div");
-    docCard.setAttribute("class", "card shadow-sm");
+function shouldDisplayRawImage(hit) { // Truthy when the hit's mime starts with "image/" and its videoc is not "tiff"
+    return hit["_source"]["mime"] && hit["_source"]["mime"].startsWith("image/") && hit["_source"]["videoc"] !== "tiff";
+}

-    let docCardBody = document.createElement("div");
-    docCardBody.setAttribute("class", "card-body document");
+function makePlaceholder(w, h, small) {
+    let calc;
+    if (small) {
+        calc = w > h
+            ? (64 / w / h) >= 100
+                ? (64 * w / h)
+                : 64
+            : 64;
+    } else {
+        calc = w > h
+            ? (175 / w / h) >= 272
+                ? (175 * w / h)
+                : 175
+            : 175;
+    }

-    let link = document.createElement("a");
-    link.setAttribute("href", "f/" + hit["_id"]);
-    link.setAttribute("target", "_blank");
+    const el = document.createElement("div");
+    el.setAttribute("style", `height: ${calc}px`);
+    return el;
+}

-    //Title
-    let title = document.createElement("p");
+function ext(hit) { // Returns "." + extension, or "" when the hit has no extension or an empty one
+    return hit["_source"].hasOwnProperty("extension") && hit["_source"]["extension"] !== "" ? "." + hit["_source"]["extension"] : "";
+}
+
+function makeTitle(hit) {
+    let title = document.createElement("div");
    title.setAttribute("class", "file-title");
-    let extension = hit["_source"].hasOwnProperty("extension") && hit["_source"]["extension"] !== "" ? "." + hit["_source"]["extension"] : "";
+    let extension = ext(hit);

    applyNameToTitle(hit, title, extension);

    title.setAttribute("title", hit["_source"]["path"] + "/" + hit["_source"]["name"] + extension);
-    docCard.appendChild(title);
+    return title;
+}
+
+// Builds the badge <span> elements for a hit: a codec badge (videoc for video/image
+// hits, audioc for audio hits) followed by one badge per user tag.
+function getTags(hit, mimeCategory) {
+    const tags = [];
+    switch (mimeCategory) {
+        case "video":
+        case "image":
+            if (hit["_source"].hasOwnProperty("videoc") && hit["_source"]["videoc"]) {
+                const formatTag = document.createElement("span");
+                formatTag.setAttribute("class", "badge badge-pill badge-video");
+                // Regex with the g flag: a string pattern would only strip the first space
+                formatTag.appendChild(document.createTextNode(hit["_source"]["videoc"].replace(/ /g, "")));
+                tags.push(formatTag);
+            }
+            break;
+        case "audio": {
+            if (hit["_source"].hasOwnProperty("audioc") && hit["_source"]["audioc"]) {
+                const formatTag = document.createElement("span");
+                formatTag.setAttribute("class", "badge badge-pill badge-audio");
+                formatTag.appendChild(document.createTextNode(hit["_source"]["audioc"]));
+                tags.push(formatTag);
+            }
+            break;
+        }
+    }
+    // User tags: "<dotted.name>" or "<dotted.name>#<color>"; only the last name component is shown
+    if (hit["_source"].hasOwnProperty("tag")) {
+        hit["_source"]["tag"].forEach(tag => {
+            const userTag = document.createElement("span");
+            userTag.setAttribute("class", "badge badge-pill badge-user");
+            const tokens = tag.split("#");
+            if (tokens.length > 1) {
+                const bg = "#" + tokens[1];
+                const fg = lum(tokens[1]) > 40 ? "#000" : "#fff";
+                userTag.setAttribute("style", `background-color: ${bg}; color: ${fg}`);
+            }
+
+            const nameParts = tokens[0].split(".");
+            userTag.appendChild(document.createTextNode(nameParts[nameParts.length - 1]));
+            tags.push(userTag);
+        });
+    }
+
+    return tags;
+}
+
+function infoButtonCb(hit) { // Returns a click handler that fetches the document via getDocumentInfo() and fills/opens #modal
+    return () => {
+        getDocumentInfo(hit["_id"]).then(doc => {
+            $("#modal-title").text(doc["name"] + ext(hit));
+
+            const tbody = $("<tbody>");
+            $("#modal-body").empty() // rebuild the Field/Value table from scratch on every click
+                .append($("<table class='table table-sm'>")
+                    .append($("<thead>")
+                        .append($("<tr>")
+                            .append($("<th>").text("Field"))
+                            .append($("<th>").text("Value"))
+                        )
+                    )
+                    .append(tbody)
+                );
+
+            const displayFields = new Set([ // keys listed in the table, in addition to "_keyword.*", "_text.*" and "exif_*" keys
+                "mime", "size", "mtime", "path", "title", "width", "height", "duration", "audioc", "videoc",
+                "bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag"
+            ]);
+            Object.keys(doc)
+                .filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_"))
+                .forEach(key => {
+                    tbody.append($("<tr>")
+                        .append($("<td>").text(key))
+                        .append($("<td>").text(doc[key]))
+                    );
+                });
+            if (doc.hasOwnProperty("content") && doc["content"]) { // non-empty content is appended below the table
+                $("#modal-body").append($("<div class='content-div'>").text(doc["content"]))
+            }
+
+            $("#modal").modal();
+        });
+    }
+}
+
+function createDocCard(hit) {
+    let docCard = document.createElement("div");
+    docCard.setAttribute("class", "card");
+
+    let docCardBody = document.createElement("div");
+    docCardBody.setAttribute("class", "card-body document");
+
+    //Title
+    let title = makeTitle(hit);
+    let isSubDocument = false;
+
+    let link = document.createElement("a");
+    link.setAttribute("href", "f/" + hit["_id"]);
+    link.setAttribute("target", "_blank");
+    link.style.maxWidth = "calc(100% - 1.2rem)";
+    link.appendChild(title);
+
+    if (hit["_source"].hasOwnProperty("parent")) {
+        docCard.classList.add("sub-document");
+        isSubDocument = true;
+    }

    let tagContainer = document.createElement("div");
    tagContainer.setAttribute("class", "card-text");

    if (hit["_source"].hasOwnProperty("mime") && hit["_source"]["mime"] !== null) {

-        let tags = [];
-        let thumbnail = null;
        let thumbnailOverlay = null;
        let imgWrapper = document.createElement("div");
        imgWrapper.setAttribute("style", "position: relative");
@@ -115,26 +233,7 @@ function createDocCard(hit) {
        let mimeCategory = hit["_source"]["mime"].split("/")[0];

        //Thumbnail
-        if (mimeCategory === "video" && shouldPlayVideo(hit)) {
-            thumbnail = document.createElement("video");
-            addVidSrc("f/" + hit["_id"], hit["_source"]["mime"], thumbnail);
-
-            thumbnail.setAttribute("class", "fit");
-            thumbnail.setAttribute("loop", "");
-            thumbnail.setAttribute("controls", "");
-            thumbnail.setAttribute("preload", "none");
-            thumbnail.setAttribute("poster", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
-            thumbnail.addEventListener("dblclick", function () {
-                thumbnail.webkitRequestFullScreen();
-            });
-        } else if ((hit["_source"].hasOwnProperty("width") && hit["_source"]["width"] > 20 && hit["_source"]["height"] > 20)
-            || hit["_source"]["mime"] === "application/pdf"
-            || hit["_source"].hasOwnProperty("font_name")
-        ) {
-            thumbnail = document.createElement("img");
-            thumbnail.setAttribute("class", "card-img-top fit");
-            thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
-        }
+        let thumbnail = makeThumbnail(mimeCategory, hit, imgWrapper, false);

        //Thumbnail overlay
        switch (mimeCategory) {
@@ -144,15 +243,17 @@ function createDocCard(hit) {
                thumbnailOverlay.setAttribute("class", "card-img-overlay");

                //Resolution
-                let resolutionBadge = document.createElement("span");
-                resolutionBadge.setAttribute("class", "badge badge-resolution");
-                if (hit["_source"].hasOwnProperty("width")) {
-                    resolutionBadge.appendChild(document.createTextNode(hit["_source"]["width"] + "x" + hit["_source"]["height"]));
+                if (hit["_source"].hasOwnProperty("width") && hit["_source"]["width"] > 32 && hit["_source"]["height"] > 32) {
+                    let resolutionBadge = document.createElement("span");
+                    resolutionBadge.setAttribute("class", "badge badge-resolution");
+                    if (hit["_source"].hasOwnProperty("width")) {
+                        resolutionBadge.appendChild(document.createTextNode(hit["_source"]["width"] + "x" + hit["_source"]["height"]));
+                    }
+                    thumbnailOverlay.appendChild(resolutionBadge);
                }
-                thumbnailOverlay.appendChild(resolutionBadge);

                // Hover
-                if (thumbnail && hit["_source"]["videoc"] === "gif") {
+                if (thumbnail && hit["_source"]["videoc"] === "gif" && !isSubDocument) {
                    gifOver(thumbnail, hit);
                }
                break;
@@ -162,52 +263,34 @@ function createDocCard(hit) {
                if (hit["_source"].hasOwnProperty("duration")) {
                    thumbnailOverlay = document.createElement("div");
                    thumbnailOverlay.setAttribute("class", "card-img-overlay");
-                    let durationBadge = document.createElement("span");
+                    const durationBadge = document.createElement("span");
                    durationBadge.setAttribute("class", "badge badge-resolution");
                    durationBadge.appendChild(document.createTextNode(humanTime(hit["_source"]["duration"])));
                    thumbnailOverlay.appendChild(durationBadge);
                }
        }

-        //Tags
-        //todo: handle new tags
-        switch (mimeCategory) {
-            case "video":
-            case "image":
-                if (hit.hasOwnProperty("videoc")) {
-                    let formatTag = document.createElement("span");
-                    formatTag.setAttribute("class", "badge badge-pill badge-video");
-                    formatTag.appendChild(document.createTextNode(hit["_source"]["videoc"].replace(" ", "")));
-                    tags.push(formatTag);
-                }
-                break;
-            case "audio": {
-                if (hit.hasOwnProperty("audioc")) {
-                    let formatTag = document.createElement("span");
-                    formatTag.setAttribute("class", "badge badge-pill badge-audio");
-                    formatTag.appendChild(document.createTextNode(hit["_source"]["audioc"]));
-                    tags.push(formatTag);
-                }
-            }
-                break;
+        // Tags
+        let tags = getTags(hit, mimeCategory);
+        for (let i = 0; i < tags.length; i++) {
+            tagContainer.appendChild(tags[i]);
        }

        //Content
        let contentHl = getContentHighlight(hit);
        if (contentHl !== undefined) {
-            let contentDiv = document.createElement("div");
-            contentDiv.setAttribute("class", "content-div bg-light");
+            const contentDiv = document.createElement("div");
+            contentDiv.setAttribute("class", "content-div");
            contentDiv.insertAdjacentHTML('afterbegin', contentHl);
            docCard.appendChild(contentDiv);
        }

        if (thumbnail !== null) {
-            imgWrapper.appendChild(thumbnail);
            docCard.appendChild(imgWrapper);
        }

        //Audio
-        if (mimeCategory === "audio" && hit["_source"].hasOwnProperty("audioc")) {
+        if (mimeCategory === "audio" && hit["_source"].hasOwnProperty("audioc") && !isSubDocument) {

            let audio = document.createElement("audio");
            audio.setAttribute("preload", "none");
@@ -222,10 +305,6 @@ function createDocCard(hit) {
        if (thumbnailOverlay !== null) {
            imgWrapper.appendChild(thumbnailOverlay);
        }
-
-        for (let i = 0; i < tags.length; i++) {
-            tagContainer.appendChild(tags[i]);
-        }
    }

    //Size tag
@@ -234,20 +313,211 @@ function createDocCard(hit) {
    sizeTag.setAttribute("class", "text-muted");
    tagContainer.appendChild(sizeTag);

-    docCardBody.appendChild(link);
+    const titleWrapper = document.createElement("div");
+    titleWrapper.style.display = "flex";
+
+    const infoButton = makeInfoButton(hit);
+
+    titleWrapper.appendChild(infoButton);
+    titleWrapper.appendChild(link);
+
+    docCardBody.appendChild(titleWrapper);
    docCard.appendChild(docCardBody);

-    link.appendChild(title);
    docCardBody.appendChild(tagContainer);

    return docCard;
 }

+// Creates the thumbnail element for a hit (<video> or <img>); it is appended to imgWrapper,
+// in place of a sized placeholder, once the poster/thumbnail image has loaded.
+// Returns the element, or null when the hit gets no thumbnail (callers test "!== null").
+function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
+    let thumbnail = null;
+    let isSubDocument = hit["_source"].hasOwnProperty("parent");
+
+    if (mimeCategory === "video" && shouldPlayVideo(hit) && !isSubDocument) {
+        thumbnail = document.createElement("video");
+        addVidSrc("f/" + hit["_id"], hit["_source"]["mime"], thumbnail);
+
+        const placeholder = makePlaceholder(hit["_source"]["width"], hit["_source"]["height"], small);
+        imgWrapper.appendChild(placeholder);
+
+        if (small) {
+            thumbnail.setAttribute("class", "fit-sm");
+            thumbnail.style.cursor = "pointer";
+            thumbnail.title = "Enlarge";
+            thumbnail.addEventListener("click", function () {
+                imgWrapper.classList.remove("wrapper-sm", "mr-1");
+                imgWrapper.parentElement.classList.add("media-expanded");
+                thumbnail.setAttribute("class", "fit");
+                thumbnail.setAttribute("controls", "");
+            });
+        } else {
+            thumbnail.setAttribute("class", "fit");
+            thumbnail.setAttribute("controls", "");
+        }
+        thumbnail.setAttribute("preload", "none");
+        thumbnail.setAttribute("poster", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
+        thumbnail.addEventListener("dblclick", function () {
+            thumbnail.setAttribute("controls", "");
+            if (thumbnail.webkitRequestFullScreen) {
+                thumbnail.webkitRequestFullScreen();
+            } else {
+                thumbnail.requestFullscreen();
+            }
+        });
+        const poster = new Image();
+        poster.src = thumbnail.getAttribute('poster');
+        poster.addEventListener("load", function () {
+            placeholder.remove();
+            imgWrapper.appendChild(thumbnail);
+        });
+    } else if ((hit["_source"].hasOwnProperty("width") && hit["_source"]["width"] > 32 && hit["_source"]["height"] > 32)
+        || hit["_source"]["mime"] === "application/pdf"
+        || hit["_source"]["mime"] === "application/epub+zip"
+        || hit["_source"]["mime"] === "application/x-cbz"
+        || hit["_source"]["mime"] === "application/x-cbr"
+        || hit["_source"].hasOwnProperty("font_name")
+    ) {
+        thumbnail = document.createElement("img");
+        if (small) {
+            thumbnail.setAttribute("class", "fit-sm");
+        } else {
+            thumbnail.setAttribute("class", "card-img-top fit");
+        }
+        thumbnail.setAttribute("src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
+
+        if (!hit["_source"]["parent"] && shouldDisplayRawImage(hit)) {
+            imgWrapper.setAttribute("id", "sp" + hit["_id"]);
+            imgWrapper.setAttribute("data-src", `t/${hit["_source"]["index"]}/${hit["_id"]}`);
+            imgWrapper.setAttribute("href", `f/${hit["_id"]}`);
+            imgWrapper.setAttribute("data-caption", hit["_source"]["path"] + "/" + hit["_source"]["name"] + ext(hit));
+            imgWrapper.setAttribute("data-group", "p" + Math.floor(docCount / SIZE));
+            imgWrapper.classList.add("sp");
+        }
+
+        const placeholder = makePlaceholder(hit["_source"]["width"], hit["_source"]["height"], small);
+        imgWrapper.appendChild(placeholder);
+
+        thumbnail.addEventListener("error", () => {
+            imgWrapper.remove();
+        });
+        thumbnail.addEventListener("load", () => {
+            placeholder.remove();
+            imgWrapper.appendChild(thumbnail);
+        });
+    }
+
+    return thumbnail;
+}
+
+function makeInfoButton(hit) { // Builds the "info-icon" span whose click handler is infoButtonCb(hit)
+    const infoButton = document.createElement("span");
+    infoButton.appendChild(document.createTextNode("🛈"));
+    infoButton.setAttribute("class", "info-icon");
+    infoButton.addEventListener("click", infoButtonCb(hit));
+    return infoButton;
+}
+
+// Builds the list-mode row (".list-group-item") for a search hit: small thumbnail, info
+// button + title link, optional content highlight, then the path, tags and size.
+function createDocLine(hit) {
+    const mime = hit["_source"]["mime"];
+    let mimeCategory = mime ? mime.split("/")[0] : null;
+    let tags = getTags(hit, mimeCategory);
+
+    let imgWrapper = document.createElement("div");
+    imgWrapper.setAttribute("class", "align-self-start mr-1 wrapper-sm");
+
+    let media = document.createElement("div");
+    media.setAttribute("class", "media");
+
+    const line = document.createElement("div");
+    line.setAttribute("class", "list-group-item flex-column align-items-start");
+
+    if (hit["_source"].hasOwnProperty("parent")) {
+        line.classList.add("sub-document");
+    }
+
+    const infoButton = makeInfoButton(hit);
+
+    const title = makeTitle(hit);
+
+    let link = document.createElement("a");
+    link.setAttribute("href", "f/" + hit["_id"]);
+    link.setAttribute("target", "_blank");
+    link.appendChild(title);
+
+    const titleDiv = document.createElement("div");
+
+    const titleWrapper = document.createElement("div");
+    titleWrapper.style.display = "flex";
+    titleWrapper.appendChild(infoButton);
+    titleWrapper.appendChild(link);
+
+    titleDiv.appendChild(titleWrapper);
+
+    line.appendChild(media);
+
+    let thumbnail = makeThumbnail(mimeCategory, hit, imgWrapper, true);
+    if (thumbnail) {
+        media.appendChild(imgWrapper);
+    }
+    media.appendChild(titleDiv);
+
+    // Content
+    let contentHl = getContentHighlight(hit);
+    if (contentHl !== undefined) {
+        const contentDiv = document.createElement("div");
+        contentDiv.setAttribute("class", "content-div");
+        contentDiv.insertAdjacentHTML('afterbegin', contentHl);
+        titleDiv.appendChild(contentDiv);
+    }
+
+    let pathLine = document.createElement("div");
+    pathLine.setAttribute("class", "path-row");
+
+    let path = document.createElement("div");
+    path.setAttribute("class", "path-line");
+    path.setAttribute("title", hit["_source"]["path"] + "/");
+    path.appendChild(document.createTextNode(hit["_source"]["path"] + "/"));
+
+    let tagContainer = document.createElement("div");
+    tagContainer.setAttribute("class", "tag-container");
+
+    for (let i = 0; i < tags.length; i++) {
+        tagContainer.appendChild(tags[i]);
+    }
+
+    //Size tag
+    let sizeTag = document.createElement("small");
+    sizeTag.appendChild(document.createTextNode(humanFileSize(hit["_source"]["size"])));
+    sizeTag.setAttribute("class", "text-muted");
+    tagContainer.appendChild(sizeTag);
+
+    titleDiv.appendChild(pathLine);
+    pathLine.appendChild(path);
+    pathLine.appendChild(tagContainer);
+
+    return line;
+}
+
+function makePreloader() { // Builds a ".progress" div holding a 100%-wide striped, animated progress bar
+    const elem = document.createElement("div");
+    elem.setAttribute("class", "progress");
+    const bar = document.createElement("div");
+    bar.setAttribute("class", "progress-bar progress-bar-striped progress-bar-animated");
+    bar.setAttribute("style", "width: 100%");
+    elem.appendChild(bar);
+
+    return elem;
+}
+
 function makePageIndicator(searchResult) {
    let pageIndicator = document.createElement("div");
-    pageIndicator.setAttribute("class", "page-indicator shadow-sm bg-light font-weight-light");
-    const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
-        ? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
+    pageIndicator.setAttribute("class", "page-indicator font-weight-light");
+    const totalHits = searchResult["aggregations"]["total_count"]["value"];
    pageIndicator.appendChild(document.createTextNode(docCount + " / " + totalHits));
    return pageIndicator;
 }
@@ -256,18 +526,56 @@ function makePageIndicator(searchResult) {
 function makeStatsCard(searchResult) {

    let statsCard = document.createElement("div");
-    statsCard.setAttribute("class", "card");
+    statsCard.setAttribute("class", "card stat");
    let statsCardBody = document.createElement("div");
    statsCardBody.setAttribute("class", "card-body");

-    let stat = document.createElement("p");
-    const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
-        ? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
+    const resultMode = document.createElement("div");
+    resultMode.setAttribute("class", "btn-group btn-group-toggle");
+    resultMode.setAttribute("data-toggle", "buttons");
+    resultMode.style.cssFloat = "right";
+
+    const listMode = document.createElement("label");
+    listMode.setAttribute("class", "btn btn-primary");
+    listMode.appendChild(document.createTextNode("List"));
+
+    const gridMode = document.createElement("label");
+    gridMode.setAttribute("class", "btn btn-primary");
+    gridMode.appendChild(document.createTextNode("Grid"));
+
+    resultMode.appendChild(gridMode);
+    resultMode.appendChild(listMode);
+
+    if (CONF.options.display === "grid") {
+        gridMode.classList.add("active")
+    } else {
+        listMode.classList.add("active")
+    }
+
+    gridMode.addEventListener("click", () => {
+        console.log("what");
+        console.log(CONF.options);
+        CONF.options.display = "grid";
+        console.log(CONF.options);
+        CONF.save();
+        console.log(CONF.options);
+        searchDebounced();
+    });
+    listMode.addEventListener("click", () => {
+        CONF.options.display = "list";
+        CONF.save();
+        searchDebounced();
+    });
+
+    let stat = document.createElement("span");
+    const totalHits = searchResult["aggregations"]["total_count"]["value"];
    stat.appendChild(document.createTextNode(totalHits + " results in " + searchResult["took"] + "ms"));
+
    statsCardBody.appendChild(stat);
+    statsCardBody.appendChild(resultMode);

    if (totalHits !== 0) {
-        let sizeStat = document.createElement("span");
+        let sizeStat = document.createElement("div");
        sizeStat.appendChild(document.createTextNode(humanFileSize(searchResult["aggregations"]["total_size"]["value"])));
        statsCardBody.appendChild(sizeStat);
    }
@@ -279,7 +587,11 @@ function makeStatsCard(searchResult) {

 function makeResultContainer() {
    let resultContainer = document.createElement("div");
-    resultContainer.setAttribute("class", "card-columns");

+    if (CONF.options.display === "grid") {
+        resultContainer.setAttribute("class", "card-columns");
+    } else {
+        resultContainer.setAttribute("class", "list-group");
+    }
    return resultContainer;
 }
--- a/web/js/jquery-smartphoto.min.js
+++ b/web/js/jquery-smartphoto.min.js
--- a/web/js/search.js
+++ b/web/js/search.js
@@ -1,14 +1,64 @@
-const SIZE = 20;
+const SIZE = 40; // hits requested per page; a shorter page marks the end of the results
 let mimeMap = [];
-let tree;
+let tagMap = []; // node data handed to the tag filter tree
+let mimeTree;
+let tagTree;

 let searchBar = document.getElementById("searchBar");
 let pathBar = document.getElementById("pathBar");
-let scroll_id = null;
+let lastDoc = null; // last hit of the most recent page; passed to search() to fetch the next page
+let reachedEnd = false; // set by search() when a page comes back with fewer than SIZE hits
 let docCount = 0;
 let coolingDown = false;
 let searchBusy = true;
 let selectedIndices = [];
+let indexMap = {}; // index id -> index name, filled from the "i" endpoint response
+
+const CONF = new Settings(); // created before _defaults is initialised; safe because _defaults is only read inside load()
+
+const _defaults = { // options used by Settings.load() when localStorage has no "options" entry
+    display: "grid",
+    fuzzy: true,
+    highlight: true
+};
+
+function Settings() {
+    this.options = {};
+
+    this._onUpdate = function () {
+        $("#fuzzyToggle").prop("checked", this.options.fuzzy);
+    }
+
+    this.load = function () {
+        const raw = window.localStorage.getItem("options");
+        if (raw === null) {
+            this.options = _defaults;
+        } else {
+            this.options = JSON.parse(raw);
+        }
+
+        this._onUpdate();
+    }
+
+    this.save = function () {
+        window.localStorage.setItem("options", JSON.stringify(this.options));
+        this._onUpdate();
+    }
+}
+
+
+function showEsError() {
+    $.toast({
+        heading: "Elasticsearch connection error",
+        text: "sist2 web module encountered an error while connecting " +
+            "to Elasticsearch. See server logs for more information.",
+        stack: false,
+        bgColor: "#a94442",
+        textColor: "#f2dede",
+        position: 'bottom-right',
+        hideAfter: false
+    });
+}

 jQuery["jsonPost"] = function (url, data) {
    return jQuery.ajax({
@@ -17,25 +67,76 @@ jQuery["jsonPost"] = function (url, data) {
        data: JSON.stringify(data),
        contentType: "application/json"
    }).fail(err => {
+        showEsError();
        console.log(err);
    });
 };

-function toggleSearchBar() {
+window.onload = () => {
+    $("#theme").on("click", () => {
+        if (!document.cookie.includes("sist")) {
+            document.cookie = "sist=dark";
+        } else {
+            document.cookie = "sist=; Max-Age=-99999999;";
+        }
+        window.location.reload();
+    })
+    CONF.load();
+};
+
+function toggleFuzzy() { // re-runs the debounced search; search() reads #fuzzyToggle itself (presumably bound to that checkbox in the page markup - confirm)
    searchDebounced();
 }

 $.jsonPost("i").then(resp => {
+
+    const urlIndices = (new URLSearchParams(location.search)).get("i");
    resp["indices"].forEach(idx => {
-        $("#indices").append($("<option>")
+        indexMap[idx.id] = idx.name;
+        const opt = $("<option>")
            .attr("value", idx.id)
-            .attr("selected", true)
-            .append(idx.name)
-        );
-        selectedIndices.push(idx.id);
+            .append(idx.name);
+
+        if (urlIndices) {
+            if (urlIndices.split(",").indexOf(idx.name) !== -1) {
+                opt.attr("selected", true);
+                selectedIndices.push(idx.id);
+            }
+        } else if (!idx.name.includes("(nsfw)")) {
+            opt.attr("selected", true);
+            selectedIndices.push(idx.id);
+        }
+        $("#indices").append(opt);
    });
+
+    createPathTree("#pathTree");
 });

+function getDocumentInfo(id) {
+    return $.getJSON("d/" + id).fail(e => {
+        console.log(e);
+        showEsError();
+    })
+}
+
+function handleTreeClick(tree) {
+    return (event, node, handler) => {
+        event.preventTreeDefault();
+
+        if (node.id === "any") {
+            if (!node.itree.state.checked) {
+                tree.deselect();
+            }
+        } else {
+            tree.node("any").deselect();
+        }
+
+        handler();
+        searchDebounced();
+    }
+}
+
+//TODO: filter based on selected indexes, sort mime types
 $.jsonPost("es", {
    aggs: {
        mimeTypes: {
@@ -72,104 +173,108 @@ $.jsonPost("es", {
    });
    mimeMap.push({"text": "All", "id": "any"});

-    tree = new InspireTree({
+    mimeTree = new InspireTree({
        selection: {
            mode: 'checkbox'
        },
        data: mimeMap
    });
-    new InspireTreeDOM(tree, {
-        target: '.tree'
+    new InspireTreeDOM(mimeTree, {
+        target: '#mimeTree'
    });
-    tree.on("node.click", function (event, node, handler) {
-        event.preventTreeDefault();
+    mimeTree.on("node.click", handleTreeClick(mimeTree));
+    mimeTree.deselect();
+    mimeTree.node("any").select();
+});

-        if (node.id === "any") {
-            if (!node.itree.state.checked) {
-                tree.deselect();
+// Tags tree: aggregates the "tag" field and builds the tag filter tree from the buckets
+$.jsonPost("es", {
+    aggs: {
+        tags: {
+            terms: {
+                field: "tag",
+                size: 10000 // upper bound on the number of tag buckets requested
            }
-        } else {
-            tree.node("any").deselect();
        }
+    },
+    size: 0, // only the aggregation is needed, no hits
+}).then(resp => {
+    resp["aggregations"]["tags"]["buckets"]
+        .sort((a, b) => a["key"].localeCompare(b["key"])) // order buckets by tag name
+        .forEach(bucket => {
+            addTag(tagMap, bucket["key"], bucket["key"], bucket["doc_count"]) // the tag string doubles as the node id
+        });

-        handler();
-        searchDebounced();
+    tagMap.push({"text": "All", "id": "any"}); // catch-all node; the "any" id is special-cased by handleTreeClick() and search()
+    tagTree = new InspireTree({
+        selection: {
+            mode: 'checkbox'
+        },
+        data: tagMap
    });
-    tree.select();
-    tree.node("any").deselect();
+    new InspireTreeDOM(tagTree, {
+        target: '#tagTree'
+    });
+    tagTree.on("node.click", handleTreeClick(tagTree));
+    tagTree.node("any").select(); // start with the "All" node selected
    searchBusy = false;
 });

-new autoComplete({
-    selector: '#pathBar',
-    minChars: 1,
-    delay: 75,
-    renderItem: function (item) {
-        return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item + '</div>';
-    },
-    source: async function (term, suggest) {
-        term = term.toLowerCase();
+function addTag(map, tag, id, count) {
+    let tags = tag.split("#")[0].split(".");

-        const choices = await getPathChoices();
+    let child = {
+        id: id,
+        text: tags.length !== 1 ? tags[0] : `${tags[0]} (${count})`,
+        children: []
+    };

-        let matches = [];
-        for (let i = 0; i < choices.length; i++) {
-            if (~choices[i].toLowerCase().indexOf(term)) {
-                matches.push(choices[i]);
+    let found = false;
+    map.forEach(node => {
+        if (node.text === child.text) {
+            found = true;
+            if (tags.length !== 1) {
+                addTag(node.children, tags.slice(1).join("."), id, count);
            }
        }
-        suggest(matches);
-    },
-    onSelect: function () {
-        searchDebounced();
+    });
+    if (!found) {
+        if (tags.length !== 1) {
+            addTag(child.children, tags.slice(1).join("."), id, count);
+            map.push(child);
+        } else {
+            map.push(child);
+        }
    }
-});
+}

 function insertHits(resultContainer, hits) {
    for (let i = 0; i < hits.length; i++) {
-        resultContainer.appendChild(createDocCard(hits[i]));
+
+        if (CONF.options.display === "grid") {
+            resultContainer.appendChild(createDocCard(hits[i]));
+        } else {
+            resultContainer.appendChild(createDocLine(hits[i]));
+        }
        docCount++;
    }
 }

 window.addEventListener("scroll", function () {
-    if (!coolingDown && !searchBusy) {
+    if (!searchBusy) { // nothing to do while a search request is still in flight
        let threshold = 400;

        if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight - threshold) {
-            coolingDown = true;
-            doScroll();
+            if (!reachedEnd) { // search() sets reachedEnd once a page holds fewer than SIZE hits
+                coolingDown = true;
+                search(lastDoc); // next page: the last hit received is used to build search_after
+            }
        }
    }
 });

-function doScroll() {
-    $.get("scroll", {scroll_id: scroll_id})
-        .then(searchResult => {
-            let searchResults = document.getElementById("searchResults");
-            let hits = searchResult["hits"]["hits"];
-
-            //Page indicator
-            let pageIndicator = makePageIndicator(searchResult);
-            searchResults.appendChild(pageIndicator);
-
-            //Result container
-            let resultContainer = makeResultContainer();
-            searchResults.appendChild(resultContainer);
-
-            insertHits(resultContainer, hits);
-
-            if (hits.length === SIZE) {
-                coolingDown = false;
-            }
-        })
-        .fail(() => {
-            window.location.reload();
-        })
-}
-
-function getSelectedMimeTypes() {
-    let mimeTypes = [];
+function getSelectedNodes(tree) {
+    let selectedNodes = [];

    let selected = tree.selected();

@@ -181,105 +286,142 @@ function getSelectedMimeTypes() {

        //Only get children
        if (selected[i].text.indexOf("(") !== -1) {
-            mimeTypes.push(selected[i].id);
+            selectedNodes.push(selected[i].id);
        }
    }

-    return mimeTypes
+    return selectedNodes
 }

-function search() {
+function search(after = null) {
+    lastDoc = null;
+
    if (searchBusy) {
        return;
    }
    searchBusy = true;
-    //Clear old search results
+
    let searchResults = document.getElementById("searchResults");
-    while (searchResults.firstChild) {
-        searchResults.removeChild(searchResults.firstChild);
+    //Clear old search results
+    let preload;
+    if (!after) {
+        while (searchResults.firstChild) {
+            searchResults.removeChild(searchResults.firstChild);
+        }
+        preload = makePreloader();
+        searchResults.appendChild(preload);
    }

    let query = searchBar.value;
-    let condition = $("#barToggle").prop("checked") ? "must" : "should";
+    let empty = query === "";
+    let condition = empty ? "should" : "must";
    let filters = [
        {range: {size: {gte: size_min, lte: size_max}}},
        {terms: {index: selectedIndices}}
    ];
+    let fields = [
+        "name^8",
+        "content^3",
+        "album^8", "artist^8", "title^8", "genre^2", "album_artist^8",
+        "font_name^6"
+    ];
+
+    if ($("#fuzzyToggle").prop("checked")) {
+        fields.push("content.nGram");
+        fields.push("name.nGram^3");
+    }

    let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
    if (path !== "") {
        filters.push([{term: {path: path}}])
    }
-    let mimeTypes = getSelectedMimeTypes();
+    let mimeTypes = getSelectedNodes(mimeTree);
    if (!mimeTypes.includes("any")) {
        filters.push([{terms: {"mime": mimeTypes}}]);
    }

-    $.jsonPost("es?scroll=1", {
+    let tags = getSelectedNodes(tagTree);
+    if (!tags.includes("any")) {
+        filters.push([{terms: {"tag": tags}}]);
+    }
+
+    let q = {
        "_source": {
-            excludes: ["content"]
+            excludes: ["content", "_tie"]
        },
        query: {
            bool: {
                [condition]: {
-                    multi_match: {
+                    simple_query_string: {
                        query: query,
-                        type: "most_fields",
-                        fields: [
-                            "name^8", "name.nGram^3", "content^3",
-                            "content.nGram",
-                            "album^8", "artist^8", "title^8", "genre^2", "album_artist^8",
-                            "font_name^6"
-                        ],
-                        operator: "and"
+                        fields: fields,
+                        default_operator: "and"
                    }
                },
                filter: filters
            }
        },
-        sort: [
-            "_score"
+        "sort": [
+            {"_score": {"order": "desc"}},
+            {"_tie": {"order": "asc"}}
        ],
-        highlight: {
+        aggs:
+            {
+                total_size: {"sum": {"field": "size"}},
+                total_count: {"value_count": {"field": "size"}}
+            },
+        size: SIZE,
+    };
+
+    if (after) {
+        q.search_after = [after["_score"], after["_id"]];
+    }
+
+    if (CONF.options.highlight) {
+        q.highlight = {
            pre_tags: ["<mark>"],
            post_tags: ["</mark>"],
            fields: {
                content: {},
+                // "content.nGram": {},
                name: {},
                "name.nGram": {},
-                // font_name: {},
+                font_name: {},
            }
-        },
-        aggs: {
-            total_size: {"sum": {"field": "size"}}
-        },
-        size: SIZE,
-    }).then(searchResult => {
-        scroll_id = searchResult["_scroll_id"];
+        };
+    }

-        //Search stats
-        searchResults.appendChild(makeStatsCard(searchResult));
+    $.jsonPost("es", q).then(searchResult => {
+        let hits = searchResult["hits"]["hits"];
+        if (hits) {
+            lastDoc = hits[hits.length - 1];
+        }

-        //Autocomplete
-        if (searchResult.hasOwnProperty("suggest") && searchResult["suggest"].hasOwnProperty("path")) {
-            pathAutoComplete = [];
-            for (let i = 0; i < searchResult["suggest"]["path"][0]["options"].length; i++) {
-                pathAutoComplete.push(searchResult["suggest"]["path"][0]["options"][i].text)
-            }
+        if (!after) {
+            preload.remove();
+            searchResults.appendChild(makeStatsCard(searchResult));
+        } else {
+            let pageIndicator = makePageIndicator(searchResult);
+            searchResults.appendChild(pageIndicator);
        }

        //Setup page
        let resultContainer = makeResultContainer();
        searchResults.appendChild(resultContainer);

-        docCount = 0;
-        insertHits(resultContainer, searchResult["hits"]["hits"]);
+        window.setTimeout(() => {
+            $(".sp").SmartPhoto({animationSpeed: 0, swipeTopToClose: true, showAnimation: false, forceInterval: 50});
+        }, 100);

+        if (!after) {
+            docCount = 0;
+        }
+        reachedEnd = hits.length !== SIZE;
+        insertHits(resultContainer, hits);
        searchBusy = false;
    });
 }

-let pathAutoComplete = [];
 let size_min = 0;
 let size_max = 10000000000000;

@@ -287,8 +429,8 @@ let searchDebounced = _.debounce(function () {
    coolingDown = false;
    search()
 }, 500);
+
 searchBar.addEventListener("keyup", searchDebounced);
-document.getElementById("pathBar").addEventListener("keyup", searchDebounced);

 //Size slider
 $("#sizeSlider").ionRangeSlider({
@@ -336,18 +478,136 @@ function updateIndices() {
 document.getElementById("indices").addEventListener("change", updateIndices);
 updateIndices();

-//Suggest
-function getPathChoices() {
-    return new Promise(getPaths => {
+window.onkeyup = function (e) {
+    if (e.key === "/" || e.key === "Escape") {
+        const bar = document.getElementById("searchBar");
+        bar.scrollIntoView();
+        bar.focus();
+    }
+};

-        let xhttp = new XMLHttpRequest();
-        xhttp.onreadystatechange = function () {
-            if (this.readyState === 4 && this.status === 200) {
-                getPaths(JSON.parse(xhttp.responseText))
+function getNextDepth(node) {
+    let q = {
+        query: {
+            bool: {
+                filter: [
+                    {term: {index: node.index}},
+                    {term: {_depth: node.depth + 1}}
+                ]
+            }
+        },
+        aggs: {
+            paths: {
+                terms: {
+                    field: "path",
+                    size: 10000
+                }
+            }
+        },
+        size: 0
+    }
+
+    if (node.depth > 0) {
+        q.query.bool.must = {
+            prefix: {
+                path: node.id,
            }
        };
-        xhttp.open("GET", "suggest?prefix=" + pathBar.value, true);
-        xhttp.send();
+    }
+
+    return $.jsonPost("es", q).then(resp => {
+        const buckets = resp["aggregations"]["paths"]["buckets"];
+        if (!buckets) {
+            return false;
+        }
+        return buckets
+            .filter(bucket => bucket.key.length > node.id.length || node.id.startsWith("/"))
+            .sort((a, b) => a.key > b.key)
+            .map(bucket => {
+                const i = bucket.key.lastIndexOf("/");
+                const name = (i === -1 || i === 1) ? bucket.key : bucket.key.slice(i + 1);
+
+                return {
+                    id: bucket.key,
+                    text: `${name}/ (${bucket.doc_count})`,
+                    depth: node.depth + 1,
+                    index: node.index,
+                    children: true,
+                }
+            })
+    })
+}
+
+function handlePathTreeClick(tree) {
+    return (event, node, handler) => {
+
+        if (node.depth !== 0) {
+            $("#pathBar").val(node.id)
+            $("#pathTreeModal").modal("hide")
+            searchDebounced();
+        }
+
+        handler();
+    }
+}
+
+function createPathTree(target) {
+    let pathTree = new InspireTree({
+        data: function (node, resolve, reject) {
+            return getNextDepth(node);
+        }
+    });
+
+    selectedIndices.forEach(index => {
+        pathTree.addNode({
+            id: "/" + index,
+            text: `/[${indexMap[index]}]`,
+            index: index,
+            depth: 0,
+            children: true
+        })
+    })
+
+    new InspireTreeDOM(pathTree, {
+        target: target
+    });
+
+    pathTree.on("node.click", handlePathTreeClick(pathTree));
+
+    const button = document.querySelector("#pathBarHelper")
+    const tooltip = document.querySelector("#pathTreeTooltip")
+    console.log(button)
+    console.log(tooltip)
+    Popper.createPopper(button, tooltip ,{
+        trigger: "click",
+        placement: "right",
    });
 }

+function updateSettings() {
+    CONF.options.display = $("#settingDisplay").val();
+    CONF.options.fuzzy = $("#settingFuzzy").prop("checked");
+    CONF.options.highlight = $("#settingHighlight").prop("checked");
+    CONF.save();
+
+    searchDebounced();
+
+    $.toast({
+        heading: "Settings updated",
+        text: "Settings saved to browser storage",
+        stack: 3,
+        bgColor: "#00a4bc",
+        textColor: "#fff",
+        position: 'bottom-right',
+        hideAfter: 3000,
+        loaderBg: "#08c7e8",
+    });
+}
+
+function loadSettings() {
+    CONF.load();
+
+    $("#settingDisplay").val(CONF.options.display);
+    $("#settingFuzzy").prop("checked", CONF.options.fuzzy);
+    $("#settingHighlight").prop("checked", CONF.options.highlight);
+}
--- a/web/js/util.js
+++ b/web/js/util.js
@@ -3,7 +3,7 @@
 */
 function humanFileSize(bytes) {
    if (bytes === 0) {
-        return "? B"
+        return "0 B"
    }

    let thresh = 1000;
@@ -43,9 +43,9 @@ function humanTime(sec_num) {

 function debounce(func, wait) {
    let timeout;
-    return function() {
+    return function () {
        let context = this, args = arguments;
-        let later = function() {
+        let later = function () {
            timeout = null;
            func.apply(context, args);
        };
@@ -54,3 +54,13 @@ function debounce(func, wait) {
        func.apply(context, args);
    };
 }
+
+/**
+ * Compute a brightness value for a hex colour string.
+ *
+ * The result is the weighted sum 0.2126 * R + 0.7152 * G + 0.0722 * B of the
+ * 8-bit channels (the Rec. 709 luma coefficients), so it ranges from 0
+ * (black) to 255 (white).
+ *
+ * @param {string} c Colour with a one-character prefix followed by hex
+ *                   digits, e.g. "#ff8800" or the shorthand "#f80".
+ * @returns {number} Brightness in [0, 255]; NaN if the digits are not hex.
+ */
+function lum(c) {
+    // The first character (expected to be "#") is always dropped.
+    let hex = c.substring(1);
+
+    // Expand CSS shorthand ("f80" -> "ff8800"); without this a 3-digit
+    // value would be read as a 12-bit number and land in the wrong channels.
+    if (hex.length === 3) {
+        hex = hex.replace(/./g, "$&$&");
+    }
+
+    const rgb = parseInt(hex, 16);
+    const r = (rgb >> 16) & 0xff;
+    const g = (rgb >> 8) & 0xff;
+    const b = (rgb >> 0) & 0xff;
+
+    return 0.2126 * r + 0.7152 * g + 0.0722 * b;
+}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
simon987	98e0a5fd64	Update CI script	2020-03-06 09:41:33 -05:00
simon987	740a49a09f	version bump	2020-03-06 09:36:46 -05:00
simon987	81be662574	(breaking) update mime list	2020-03-06 09:36:21 -05:00
simon987	02fa3f02f5	Fix memory leak with virtual files in parse.c	2020-03-06 09:36:07 -05:00
simon987	cfdd7bdd87	Fix memory leak in font.c	2020-03-06 09:35:19 -05:00
simon987	7ceb645926	hotfix invalid read in text_buffer	2020-03-06 09:34:41 -05:00
simon987	7d0091f647	whoops	2020-03-05 21:54:56 -05:00
simon987	b3cd630399	Update README.md	2020-03-05 19:42:06 -05:00
simon987	5f7a1acfe3	Merge pull request #36 from simon987/wip-doc Wip doc	2020-03-05 18:43:56 -05:00
simon987	513a21cca2	Undo debug stuff	2020-03-05 18:42:51 -05:00
simon987	04dbfb23ab	Cleanup warnings	2020-03-05 16:53:30 -05:00
simon987	1abddabeec	Rewrite doc.c module, fix bad error handling, fix pdf.c memory leaks	2020-03-05 16:12:34 -05:00
simon987	9ace5774af	Update dependencies	2020-03-05 16:10:45 -05:00
simon987	eab6101cf7	make --fast faster	2020-03-05 12:26:43 -05:00
simon987	d7cbd5d2b6	wip doc rewrite	2020-03-05 09:13:37 -05:00
simon987	641edf2715	Prettier warning messages in main.c	2020-03-04 17:57:49 -05:00
simon987	7efb4957bf	inline text/util functions	2020-03-04 17:50:31 -05:00
simon987	9ae77fdedb	Fix css glitch	2020-03-03 16:51:01 -05:00
simon987	98c40901ed	Disallow incremental scan when version does not match (#33 )	2020-03-03 16:36:07 -05:00
simon987	363375d5da	version bump	2020-03-03 16:25:41 -05:00
simon987	149de95d88	(breaking) Upgrade path filter bar	2020-03-03 16:24:24 -05:00
simon987	e5bb4856d2	(breaking) Set item depth in ingest pipeline	2020-03-02 17:39:25 -05:00
simon987	d78994d427	Ignore --incremental option when the directory does not exist (#31 )	2020-03-01 21:16:50 -05:00
simon987	f2d68d54df	Update README.md	2020-03-01 13:55:08 -05:00
simon987	e03625838b	Settings menu (#30 ) and UI tweaks	2020-02-29 19:26:09 -05:00
simon987	86840b46f4	Version bump	2020-02-27 09:47:06 -05:00
simon987	e57f9916eb	Rewrite documentation	2020-02-27 09:45:14 -05:00
simon987	565ba6ee76	Fix for #29	2020-02-27 09:44:19 -05:00
simon987	d83fc2c373	Fix docker build for 1.2.15	2020-02-27 09:42:18 -05:00
simon	d4da28249e	--fast option #27	2020-02-22 18:37:08 -05:00
simon	483a454c8d	--exclude argument #26	2020-02-22 16:55:35 -05:00
simon	018ac86640	fix build...	2020-02-22 13:20:41 -05:00
simon	398f1aead4	Support for cbr documents	2020-02-22 13:11:19 -05:00
simon	d19a75926b	Fix invalid read in terminate_string()	2020-02-22 13:10:40 -05:00
simon	1ac8b40e3d	Code style	2020-02-22 09:02:59 -05:00
simon	a8505cb8c1	Fix for #28	2020-02-20 16:42:13 -05:00
simon	ae8652d86e	UI tweaks, search syntax (#25 )	2020-02-16 15:24:29 -05:00
simon	849beb09d8	hotfix	2020-02-15 19:33:18 -05:00
simon	e1aaaee617	UI tweak	2020-02-15 09:30:14 -05:00
simon	c02b940945	(I forgot to commit this)	2020-02-14 20:58:10 -05:00
simon	2934ddb07f	Add image viewer (#2 )	2020-02-14 18:28:55 -05:00
simon	7f6f3c02fa	OCR tweaks	2020-02-11 21:13:47 -05:00
simon	7f98d5a682	Fix buffer overflow (whoops)	2020-02-09 18:11:29 -05:00
simon	7eb9c5d7d5	Fix web/index issue with NULL mime types	2020-02-09 17:23:49 -05:00
simon	184439aa38	increase minimum image size for OCR	2020-02-09 14:06:59 -05:00
simon	1ce8b298a1	Display EXIF tags on document info panel, remove march=native on openjp	2020-02-09 13:21:19 -05:00
simon	75f99025d9	add exif dateTime, allow some special characters in text meta	2020-02-09 08:47:13 -05:00
simon	ebe852bd5a	Fix rewrite-url arg	2020-02-09 08:23:17 -05:00
simon	402b103c49	Fix total count for ES 7.5	2020-02-08 09:25:00 -05:00
simon	e9b6e1cdc2	Turn off auto optimisation in libtesseract build	2020-02-08 08:32:04 -05:00
simon	ed1ce8ab5e	Handle XML errors #18	2020-02-07 10:08:01 -05:00
simon	d1fa4febc4	Improve scroll feature, UI fix	2020-02-07 10:08:01 -05:00
simon987	048c55df7b	Update README.md	2020-02-06 19:56:29 -05:00
simon987	f77bc6a025	Update README.md	2020-02-06 19:55:32 -05:00
simon	efdde2734e	version bump	2020-02-06 19:28:05 -05:00
simon	66658fa8f7	Remove trailing/leading white space in text meta fields	2020-02-06 19:27:30 -05:00
simon	df41c251e4	(Breaking!) Add some exif tags	2020-02-06 19:21:50 -05:00
simon	3282ab56ba	Version bump	2020-02-02 09:26:54 -05:00
simon	8300838d30	Suppress XML parsing errors (#18 )	2020-02-02 09:26:03 -05:00
simon	c9870a6d3d	Remove -march=native for release build...	2020-02-02 09:03:06 -05:00
simon	a143cc4fcf	bundle openssl...	2020-02-02 08:39:20 -05:00
simon	9ef1f3781d	fix attempt for #11	2020-02-01 20:04:26 -05:00
simon	bbee8aa721	tesseract ocr path fix	2020-02-01 20:03:59 -05:00
simon	d22f83c797	curl fix	2020-02-01 15:22:43 -05:00
simon	50615486a4	curl fix attempt	2020-02-01 14:42:42 -05:00
simon	ca79e4f797	add /status endpoint	2020-01-28 10:18:37 -05:00
simon987	6a9fd08a80	Merge pull request #21 from simon987/wip-20 Fixes #20	2020-01-27 09:16:00 -05:00
simon987	cab890dc9b	#20 wip	2020-01-27 09:09:42 -05:00
simon987	b3c4faf2df	Update README.md	2020-01-26 12:37:13 -05:00
simon987	353937171a	Update README.md	2020-01-20 15:54:53 -05:00
simon	c80002bea4	Bundle libcurl attempt 2	2020-01-18 11:53:12 -05:00
simon	56adee9d81	Bundle libcurl, libopc bugfix #18	2020-01-18 10:25:02 -05:00
simon	d6493d6d5f	Bundle libpng	2020-01-16 16:21:38 -05:00
simon	0967e9676d	remove static build in CI...	2020-01-16 15:45:18 -05:00
simon	487e998ea0	Display error message on /d/ error	2020-01-16 15:04:50 -05:00
simon	919f45c79c	Document info modal #19	2020-01-16 14:37:19 -05:00
simon987	d42129cfcb	CI fix attempt	2020-01-15 20:11:45 -05:00
simon987	754983e34a	Minor cleanup	2020-01-15 18:16:06 -05:00
simon	7c8a3e2f9d	Support for external json indices	2020-01-14 15:44:31 -05:00
simon	3bb24b4453	Use bundled libtiff	2020-01-14 12:21:26 -05:00
simon	9a56b959d3	Fix build problems...	2020-01-14 10:55:02 -05:00
simon	5e3a2dbcc2	Update README	2020-01-14 10:47:00 -05:00
simon	573f94f24e	OCR support, remove static build	2020-01-14 10:26:40 -05:00
simon	f5db78a69f	Ignore special ascii chars, strip binary in docker build	2020-01-12 10:59:17 -05:00
simon	5a2820d339	UI tweak auto-select based on query args	2020-01-11 17:48:51 -05:00
simon	b7f13f425c	Fix memory leaks (whoops)	2020-01-11 17:34:34 -05:00
simon	d1a2f9b1d5	Strip binary (CI)	2020-01-07 14:32:39 -05:00
simon	71f17986db	build settings	2020-01-06 21:34:41 -05:00
simon	acdd2fb3c1	Use bundled ffmpeg libraries	2020-01-06 16:25:34 -05:00
simon987	0cda6c00e1	CI attempt	2020-01-03 20:21:07 -05:00
simon987	14d0e5a1e1	possible fix for #18	2019-12-28 14:32:42 -05:00
simon987	0d06d39281	Path in list view #16	2019-12-28 14:32:05 -05:00
simon987	80708ca636	Merge pull request #17 from dpieski/patch-1 maybe a typo in cli.c	2019-12-23 18:33:28 -05:00
Andrew	43b7b40dc4	maybe a typo in cli.c possibly corrected a typo	2019-12-23 13:18:18 -06:00
simon987	d051f541e2	Show client error on ES connection failure, fixes #13	2019-12-21 20:52:53 -05:00
simon987	0eefbac7b4	Update libopc. should fix #14	2019-12-21 19:43:33 -05:00
simon987	663f8e21c1	Better logging, fixes #15	2019-12-21 12:32:08 -05:00
simon	80fbcb2a01	empty docx bugfix	2019-12-19 17:26:11 -05:00
simon	8451109ecd	OOXML files support	2019-12-19 16:53:18 -05:00
simon	d6fe61cfdc	Clarify help string for es url #12	2019-12-19 16:52:22 -05:00
simon	254094130f	Fix submodules	2019-12-13 12:35:39 -05:00
simon	eaaa75c04c	Fix submodules	2019-12-13 11:24:17 -05:00
simon	bb87f4270f	Update docker script	2019-12-13 11:16:17 -05:00
simon	be23201210	Archive file support	2019-12-13 10:53:51 -05:00
simon	9778acda77	uifix	2019-12-12 19:19:53 -05:00
simon	8d187926d9	Bugfix with incremental comparison	2019-12-12 15:41:31 -05:00
simon987	88c37e3523	Update README.md	2019-12-04 20:56:52 -05:00
simon	d816dae8b3	UI fix, disable thumbnail option, batch index size option	2019-12-01 10:57:29 -05:00
simon	4346c3e063	Also use static libraries in sist2 build	2019-11-30 20:02:26 -05:00
simon	1a1032a8a7	Cleaner shutdown	2019-11-30 19:59:11 -05:00
simon	4ab2ba1a02	#8 Skip PDF scan when content-size is 0	2019-11-21 16:06:31 -05:00
simon	d089601dc5	Add sfv & m3u	2019-11-20 12:31:31 -05:00
simon	11df6cc88f	Add nfo to ext list	2019-11-20 11:41:50 -05:00
simon	373ac01e4e	Fix for #3 and maximum scan depth	2019-11-19 11:23:30 -05:00
simon	893ff145c5	List mode tweak	2019-11-17 16:28:47 -05:00
simon987	6111ded77f	Merge pull request #6 from simon987/wip List mode #5	2019-11-17 16:15:36 -05:00
simon	34cc26b2fd	List mode #5 wip	2019-11-17 15:03:24 -05:00
simon	204034d859	Add basic auth. Fixes #4	2019-11-17 10:00:17 -05:00
simon	16ccc6c0d3	Show error message on elasticsearch connection fail	2019-11-17 09:55:16 -05:00
simon	94c617fdc3	Bug fix	2019-11-12 22:11:50 -05:00
simon	ebfd7e03ce	User scripts, bug fixes, docker image	2019-11-12 20:58:43 -05:00
simon	6931d320a2	bugfix with invalid/corrupted index path	2019-11-11 20:49:38 -05:00
simon	fc22e52eae	Image placeholder	2019-11-09 23:26:49 -05:00
simon	ba81748a74	Update build	2019-11-09 17:15:20 -05:00
simon	e72fa1587b	EXIF metadata for images	2019-11-09 15:18:44 -05:00
simon	ea4fb7fa0d	Bug fixes	2019-11-09 12:00:07 -05:00
simon	b0a868bb73	remove 'must match'	2019-11-08 21:46:54 -05:00
simon	d761a3b595	update readme	2019-11-08 19:42:36 -05:00
simon	2d7a8a2fdc	fuzzy toggle	2019-11-08 16:15:10 -05:00
simon	152d2ddf8a	bug fix in deserialize	2019-11-08 09:03:44 -05:00
simon	bc5f22b759	update readme	2019-11-05 18:59:00 -05:00
simon	534b397876	update readme, UI tweak: don't show broken images	2019-11-03 10:39:02 -05:00
simon	7962a994e2	utf8 update + bug fixes	2019-11-03 07:50:31 -05:00
simon	f8f1a27180	video metadata	2019-10-31 11:54:13 -04:00
simon	784c3c9435	Font rendering fixes	2019-10-31 10:15:01 -04:00
simon	f8b081a3f4	UI tweaks, path autocomplete	2019-10-31 08:26:19 -04:00
simon	5661573b06	Dark theme, pdf meta, de-serialize bugfix	2019-10-30 22:20:22 -04:00
simon	130fb78787	Fix some memory leaks	2019-10-27 15:40:48 -04:00
simon	2943ca9365	UI tweak	2019-10-27 14:10:24 -04:00
simon	7234c22d2f	epub fix	2019-10-27 14:00:52 -04:00
simon	bdbd7ca7ed	cbz fix	2019-10-27 13:33:55 -04:00
simon	9b7c56a608	Static build (scan only)	2019-10-27 12:25:34 -04:00
simon	4109ba6d34	Fix files with # character in url redirect	2019-10-27 08:30:47 -04:00
simon	69f0c1f2cf	do 'should' search if search bar is empty	2019-10-26 21:56:03 -04:00
simon	c063d87232	Fix bug with files with multiple video or audio streams	2019-10-26 21:12:51 -04:00
simon	f44e6336dc	Deserialize typo	2019-10-26 20:54:46 -04:00
simon	7be6234f0d	Add preloader (UI Tweak)	2019-10-26 20:49:50 -04:00
simon	85ab2858f6	Fix UI bugs	2019-10-26 20:28:29 -04:00
simon	cbb043f03f	Fix elasticsearch 6 bug	2019-10-26 20:18:58 -04:00
simon	50fcec25f7	Fix flaky mime table generation	2019-10-26 19:53:41 -04:00
simon	b4199a1fd8	fix for elasticsearch	2019-10-26 19:09:26 -04:00
simon	ef79681024	fix for elasticsearch 6.8	2019-10-26 17:58:52 -04:00
simon	0d4deb3eb3	Update readme	2019-10-26 17:40:06 -04:00
				`@@ -0,0 +1 @@`
				.jq-toast-wrap,.jq-toast-wrap *{margin:0;padding:0}.jq-toast-wrap{display:block;position:fixed;width:250px;pointer-events:none!important;letter-spacing:normal;z-index:9000!important}.jq-toast-wrap.bottom-left{bottom:20px;left:20px}.jq-toast-wrap.bottom-right{bottom:20px;right:40px}.jq-toast-wrap.top-left{top:20px;left:20px}.jq-toast-wrap.top-right{top:20px;right:40px}.jq-toast-single{display:block;width:100%;padding:10px;margin:0 0 5px;border-radius:4px;font-size:12px;font-family:arial,sans-serif;line-height:17px;position:relative;pointer-events:all!important;background-color:#444;color:#fff}.jq-toast-single h2{font-family:arial,sans-serif;font-size:14px;margin:0 0 7px;background:0 0;color:inherit;line-height:inherit;letter-spacing:normal}.jq-toast-single a{color:#eee;text-decoration:none;font-weight:700;border-bottom:1px solid #fff;padding-bottom:3px;font-size:12px}.jq-toast-single ul{margin:0 0 0 15px;background:0 0;padding:0}.jq-toast-single ul li{list-style-type:disc!important;line-height:17px;background:0 0;margin:0;padding:0;letter-spacing:normal}.close-jq-toast-single{position:absolute;top:3px;right:7px;font-size:14px;cursor:pointer}.jq-toast-loader{display:block;position:absolute;top:-2px;height:5px;width:0;left:0;border-radius:5px;background:red}.jq-toast-loaded{width:100%}.jq-has-icon{padding:10px 10px 10px 
50px;background-repeat:no-repeat;background-position:10px}.jq-icon-info{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGwSURBVEhLtZa9SgNBEMc9sUxxRcoUKSzSWIhXpFMhhYWFhaBg4yPYiWCXZxBLERsLRS3EQkEfwCKdjWJAwSKCgoKCcudv4O5YLrt7EzgXhiU3/4+b2ckmwVjJSpKkQ6wAi4gwhT+z3wRBcEz0yjSseUTrcRyfsHsXmD0AmbHOC9Ii8VImnuXBPglHpQ5wwSVM7sNnTG7Za4JwDdCjxyAiH3nyA2mtaTJufiDZ5dCaqlItILh1NHatfN5skvjx9Z38m69CgzuXmZgVrPIGE763Jx9qKsRozWYw6xOHdER+nn2KkO+Bb+UV5CBN6WC6QtBgbRVozrahAbmm6HtUsgtPC19tFdxXZYBOfkbmFJ1VaHA1VAHjd0pp70oTZzvR+EVrx2Ygfdsq6eu55BHYR8hlcki+n+kERUFG8BrA0BwjeAv2M8WLQBtcy+SD6fNsmnB3AlBLrgTtVW1c2QN4bVWLATaIS60J2Du5y1TiJgjSBvFVZgTmwCU+dAZFoPxGEEs8nyHC9Bwe2GvEJv2WXZb0vjdyFT4Cxk3e/kIqlOGoVLwwPevpYHT+00T+hWwXDf4AJAOUqWcDhbwAAAAASUVORK5CYII=);background-color:#31708f;color:#d9edf7;border-color:#bce8f1}.jq-icon-warning{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAGYSURBVEhL5ZSvTsNQFMbXZGICMYGYmJhAQIJAICYQPAACiSDB8AiICQQJT4CqQEwgJvYASAQCiZiYmJhAIBATCARJy+9rTsldd8sKu1M0+dLb057v6/lbq/2rK0mS/TRNj9cWNAKPYIJII7gIxCcQ51cvqID+GIEX8ASG4B1bK5gIZFeQfoJdEXOfgX4QAQg7kH2A65yQ87lyxb27sggkAzAuFhbbg1K2kgCkB1bVwyIR9m2L7PRPIhDUIXgGtyKw575yz3lTNs6X4JXnjV+LKM/m3MydnTbtOKIjtz6VhCBq4vSm3ncdrD2lk0VgUXSVKjVDJXJzijW1RQdsU7F77He8u68koNZTz8Oz5yGa6J3H3lZ0xYgXBK2QymlWWA+RWnYhskLBv2vmE+hBMCtbA7KX5drWyRT/2JsqZ2IvfB9Y4bWDNMFbJRFmC9E74SoS0CqulwjkC0+5bpcV1CZ8NMej4pjy0U+doDQsGyo1hzVJttIjhQ7GnBtRFN1UarUlH8F3xict+HY07rEzoUGPlWcjRFRr4/gChZgc3ZL2d8oAAAAASUVORK5CYII=);background-color:#8a6d3b;color:#fcf8e3;border-color:#faebcc}.jq-icon-error{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAHOSURBVEhLrZa/SgNBEMZzh0WKCClSCKaIYOED+AAKeQQLG8HWztLCImBrYadgIdY+gIKNYkBFSwu7CAoqCgkkoGBI/E28PdbLZmeDLgzZzcx83/zZ2SSXC1j
9fr+I1Hq93g2yxH4iwM1vkoBWAdxCmpzTxfkN2RcyZNaHFIkSo10+8kgxkXIURV5HGxTmFuc75B2RfQkpxHG8aAgaAFa0tAHqYFfQ7Iwe2yhODk8+J4C7yAoRTWI3w/4klGRgR4lO7Rpn9+gvMyWp+uxFh8+H+ARlgN1nJuJuQAYvNkEnwGFck18Er4q3egEc/oO+mhLdKgRyhdNFiacC0rlOCbhNVz4H9FnAYgDBvU3QIioZlJFLJtsoHYRDfiZoUyIxqCtRpVlANq0EU4dApjrtgezPFad5S19Wgjkc0hNVnuF4HjVA6C7QrSIbylB+oZe3aHgBsqlNqKYH48jXyJKMuAbiyVJ8KzaB3eRc0pg9VwQ4niFryI68qiOi3AbjwdsfnAtk0bCjTLJKr6mrD9g8iq/S/B81hguOMlQTnVyG40wAcjnmgsCNESDrjme7wfftP4P7SP4N3CJZdvzoNyGq2c/HWOXJGsvVg+RA/k2MC/wN6I2YA2Pt8GkAAAAASUVORK5CYII=);background-color:#a94442;color:#f2dede;border-color:#ebccd1}.jq-icon-success{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAADsSURBVEhLY2AYBfQMgf///3P8+/evAIgvA/FsIF+BavYDDWMBGroaSMMBiE8VC7AZDrIFaMFnii3AZTjUgsUUWUDA8OdAH6iQbQEhw4HyGsPEcKBXBIC4ARhex4G4BsjmweU1soIFaGg/WtoFZRIZdEvIMhxkCCjXIVsATV6gFGACs4Rsw0EGgIIH3QJYJgHSARQZDrWAB+jawzgs+Q2UO49D7jnRSRGoEFRILcdmEMWGI0cm0JJ2QpYA1RDvcmzJEWhABhD/pqrL0S0CWuABKgnRki9lLseS7g2AlqwHWQSKH4oKLrILpRGhEQCw2LiRUIa4lwAAAABJRU5ErkJggg==);color:#dff0d8;background-color:#3c763d;border-color:#d6e9c6}