Fix bug with media files, don't encode thumbnail when not necessary

Configurable column count
Set timeout for HTTP get request
2025-12-12 23:18:51 +00:00 · 2020-07-26 11:52:48 -04:00 · 2020-07-26 11:50:21 -04:00 · 2020-07-25 19:55:27 -04:00 · 2020-07-25 17:26:17 -04:00 · 2020-07-25 09:37:37 -04:00
42 changed files with 1550 additions and 415 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,7 +30,7 @@ add_executable(
        third-party/argparse/argparse.h third-party/argparse/argparse.c

        src/cli.c src/cli.h
-        src/stats.c src/stats.h)
+        src/stats.c src/stats.h src/ctx.c)

 target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
 set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
@@ -40,6 +40,7 @@ find_package(cJSON CONFIG REQUIRED)
 find_package(unofficial-glib CONFIG REQUIRED)
 find_package(unofficial-mongoose CONFIG REQUIRED)
 find_library(UUID_LIB NAMES uuid)
+find_package(CURL CONFIG REQUIRED)

 #find_package(OpenSSL REQUIRED)

@@ -56,7 +57,6 @@ target_compile_options(
        sist2
        PRIVATE
        -fPIC
-        -Werror
 )

 if (SIST_DEBUG)
@@ -67,6 +67,7 @@ if (SIST_DEBUG)
            -fstack-protector
            -fno-omit-frame-pointer
            -fsanitize=address
+            -O2
    )
    target_link_options(
            sist2
@@ -80,6 +81,7 @@ if (SIST_DEBUG)
            OUTPUT_NAME sist2_debug
    )
 else ()
+#    set(VCPKG_BUILD_TYPE release)
    target_compile_options(
            sist2
            PRIVATE
@@ -105,6 +107,7 @@ target_link_libraries(
        unofficial::glib::glib
        unofficial::mongoose::mongoose
 #        OpenSSL::SSL OpenSSL::Crypto
+        CURL::libcurl

        ${UUID_LIB}
        pthread
--- a/README.md
+++ b/README.md
@@ -15,10 +15,10 @@ sist2 (Simple incremental search tool)
 * Fast, low memory usage, multi-threaded
 * Mobile-friendly Web interface
 * Portable (all its features are packaged in a single executable)
-* Extracts text from common file types \*
+* Extracts text and metadata from common file types \*
 * Generates thumbnails \*
 * Incremental scanning
-* Automatic tagging from file attributes via [user scripts](scripting/README.md)
+* Manual tagging from the UI and automatic tagging based on file attributes via [user scripts](docs/scripting.md)
 * Recursive scan inside archive files \*\*
 * OCR support with tesseract \*\*\*
 * Stats page & disk utilisation visualization
@@ -72,15 +72,17 @@ See [Usage guide](docs/USAGE.md) for more details

 File type | Library | Content | Thumbnail | Metadata
 :---|:---|:---|:---|:---
-pdf,xps,cbz,cbr,fb2,epub | MuPDF | text+ocr | yes, `png` | title |
-`audio/*` | ffmpeg | - | yes, `jpeg` | ID3 tags |
-`video/*` | ffmpeg | - | yes, `jpeg` | title, comment, artist |
-`image/*` | ffmpeg | - | yes, `jpeg` | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
+pdf,xps,fb2,epub | MuPDF | text+ocr | yes | title |
+cbz,cbr | *(none)* | - | yes | - |
+`audio/*` | ffmpeg | - | yes | ID3 tags |
+`video/*` | ffmpeg | - | yes | title, comment, artist |
+`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190) |
+raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, x3f, arw, sr2, srf, erf  | LibRaw | - | yes | Common EXIF tags |
 ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
 `text/plain` | *(none)* | yes | no | - |
 html, xml | *(none)* | yes | no | - |
 tar, zip, rar, 7z, ar ...  | Libarchive | yes\* | - | no |
-docx, xlsx, pptx | *(none)* | yes | no | creator, modified_by, title |
+docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
 mobi, azw, azw3 | libmobi | yes | no | author, title |

 \* *See [Archive files](#archive-files)*
@@ -100,7 +102,7 @@ scan is also supported.
 
 ### OCR

-You can enable OCR support for pdf,xps,cbz,cbr,fb2,epub file types with the
+You can enable OCR support for pdf,xps,fb2,epub file types with the
 `--ocr <lang>` option. Download the language data files with your
 package manager (`apt install tesseract-ocr-eng`) or directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).

@@ -122,12 +124,12 @@ binaries (GCC 7+ required).
 1. Install compile-time dependencies

   ```bash
-   vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic
+   vcpkg install lmdb cjson glib libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest mongoose libuuid libmagic libraw
   ```

 2. Build
    ```bash
    git clone --recursive https://github.com/simon987/sist2/
-    cmake -D <VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
+    cmake -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
    make
    ```
--- a/ci/build.sh
+++ b/ci/build.sh
@@ -1,16 +1,17 @@
 #!/usr/bin/env bash

+VCPKG_ROOT="/vcpkg"

 rm *.gz

 rm -rf CMakeFiles CMakeCache.txt
-cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
-make
+cmake -DSIST_DEBUG=off -DVCPKG_BUILD_TYPE=release -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
+make -j 12
 strip sist2
 gzip -9 sist2

 rm -rf CMakeFiles CMakeCache.txt
-cmake -DSIST_DEBUG=on -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
-make
+cmake -DSIST_DEBUG=on -DVCPKG_BUILD_TYPE=debug -DCMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" .
+make -j 12
 cp /usr/lib/x86_64-linux-gnu/libasan.so.2.0.0 libasan.so.2
 tar -czf sist2_debug.tar.gz sist2_debug libasan.so.2
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -14,50 +14,55 @@
    * [examples](#web-examples)
    * [rewrite_url](#rewrite_url)
    * [link to specific indices](#link-to-specific-indices)
+* [exec-script](#exec-script)
+* [tagging](#tagging)

 ```
 Usage: sist2 scan [OPTION]... PATH
   or: sist2 index [OPTION]... INDEX
   or: sist2 web [OPTION]... INDEX...
+   or: sist2 exec-script [OPTION]... INDEX
 Lightning-fast file system indexer and search tool.

-    -h, --help            show this help message and exit
-    -v, --version         Show version and exit
-    --verbose             Turn on logging
-    --very-verbose        Turn on debug messages
+    -h, --help                    show this help message and exit
+    -v, --version                 Show version and exit
+    --verbose                     Turn on logging
+    --very-verbose                Turn on debug messages

 Scan options
-    -t, --threads=<int>   Number of threads. DEFAULT=1
-    -q, --quality=<flt>   Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
-    --size=<int>          Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
-    --content-size=<int>  Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
-    --incremental=<str>   Reuse an existing index and only scan modified files.
-    -o, --output=<str>    Output directory. DEFAULT=index.sist2/
-    --rewrite-url=<str>   Serve files from this url instead of from disk.
-    --name=<str>          Index display name. DEFAULT: (name of the directory)
-    --depth=<int>         Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
-    --archive=<str>       Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
-    --ocr=<str>           Tesseract language (use tesseract --list-langs to see which are installed on your machine)
-    -e, --exclude=<str>   Files that match this regex will not be scanned
-    --fast                Only index file names & mime type
+    -t, --threads=<int>           Number of threads. DEFAULT=1
+    -q, --quality=<flt>           Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5
+    --size=<int>                  Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500
+    --content-size=<int>          Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768
+    --incremental=<str>           Reuse an existing index and only scan modified files.
+    -o, --output=<str>            Output directory. DEFAULT=index.sist2/
+    --rewrite-url=<str>           Serve files from this url instead of from disk.
+    --name=<str>                  Index display name. DEFAULT: (name of the directory)
+    --depth=<int>                 Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
+    --archive=<str>               Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
+    --ocr=<str>                   Tesseract language (use tesseract --list-langs to see which are installed on your machine)
+    -e, --exclude=<str>           Files that match this regex will not be scanned
+    --fast                        Only index file names & mime type
    --treemap-threshold=<str>     Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
-    --mem-buffer=<int>            Maximum memory buffer size in MB for files inside archives (see USAGE.md). DEFAULT: 2000
-
+    --mem-buffer=<int>            Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000

 Index options
-    --es-url=<str>        Elasticsearch url with port. DEFAULT=http://localhost:9200
-    -p, --print           Just print JSON documents to stdout.
-    --script-file=<str>   Path to user script.
-    --batch-size=<int>    Index batch size. DEFAULT: 100
-    -f, --force-reset     Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)
-
+    -t, --threads=<int>           Number of threads. DEFAULT=1
+    --es-url=<str>                Elasticsearch url with port. DEFAULT=http://localhost:9200
+    -p, --print                   Just print JSON documents to stdout.
+    --script-file=<str>           Path to user script.
+    --batch-size=<int>            Index batch size. DEFAULT: 100
+    -f, --force-reset             Reset Elasticsearch mappings and settings. (You must use this option the first time you use the index command)

 Web options
-    --es-url=<str>        Elasticsearch url. DEFAULT=http://localhost:9200
-    --bind=<str>          Listen on this address. DEFAULT=localhost:4090
-    --auth=<str>          Basic auth in user:password format
-Made by simon987 <me@simon987.net>. Released under GPL-3.0
+    --es-url=<str>                Elasticsearch url. DEFAULT=http://localhost:9200
+    --bind=<str>                  Listen on this address. DEFAULT=localhost:4090
+    --auth=<str>                  Basic auth in user:password format
+    --tag-auth=<str>              Basic auth in user:password format for tagging

+Exec-script options
+    --script-file=<str>           Path to user script.
+Made by simon987 <me@simon987.net>. Released under GPL-3.0
 ```

 ## Scan
@@ -65,7 +70,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
 ### Scan options

 * `-t, --threads` 
-      Number of threads for file parsing. **Do not set a number higher than `$(nproc)`!**.
+      Number of threads for file parsing. **Do not set a number higher than `$(nproc)` or `$(Get-WmiObject Win32_ComputerSystem).NumberOfLogicalProcessors` in Windows!**
 * `-q, --quality` 
    Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. *Does not affect PDF thumbnails quality*
 * `--size` 
@@ -142,7 +147,10 @@ documents.idx/
 ├── agg_mime.csv
 ├── agg_date.csv
 ├── add_size.csv
-└── thumbs
+├── thumbs
+|   ├── data.mdb
+|   └── lock.mdb
+└── tags
    ├── data.mdb
    └── lock.mdb
 ```
@@ -234,7 +242,7 @@ it is currently unsupported and has no guaranties of back/forward compatibility.
 * `-p, --print` 
    Print index in JSON format to stdout.
 * `--script-file` 
-    Path to user script. See [Scripting](scripting/README.md).
+    Path to user script. See [Scripting](scripting.md).
 * `--batch-size=<int>` 
    Index batch size. Indexing is generally faster with larger batches, but payloads that
    are too large will fail and additional overhead for retrying with smaller sizes may slow
@@ -267,6 +275,8 @@ sist2 index --print ./my_index/ | jq | less
 * `--es-url=<str>` Elasticsearch url.
 * `--bind=<str>` Listen on this address.
 * `--auth=<str>` Basic auth in user:password format
+ * `--tag-auth=<str>` Basic auth in user:password format. Works the same way as the 
+    `--auth` argument, but authentication is only applied to the `/tag/` endpoint.
 
 ### Web examples

@@ -294,3 +304,35 @@ Both the `root` and `rewrite_url` fields are safe to manually modify from the
 To link to specific indices, you can add a list of comma-separated index name to 
 the URL: `?i=<name>,<name>`. By default, indices with `"(nsfw)"` in their name are
 not displayed.
+
+## exec-script
+
+The `exec-script` command is used to execute a user script for an index that has already been imported to Elasticsearch with the `index` command. Note that the documents will not be reset to their default state before each execution as the `index` command does: if you make undesired changes to the documents by accident, you will need to run `index` again to revert to the original state.
+
+
+## Tagging
+
+### Manual tagging
+
+You can modify tags of individual documents directly from the 
+ `web` interface. Note that you can set up authentication for this feature
+ with the `--tag-auth` option (See [web options](#web-options))
+
+![manual_tag](manual_tag.png)
+
+Tags that are manually added are saved both in the 
+ index folder (in `/tags/`) and in Elasticsearch*. When re-`index`ing, 
+ they are read from the index and automatically applied.
+ 
+You can safely copy the `/tags/` database to another index.
+
+See [Automatic tagging](#automatic-tagging) for information about tag 
+ hierarchies and tag colors.
+
+\* *It can take a few seconds to take effect in new search queries, and the page needs 
+    to be reloaded for the tag tab to update*
+
+
+### Automatic tagging
+
+See [scripting](scripting.md) documentation.
--- a/docs/manual_tag.png
+++ b/docs/manual_tag.png
--- a/docs/scripting.md
+++ b/docs/scripting.md
@@ -39,7 +39,7 @@ it adds the `genre.<genre>` tag.
 ArrayList tags = ctx._source.tag = new ArrayList();

 if (ctx._source?.genre != null) {
-    tags.add("genre." + ctx._source.genre.toLowerCase())
+    tags.add("genre." + ctx._source.genre.toLowerCase());
 }
 ```

@@ -67,7 +67,7 @@ ArrayList tags = ctx._source.tag = new ArrayList();

 Matcher m = /[\(\.+](20[0-9]{2})[\)\.+]/.matcher(ctx._source.name);
 if (m.find()) {
-    tags.add("year." + m.group(1))
+    tags.add("year." + m.group(1));
 }
 ```

@@ -111,16 +111,6 @@ if (ctx._source.path != "") {
 }
 ```

-Set the name of the last folder (`/path/to/<studio>/file.mp4`) to `studio.<studio>` tag
-```Java
-ArrayList tags = ctx._source.tag = new ArrayList();
-
-if (ctx._source.path != "") {
-    String[] names = ctx._source.path.splitOnToken('/');
-    tags.add("studio." + names[names.length-1]);
-}
-```
-
 Parse `EXIF:F Number` tag
 ```Java
 if (ctx._source?.exif_fnumber != null) {
--- a/schema/mappings.json
+++ b/schema/mappings.json
@@ -126,7 +126,12 @@
      }
    },
    "tag": {
-      "type": "keyword"
+      "type": "keyword",
+      "copy_to": "suggest-tag"
+    },
+    "suggest-tag": {
+      "type": "completion",
+      "analyzer": "case_insensitive_kw_analyzer"
    },
    "exif_make": {
      "type": "text"
--- a/schema/settings.json
+++ b/schema/settings.json
@@ -1,7 +1,8 @@
 {
  "index": {
    "refresh_interval": "30s",
-    "codec": "best_compression"
+    "codec": "best_compression",
+    "number_of_replicas": 0
  },
  "analysis": {
    "tokenizer": {
--- a/scripts/mime.csv
+++ b/scripts/mime.csv
@@ -111,7 +111,7 @@ application/x-dbf, dbf
 application/x-dbt,
 application/x-debian-package, deb
 application/x-deepv, deepv
-application/x-director, dcr|dir|dxr
+application/x-director, dir|dxr
 application/x-dmp, dmp
 application/x-dosdriver,
 application/x-dosexec, dll
@@ -430,3 +430,21 @@ video/x-sgi-movie, movie|mv
 x-epoc/x-sisx-app,
 application/x-zstd-dictionary,
 application/vnd.ms-outlook, msg
+image/x-olympus-orf, orf
+image/x-nikon-nef, nef
+image/x-fuji-raf, raf
+image/x-panasonic-raw, rw2|raw
+image/x-adobe-dng, dng
+image/x-canon-cr2, cr2
+image/x-canon-crw, crw
+image/x-dcraw,
+image/x-kodak-dcr, dcr
+image/x-kodak-k25, k25
+image/x-kodak-kdc, kdc
+image/x-minolta-mrw, mrw
+image/x-pentax-pef, pef
+image/x-sigma-x3f, x3f
+image/x-sony-arw, arw
+image/x-sony-sr2, sr2
+image/x-sony-srf, srf
+image/x-epson-erf, erf
--- a/scripts/mime.py
+++ b/scripts/mime.py
@@ -18,7 +18,6 @@ major_mime = {

 pdf = (
    "application/pdf",
-    "application/x-cbz",
    "application/epub+zip",
    "application/vnd.ms-xpsdocument",
 )
@@ -73,6 +72,29 @@ markup = (
    "text/x-sgml"
 )

+raw = (
+    "image/x-olympus-orf",
+    "image/x-nikon-nef",
+    "image/x-fuji-raf",
+    "image/x-panasonic-raw",
+    "image/x-adobe-dng",
+    "image/x-canon-cr2",
+    "image/x-canon-crw",
+    "image/x-dcraw",
+    "image/x-kodak-dcr",
+    "image/x-kodak-k25",
+    "image/x-kodak-kdc",
+    "image/x-minolta-mrw",
+    "image/x-pentax-pef",
+    "image/x-sigma-x3f",
+    "image/x-sony-arw",
+    "image/x-sony-sr2",
+    "image/x-sony-srf",
+    "image/x-minolta-mrw",
+    "image/x-pentax-pef",
+    "image/x-epson-erf",
+)
+
 cnt = 1


@@ -97,6 +119,8 @@ def mime_id(mime):
        mime_id += " | 0x02000000"
    elif mime in markup:
        mime_id += " | 0x01000000"
+    elif mime in raw:
+        mime_id += " | 0x00800000"
    elif mime == "application/x-empty":
        return "1"
    return mime_id
--- a/src/cli.c
+++ b/src/cli.c
@@ -16,7 +16,7 @@

 #define DEFAULT_MAX_MEM_BUFFER 2000

-const char* TESS_DATAPATHS[] = {
+const char *TESS_DATAPATHS[] = {
        "/usr/share/tessdata/",
        "/usr/share/tesseract-ocr/tessdata/",
        "./",
@@ -32,10 +32,18 @@ scan_args_t *scan_args_create() {
    return args;
 }

+exec_args_t *exec_args_create() {
+    exec_args_t *args = calloc(sizeof(exec_args_t), 1);
+    return args;
+}
+
 void scan_args_destroy(scan_args_t *args) {
    if (args->name != NULL) {
        free(args->name);
    }
+    if (args->incremental != NULL) {
+        free(args->incremental);
+    }
    if (args->path != NULL) {
        free(args->path);
    }
@@ -55,6 +63,10 @@ void web_args_destroy(web_args_t *args) {
    free(args);
 }

+void exec_args_destroy(exec_args_t *args) {
+    free(args);
+}
+
 int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
    if (argc < 2) {
        fprintf(stderr, "Required positional argument: PATH.\n");
@@ -70,7 +82,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
    }

    if (args->incremental != NULL) {
-        abs_path = abspath(args->incremental);
-        if (abs_path == NULL) {
+        args->incremental = abspath(args->incremental);
+        if (args->incremental == NULL) { // test the value just resolved, not the stale abs_path
            sist_log("main.c", SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
            args->incremental = NULL;
@@ -115,7 +127,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
        return 1;
    }

-    if (args->depth < 0) {
+    if (args->depth <= 0) {
        args->depth = G_MAXINT32;
    } else {
        args->depth += 1;
@@ -147,7 +159,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {

        char filename[128];
        sprintf(filename, "%s.traineddata", args->tesseract_lang);
-        const char * path = find_file_in_paths(TESS_DATAPATHS, filename);
+        const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
        if (path == NULL) {
            LOG_FATAL("cli.c", "Could not find tesseract language file!");
        }
@@ -214,6 +226,62 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
    return 0;
 }

+/**
+ * Read a whole script file into a newly allocated, NUL-terminated buffer.
+ *
+ * @param script_path Path of the script file to load.
+ * @param dst Receives the malloc'd buffer on success (the caller must free() it);
+ *            left untouched on failure.
+ * @return 0 on success, 1 on failure (a message is printed to stderr).
+ */
+int load_script(const char *script_path, char **dst) {
+    struct stat info;
+    int res = stat(script_path, &info);
+
+    if (res == -1) {
+        fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
+        return 1;
+    }
+
+    int fd = open(script_path, O_RDONLY);
+    if (fd == -1) {
+        fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno));
+        return 1;
+    }
+
+    char *buf = malloc(info.st_size + 1);
+    if (buf == NULL) {
+        fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
+        close(fd);
+        return 1;
+    }
+
+    // read() may return fewer bytes than requested: loop until the size reported
+    // by stat() has been read or EOF is reached
+    size_t total = 0;
+    while (total < (size_t) info.st_size) {
+        ssize_t n = read(fd, buf + total, (size_t) info.st_size - total);
+        if (n < 0) {
+            fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno));
+            free(buf);
+            close(fd);
+            return 1;
+        }
+        if (n == 0) {
+            break;
+        }
+        total += (size_t) n;
+    }
+
+    // Terminate after the bytes actually read so a short read never exposes
+    // uninitialized memory
+    buf[total] = '\0';
+    close(fd);
+
+    *dst = buf;
+    return 0;
+}
+
 int index_args_validate(index_args_t *args, int argc, const char **argv) {

    LogCtx.verbose = 1;
@@ -223,6 +263,13 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
        return 1;
    }

+    if (args->threads == 0) {
+        args->threads = 1;
+    } else if (args->threads < 0) {
+        fprintf(stderr, "Invalid threads: %d\n", args->threads);
+        return 1;
+    }
+
    char *index_path = abspath(argv[1]);
    if (index_path == NULL) {
        fprintf(stderr, "File not found: %s\n", argv[1]);
@@ -237,29 +284,9 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
    }

    if (args->script_path != NULL) {
-        struct stat info;
-        int res = stat(args->script_path, &info);
-
-        if (res == -1) {
-            fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
+        if (load_script(args->script_path, &args->script) != 0) {
            return 1;
        }
-
-        int fd = open(args->script_path, O_RDONLY);
-        if (fd == -1) {
-            fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
-            return 1;
-        }
-
-        args->script = malloc(info.st_size + 1);
-        res = read(fd, args->script, info.st_size);
-        if (res < 0) {
-            fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
-            return 1;
-        }
-
-        *(args->script + info.st_size) = '\0';
-        close(fd);
    }

    if (args->batch_size == 0) {
@@ -295,14 +322,14 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
    }

    if (args->credentials != NULL) {
-        char * ptr = strstr(args->credentials, ":");
+        char *ptr = strstr(args->credentials, ":");
        if (ptr == NULL) {
            fprintf(stderr, "Invalid --auth format, see usage\n");
            return 1;
        }

        strncpy(args->auth_user, args->credentials, (ptr - args->credentials));
-        strncpy(args->auth_pass, ptr + 1, strlen(ptr + 1));
+        strcpy(args->auth_pass, ptr + 1);

        if (strlen(args->auth_user) == 0) {
            fprintf(stderr, "--auth username must be at least one character long");
@@ -314,6 +341,31 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
        args->auth_enabled = FALSE;
    }

+    if (args->tag_credentials != NULL && args->credentials != NULL) {
+        fprintf(stderr, "--auth and --tag-auth are mutually exclusive");
+        return 1;
+    }
+
+    if (args->tag_credentials != NULL) {
+        char *ptr = strstr(args->tag_credentials, ":");
+        if (ptr == NULL) {
+            fprintf(stderr, "Invalid --tag-auth format, see usage\n");
+            return 1;
+        }
+
+        strncpy(args->auth_user, args->tag_credentials, (ptr - args->tag_credentials));
+        strcpy(args->auth_pass, ptr + 1);
+
+        if (strlen(args->auth_user) == 0) {
+            fprintf(stderr, "--tag-auth username must be at least one character long");
+            return 1;
+        }
+
+        args->tag_auth_enabled = TRUE;
+    } else {
+        args->tag_auth_enabled = FALSE;
+    }
+
    args->index_count = argc - 1;
    args->indices = argv + 1;

@@ -328,6 +380,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
    LOG_DEBUGF("cli.c", "arg es_url=%s", args->es_url)
    LOG_DEBUGF("cli.c", "arg listen=%s", args->listen_address)
    LOG_DEBUGF("cli.c", "arg credentials=%s", args->credentials)
+    LOG_DEBUGF("cli.c", "arg tag_credentials=%s", args->tag_credentials)
    LOG_DEBUGF("cli.c", "arg auth_user=%s", args->auth_user)
    LOG_DEBUGF("cli.c", "arg auth_pass=%s", args->auth_pass)
    LOG_DEBUGF("cli.c", "arg index_count=%d", args->index_count)
@@ -348,3 +401,35 @@ web_args_t *web_args_create() {
    return args;
 }

+int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
+
+    if (argc < 2) {
+        fprintf(stderr, "Required positional argument: PATH.\n");
+        return 1;
+    }
+
+    char *index_path = abspath(argv[1]);
+    if (index_path == NULL) {
+        fprintf(stderr, "File not found: %s\n", argv[1]);
+        return 1;
+    } else {
+        args->index_path = argv[1];
+        free(index_path);
+    }
+
+    if (args->es_url == NULL) {
+        args->es_url = DEFAULT_ES_URL;
+    }
+
+    if (args->script_path == NULL) {
+        LOG_FATAL("cli.c", "--script-file argument is required");
+    }
+
+    if (load_script(args->script_path, &args->script) != 0) {
+        return 1;
+    }
+
+    LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
+    LOG_DEBUGF("cli.c", "arg script=%s", args->script)
+    return 0;
+}
--- a/src/cli.h
+++ b/src/cli.h
@@ -41,19 +41,29 @@ typedef struct index_args {
    int print;
    int batch_size;
    int force_reset;
+    int threads;
 } index_args_t;

 typedef struct web_args {
    char *es_url;
    char *listen_address;
    char *credentials;
+    char *tag_credentials;
    char auth_user[256];
    char auth_pass[256];
    int auth_enabled;
+    int tag_auth_enabled;
    int index_count;
    const char **indices;
 } web_args_t;

+typedef struct exec_args {
+    char *es_url;
+    const char *index_path;
+    const char *script_path;
+    char *script;
+} exec_args_t;
+
 index_args_t *index_args_create();

 void index_args_destroy(index_args_t *args);
@@ -66,4 +76,10 @@ int index_args_validate(index_args_t *args, int argc, const char **argv);

 int web_args_validate(web_args_t *args, int argc, const char **argv);

+exec_args_t *exec_args_create();
+
+void exec_args_destroy(exec_args_t *args);
+
+int exec_args_validate(exec_args_t *args, int argc, const char **argv);
+
 #endif
--- a/src/ctx.c
+++ b/src/ctx.c
@@ -0,0 +1,6 @@
+#include "ctx.h"
+
+ScanCtx_t ScanCtx;
+WebCtx_t WebCtx;
+IndexCtx_t IndexCtx;
+LogCtx_t LogCtx;
--- a/src/ctx.h
+++ b/src/ctx.h
@@ -5,19 +5,20 @@
 #include "tpool.h"
 #include "libscan/scan.h"
 #include "libscan/arc/arc.h"
-#include "libscan/cbr/cbr.h"
+#include "libscan/comic/comic.h"
 #include "libscan/ebook/ebook.h"
 #include "libscan/font/font.h"
 #include "libscan/media/media.h"
 #include "libscan/ooxml/ooxml.h"
 #include "libscan/text/text.h"
 #include "libscan/mobi/scan_mobi.h"
+#include "libscan/raw/raw.h"
+#include "src/io/store.h"

 #include <glib.h>
 #include <pcre.h>

-//TODO Move to individual scan ctx
-struct {
+typedef struct {
    struct index_t index;

    GHashTable *mime_table;
@@ -39,34 +40,44 @@ struct {
    int fast;

    scan_arc_ctx_t arc_ctx;
-    scan_cbr_ctx_t cbr_ctx;
+    scan_comic_ctx_t comic_ctx;
    scan_ebook_ctx_t ebook_ctx;
    scan_font_ctx_t font_ctx;
    scan_media_ctx_t media_ctx;
    scan_ooxml_ctx_t ooxml_ctx;
    scan_text_ctx_t text_ctx;
    scan_mobi_ctx_t mobi_ctx;
-} ScanCtx;
+    scan_raw_ctx_t raw_ctx;
+} ScanCtx_t;

-struct {
+typedef struct {
    int verbose;
    int very_verbose;
    int no_color;
-} LogCtx;
+} LogCtx_t;

-struct {
+typedef struct {
    char *es_url;
    int batch_size;
-} IndexCtx;
+    tpool_t *pool;
+    store_t *tag_store;
+    GHashTable *tags;
+} IndexCtx_t;

-struct {
+typedef struct {
    char *es_url;
    int index_count;
    char *auth_user;
    char *auth_pass;
    int auth_enabled;
-    struct index_t indices[16];
-} WebCtx;
+    int tag_auth_enabled;
+    struct index_t indices[64];
+} WebCtx_t;
+
+extern ScanCtx_t ScanCtx;
+extern WebCtx_t WebCtx;
+extern IndexCtx_t IndexCtx;
+extern LogCtx_t LogCtx;


 #endif
--- a/src/index/elastic.c
+++ b/src/index/elastic.c
@@ -14,9 +14,18 @@ typedef struct es_indexer {
 } es_indexer_t;


-static es_indexer_t *Indexer;
+static __thread es_indexer_t *Indexer;

 void delete_queue(int max);
+void elastic_flush();
+
+void elastic_cleanup() {
+    elastic_flush();
+    if (Indexer != NULL) {
+        free(Indexer->es_url);
+        free(Indexer);
+    }
+}

 void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {

@@ -35,8 +44,12 @@ void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
    cJSON_Delete(line);
 }

-void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
+void index_json_func(void *arg) {
+    es_bulk_line_t *line = arg;
+    elastic_index_line(line);
+}

+void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
    char *json = cJSON_PrintUnformatted(document);

    size_t json_len = strlen(json);
@@ -48,11 +61,15 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
    bulk_line->next = NULL;

    cJSON_free(json);
-    elastic_index_line(bulk_line);
+    tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
 }

 void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {

+    if (Indexer == NULL) {
+        Indexer = create_indexer(IndexCtx.es_url);
+    }
+
    cJSON *body = cJSON_CreateObject();
    cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
    cJSON_AddStringToObject(script_obj, "lang", "painless");
@@ -65,7 +82,7 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
    char *str = cJSON_Print(body);

    char bulk_url[4096];
-    snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
+    snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?wait_for_completion=false", Indexer->es_url);
    response_t *r = web_post(bulk_url, str);
    LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
    cJSON *resp = cJSON_Parse(r->body);
@@ -85,33 +102,44 @@ void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]
    cJSON_Delete(resp);
 }

+#define ACTION_STR_LEN 91
+
 void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
    es_bulk_line_t *line = Indexer->line_head;
    *count = 0;

    size_t buf_size = 0;
    size_t buf_cur = 0;
-    char *buf = malloc(1);
+    char *buf = malloc(8192);
+    size_t buf_capacity = 8192;

    while (line != NULL && *count < max) {
-        char action_str[512];
-        snprintf(action_str, 512,
+        char action_str[256];
+        snprintf(action_str, 256,
                 "{\"index\":{\"_id\":\"%s\", \"_type\":\"_doc\", \"_index\":\"sist2\"}}\n", line->uuid_str);
-        size_t action_str_len = strlen(action_str);

        size_t line_len = strlen(line->line);
-        buf = realloc(buf, buf_size + line_len + action_str_len);
-        buf_size += line_len + action_str_len;

-        memcpy(buf + buf_cur, action_str, action_str_len);
-        buf_cur += action_str_len;
+        while (buf_size + line_len + ACTION_STR_LEN > buf_capacity) {
+            buf_capacity *= 2;
+            buf = realloc(buf, buf_capacity);
+        }
+
+        buf_size += line_len + ACTION_STR_LEN;
+
+        memcpy(buf + buf_cur, action_str, ACTION_STR_LEN);
+        buf_cur += ACTION_STR_LEN;
        memcpy(buf + buf_cur, line->line, line_len);
        buf_cur += line_len;

        line = line->next;
        (*count)++;
    }
-    buf = realloc(buf, buf_size + 1);
+
+    if (buf_size + 1 > buf_capacity) {
+        buf = realloc(buf, buf_capacity + 1);
+    }
+
    *(buf + buf_cur) = '\0';

    *buf_len = buf_cur;
@@ -119,7 +147,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
 }

 void print_errors(response_t *r) {
-    char * tmp = malloc(r->size + 1);
+    char *tmp = malloc(r->size + 1);
    memcpy(tmp, r->body, r->size);
    *(tmp + r->size) = '\0';

@@ -177,6 +205,15 @@ void _elastic_flush(int max) {
        _elastic_flush(max / 2);
        return;

+    } else if (r->status_code == 429) {
+
+        free_response(r);
+        free(buf);
+        LOG_WARNING("elastic.c", "Got 429 status, will retry after delay")
+        usleep(1000000 * 20);
+        _elastic_flush(max);
+        return;
+
    } else if (r->status_code != 200) {
        print_errors(r);
        delete_queue(Indexer->queued);
@@ -253,7 +290,7 @@ es_indexer_t *create_indexer(const char *url) {
    return indexer;
 }

-void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {
+void finish_indexer(char *script, char *index_id) {

    char url[4096];

@@ -264,22 +301,23 @@ void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) {

    if (script != NULL) {
        execute_update_script(script, index_id);
-    }
+        free(script);

-    snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
-    r = web_post(url, "");
-    LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
-    free_response(r);
+        snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
+        r = web_post(url, "");
+        LOG_INFOF("elastic.c", "Refresh index <%d>", r->status_code);
+        free_response(r);
+    }

    snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
    r = web_post(url, "");
    LOG_INFOF("elastic.c", "Merge index <%d>", r->status_code);
    free_response(r);

-    if (Indexer != NULL) {
-        free(Indexer->es_url);
-        free(Indexer);
-    }
+    snprintf(url, sizeof(url), "%s/sist2/_settings", IndexCtx.es_url);
+    r = web_put(url, "{\"index\":{\"refresh_interval\":\"1s\"}}");
+    LOG_INFOF("elastic.c", "Set refresh interval <%d>", r->status_code);
+    free_response(r);
 }

 void elastic_init(int force_reset) {
@@ -287,7 +325,7 @@ void elastic_init(int force_reset) {
    // Check if index exists
    char url[4096];
    snprintf(url, 4096, "%s/sist2", IndexCtx.es_url);
-    response_t *r = web_get(url);
+    response_t *r = web_get(url, 30);
    int index_exists = r->status_code == 200;
    free_response(r);

@@ -332,7 +370,7 @@ cJSON *elastic_get_document(const char *uuid_str) {
    char url[4096];
    snprintf(url, 4096, "%s/sist2/_doc/%s", WebCtx.es_url, uuid_str);

-    response_t *r = web_get(url);
+    response_t *r = web_get(url, 3);
    cJSON *json = NULL;
    if (r->status_code == 200) {
        json = cJSON_Parse(r->body);
@@ -346,7 +384,7 @@ char *elastic_get_status() {
    snprintf(url, 4096,
             "%s/_cluster/state/metadata/sist2?filter_path=metadata.indices.*.state", WebCtx.es_url);

-    response_t *r = web_get(url);
+    response_t *r = web_get(url, 30);
    cJSON *json = NULL;
    char *status = malloc(128 * sizeof(char));
    status[0] = '\0';
--- a/src/index/elastic.h
+++ b/src/index/elastic.h
@@ -16,15 +16,14 @@ typedef struct es_indexer es_indexer_t;

 void elastic_index_line(es_bulk_line_t *line);

-void elastic_flush();
-
 void print_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);

 void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);

 es_indexer_t *create_indexer(const char* es_url);

-void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);
+void elastic_cleanup();
+void finish_indexer(char *script, char *index_id);

 void elastic_init(int force_reset);

@@ -32,4 +31,6 @@ cJSON *elastic_get_document(const char *uuid_str);

 char *elastic_get_status();

+void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]);
+
 #endif
--- a/src/index/static_generated.c
+++ b/src/index/static_generated.c
--- a/src/index/web.c
+++ b/src/index/web.c
@@ -1,11 +1,19 @@
 #include "web.h"
 #include "src/sist.h"
-#include "src/ctx.h"

 #include <mongoose.h>
 #include <pthread.h>
+#include <curl/curl.h>


+size_t write_cb(char *ptr, size_t size, size_t nmemb, void *user_data) {
+
+    size_t real_size = size * nmemb;
+    dyn_buffer_t *buf = user_data;
+    dyn_buffer_write(buf, ptr, real_size);
+    return real_size;
+}
+
 void free_response(response_t *resp) {
    if (resp->body != NULL) {
        free(resp->body);
@@ -75,7 +83,7 @@ subreq_ctx_t *http_req(const char *url, const char *extra_headers, const char *p
    subreq_ctx_t *ctx = malloc(sizeof(subreq_ctx_t));
    mg_mgr_init(&ctx->mgr, NULL);

-    char address[8196];
+    char address[8192];
    snprintf(address, sizeof(address), "tcp://%.*s:%u", (int) host.len, host.p, port);
    struct mg_connection *nc = mg_connect(&ctx->mgr, address, http_req_ev);
    nc->user_data = &ctx->ev_data;
@@ -100,55 +108,126 @@ subreq_ctx_t *http_req(const char *url, const char *extra_headers, const char *p
    return ctx;
 }

-response_t *web_get(const char *url) {
-    subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "GET");
-    while (ctx->ev_data.done == FALSE) {
-        mg_mgr_poll(&ctx->mgr, 50);
-    }
-    mg_mgr_free(&ctx->mgr);
-
-    response_t *ret = ctx->ev_data.resp;
-    free(ctx);
-    return ret;
-}
-
 subreq_ctx_t *web_post_async(const char *url, const char *data) {
    return http_req(url, SIST2_HEADERS, data, "POST");
 }

-response_t *web_post(const char *url, const char *data) {
-    subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "POST");
+response_t *web_get(const char *url, int timeout) { // GET url; timeout is passed to CURLOPT_TIMEOUT (seconds)
+    response_t *resp = malloc(sizeof(response_t));
 
-    while (ctx->ev_data.done == FALSE) {
-        mg_mgr_poll(&ctx->mgr, 50);
-    }
-    mg_mgr_free(&ctx->mgr);
+    long status = 0; // CURLINFO_RESPONSE_CODE writes a long; 0 means no response code was received
+    dyn_buffer_t buffer = dyn_buffer_create();
 
-    response_t *ret = ctx->ev_data.resp;
-    free(ctx);
-    return ret;
+    CURL *curl = curl_easy_init();
+    curl_easy_setopt(curl, CURLOPT_URL, url);
+    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
+    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
+    curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
+    curl_easy_setopt(curl, CURLOPT_TIMEOUT, (long) timeout); // libcurl reads this option as a long
+
+    struct curl_slist *headers = curl_slist_append(NULL, "Content-Type: application/json"); // NULL starts a new list
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+
+    curl_easy_perform(curl);
+    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
+    resp->status_code = status;
+    curl_easy_cleanup(curl);
+    curl_slist_free_all(headers);
+
+    resp->body = buffer.buf; // the response takes over the buffer filled by write_cb
+    resp->size = buffer.cur;
+    return resp;
 }

-response_t *web_put(const char *url, const char *data) {
-    subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, data, "PUT");
-    while (ctx->ev_data.done == FALSE) {
-        mg_mgr_poll(&ctx->mgr, 50);
-    }
-    mg_mgr_free(&ctx->mgr);
+response_t *web_post(const char *url, const char *data) { // POST data to url as application/json
 
-    response_t *ret = ctx->ev_data.resp;
-    free(ctx);
-    return ret;
+    response_t *resp = malloc(sizeof(response_t));
+
+    long status = 0; // CURLINFO_RESPONSE_CODE writes a long; 0 means no response code was received
+    dyn_buffer_t buffer = dyn_buffer_create();
+
+    CURL *curl = curl_easy_init();
+    curl_easy_setopt(curl, CURLOPT_URL, url);
+    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
+    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
+    curl_easy_setopt(curl, CURLOPT_POST, 1L);
+    curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
+
+    struct curl_slist *headers = curl_slist_append(NULL, "Content-Type: application/json"); // NULL starts a new list
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+
+    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
+
+    curl_easy_perform(curl);
+    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
+    resp->status_code = status;
+    curl_easy_cleanup(curl);
+    curl_slist_free_all(headers);
+
+    resp->body = buffer.buf; // the response takes over the buffer filled by write_cb
+    resp->size = buffer.cur;
+
+    return resp;
+}
+
+
+response_t *web_put(const char *url, const char *data) { // PUT data to url as application/json
+
+    response_t *resp = malloc(sizeof(response_t));
+
+    long status = 0; // CURLINFO_RESPONSE_CODE writes a long; 0 means no response code was received
+    dyn_buffer_t buffer = dyn_buffer_create();
+
+    CURL *curl = curl_easy_init();
+    curl_easy_setopt(curl, CURLOPT_URL, url);
+    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
+    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
+    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
+    curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
+    curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0L);
+    curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4); // NOTE(review): was CURLOPT_DNS_LOCAL_IP4 (an option id, not a resolve mode); IPv4-only assumed to be the intent
+
+    struct curl_slist *headers = curl_slist_append(NULL, "Content-Type: application/json"); // NULL starts a new list
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+
+    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
+
+    curl_easy_perform(curl);
+    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
+    resp->status_code = status;
+    curl_easy_cleanup(curl);
+    curl_slist_free_all(headers);
+
+    resp->body = buffer.buf; // the response takes over the buffer filled by write_cb
+    resp->size = buffer.cur;
+    return resp;
 }

 response_t *web_delete(const char *url) {
-    subreq_ctx_t *ctx = http_req(url, SIST2_HEADERS, NULL, "DELETE");
-    while (ctx->ev_data.done == FALSE) {
-        mg_mgr_poll(&ctx->mgr, 50);
-    }
-    mg_mgr_free(&ctx->mgr);
 
-    response_t *ret = ctx->ev_data.resp;
-    free(ctx);
-    return ret;
-}
+    response_t *resp = malloc(sizeof(response_t));
+
+    long status = 0; // CURLINFO_RESPONSE_CODE writes a long; 0 means no response code was received
+    dyn_buffer_t buffer = dyn_buffer_create();
+
+    CURL *curl = curl_easy_init();
+    curl_easy_setopt(curl, CURLOPT_URL, url);
+    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) (&buffer));
+    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
+    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "DELETE");
+    curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
+
+    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, ""); // DELETE is sent with an empty body
+    struct curl_slist *headers = curl_slist_append(NULL, "Content-Type: application/json"); // NULL starts a new list
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+
+    curl_easy_perform(curl);
+    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
+    resp->status_code = status;
+    curl_easy_cleanup(curl);
+    curl_slist_free_all(headers);
+
+    resp->body = buffer.buf; // the response takes over the buffer filled by write_cb
+    resp->size = buffer.cur;
+    return resp;
+}
--- a/src/index/web.h
+++ b/src/index/web.h
@@ -20,7 +20,7 @@ typedef struct {
    struct mg_mgr mgr;
 } subreq_ctx_t;

-response_t *web_get(const char *url);
+response_t *web_get(const char *url, int timeout);
 response_t *web_post(const char * url, const char * data);
 subreq_ctx_t *web_post_async(const char *url, const char *data);
 response_t *web_put(const char *url, const char *data);
--- a/src/io/serialize.c
+++ b/src/io/serialize.c
@@ -62,7 +62,7 @@ index_descriptor_t read_index_descriptor(char *path) {
    int fd = open(path, O_RDONLY);

    if (fd == -1) {
-        LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path ,strerror(errno))
+        LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
    }

    char *buf = malloc(info.st_size + 1);
@@ -172,8 +172,8 @@ void write_document(document_t *doc) {
    dyn_buffer_t buf = dyn_buffer_create();

    // Ignore root directory in the file path
-    doc->ext = doc->ext - ScanCtx.index.desc.root_len;
-    doc->base = doc->base - ScanCtx.index.desc.root_len;
+    doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
+    doc->base = (short) (doc->base - ScanCtx.index.desc.root_len);
    doc->filepath += ScanCtx.index.desc.root_len;

    dyn_buffer_write(&buf, doc, sizeof(line_t));
@@ -230,7 +230,7 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
        char uuid_str[UUID_STR_LEN];
        uuid_unparse(line.uuid, uuid_str);

-        const char* mime_text = mime_get_mime_text(line.mime);
+        const char *mime_text = mime_get_mime_text(line.mime);
        if (mime_text == NULL) {
            cJSON_AddNullToObject(document, "mime");
        } else {
@@ -239,12 +239,20 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
        cJSON_AddNumberToObject(document, "size", (double) line.size);
        cJSON_AddNumberToObject(document, "mtime", line.mtime);

-        int c;
+        int c = 0;
        while ((c = getc(file)) != 0) {
            dyn_buffer_write_char(&buf, (char) c);
        }
        dyn_buffer_write_char(&buf, '\0');

+        if (IndexCtx.tags != NULL) {
+            const char *tags_string = g_hash_table_lookup(IndexCtx.tags, buf.buf);
+            if (tags_string != NULL) {
+                cJSON *tags_arr = cJSON_Parse(tags_string);
+                cJSON_AddItemToObject(document, "tag", tags_arr);
+            }
+        }
+
        cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
        if (*(buf.buf + line.ext - 1) == '.') {
            *(buf.buf + line.ext - 1) = '\0';
--- a/src/io/store.c
+++ b/src/io/store.c
@@ -1,9 +1,10 @@
 #include "store.h"
 #include "src/ctx.h"

-store_t *store_create(char *path) {
+store_t *store_create(char *path, size_t chunk_size) {

    store_t *store = malloc(sizeof(struct store_t));
+    store->chunk_size = chunk_size;
    pthread_rwlock_init(&store->lock, NULL);

    mdb_env_create(&store->env);
@@ -18,7 +19,7 @@ store_t *store_create(char *path) {
        LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
    }

-    store->size = (size_t) 1024 * 1024 * 5;
+    store->size = (size_t) store->chunk_size;
    ScanCtx.stat_tn_size = 0;
    mdb_env_set_mapsize(store->env, store->size);

@@ -69,7 +70,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
        // Cannot resize when there is a opened transaction.
        //  Resize take effect on the next commit.
        pthread_rwlock_wrlock(&store->lock);
-        store->size += 1024 * 1024 * 50;
+        store->size += store->chunk_size;
        mdb_env_set_mapsize(store->env, store->size);
        mdb_txn_begin(store->env, NULL, 0, &txn);
        put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
@@ -110,3 +111,40 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
    return buf;
 }

+GHashTable *store_read_all(store_t *store) { // copy every key/value pair of the store into a new string-keyed hash table
+
+    int count = 0;
+
+    GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free); // table owns and frees the copies
+
+    MDB_txn *txn = NULL;
+    mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
+
+    MDB_cursor *cur = NULL;
+    mdb_cursor_open(txn, store->dbi, &cur);
+
+    MDB_val key;
+    MDB_val value;
+
+    while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
+        char *key_str = calloc(1, key.mv_size + 1); // zeroed extra byte: g_str_hash/g_str_equal need a NUL terminator
+        memcpy(key_str, key.mv_data, key.mv_size);
+        char *val_str = calloc(1, value.mv_size + 1); // values are later read as C strings, so terminate them too
+        memcpy(val_str, value.mv_data, value.mv_size);
+
+        g_hash_table_insert(table, key_str, val_str);
+        count += 1;
+    }
+
+    LOG_DEBUGF("store.c", "Read tags for %d documents", count);
+
+    mdb_cursor_close(cur);
+    mdb_txn_abort(txn); // read-only transaction: nothing to commit
+    return table;
+}
+
+
+void store_copy(store_t *store, const char *destination) {
+    mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR);
+    mdb_env_copy(store->env, destination);
+}
--- a/src/io/store.h
+++ b/src/io/store.h
@@ -4,14 +4,20 @@
 #include <pthread.h>
 #include <lmdb.h>

+#include <glib.h>
+
+#define STORE_SIZE_TN (1024 * 1024 * 5) // parenthesised so the value survives use inside larger expressions
+#define STORE_SIZE_TAG (1024 * 16)
+
 typedef struct store_t {
    MDB_dbi dbi;
    MDB_env *env;
    size_t size;
+    size_t chunk_size;
    pthread_rwlock_t lock;
 } store_t;

-store_t *store_create(char *path);
+store_t *store_create(char *path, size_t chunk_size);

 void store_destroy(store_t *store);

@@ -19,4 +25,8 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu

 char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);

+GHashTable *store_read_all(store_t *store);
+
+void store_copy(store_t *store, const char *destination);
+
 #endif
--- a/src/main.c
+++ b/src/main.c
@@ -2,7 +2,6 @@
 #include "ctx.h"

 #include <third-party/argparse/argparse.h>
-#include <glib.h>
 #include <locale.h>

 #include "cli.h"
@@ -22,11 +21,12 @@
 #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"


-static const char *const Version = "2.4.2";
+static const char *const Version = "2.7.4";
 static const char *const usage[] = {
        "sist2 scan [OPTION]... PATH",
        "sist2 index [OPTION]... INDEX",
        "sist2 web [OPTION]... INDEX...",
+        "sist2 exec-script [OPTION]... INDEX",
        NULL,
 };

@@ -75,7 +75,7 @@ void _logf(const char *filepath, int level, char *format, ...) {

    va_start(args, format);
    if (level == LEVEL_FATAL) {
-        sist_logf(filepath, level, format, args);
+        vsist_logf(filepath, level, format, args);
        exit(-1);
    }

@@ -85,7 +85,7 @@ void _logf(const char *filepath, int level, char *format, ...) {
                vsist_logf(filepath, level, format, args);
            }
        } else {
-            sist_logf(filepath, level, format, args);
+            vsist_logf(filepath, level, format, args);
        }
    }
    va_end(args);
@@ -99,11 +99,14 @@ void initialize_scan_context(scan_args_t *args) {
    ScanCtx.arc_ctx.logf = _logf;
    ScanCtx.arc_ctx.parse = (parse_callback_t) parse;

-    // Cbr
-    ScanCtx.cbr_ctx.log = _log;
-    ScanCtx.cbr_ctx.logf = _logf;
-    ScanCtx.cbr_ctx.store = _store;
-    ScanCtx.cbr_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
+    // Comic
+    ScanCtx.comic_ctx.log = _log;
+    ScanCtx.comic_ctx.logf = _logf;
+    ScanCtx.comic_ctx.store = _store;
+    ScanCtx.comic_ctx.tn_size = args->size;
+    ScanCtx.comic_ctx.tn_qscale = args->quality;
+    ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
+    ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");

    // Ebook
    pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
@@ -134,6 +137,7 @@ void initialize_scan_context(scan_args_t *args) {
    ScanCtx.ooxml_ctx.content_size = args->content_size;
    ScanCtx.ooxml_ctx.log = _log;
    ScanCtx.ooxml_ctx.logf = _logf;
+    ScanCtx.ooxml_ctx.store = _store;

    // MOBI
    ScanCtx.mobi_ctx.content_size = args->content_size;
@@ -154,6 +158,13 @@ void initialize_scan_context(scan_args_t *args) {
    strncpy(ScanCtx.index.desc.rewrite_url, args->rewrite_url, sizeof(ScanCtx.index.desc.rewrite_url));
    ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
    ScanCtx.fast = args->fast;
+
+    // Raw
+    ScanCtx.raw_ctx.tn_qscale = args->quality;
+    ScanCtx.raw_ctx.tn_size = args->size;
+    ScanCtx.raw_ctx.log = _log;
+    ScanCtx.raw_ctx.logf = _logf;
+    ScanCtx.raw_ctx.store = _store;
 }


@@ -169,7 +180,7 @@ void sist2_scan(scan_args_t *args) {
    char store_path[PATH_MAX];
    snprintf(store_path, PATH_MAX, "%sthumbs", ScanCtx.index.path);
    mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
-    ScanCtx.index.store = store_create(store_path);
+    ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);

    scan_print_header();

@@ -204,7 +215,7 @@ void sist2_scan(scan_args_t *args) {
        LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
    }

-    ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
+    ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE);
    tpool_start(ScanCtx.pool);
    walk_directory_tree(ScanCtx.index.desc.root);
    tpool_wait(ScanCtx.pool);
@@ -216,7 +227,7 @@ void sist2_scan(scan_args_t *args) {
        char dst_path[PATH_MAX];
        snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
        snprintf(dst_path, PATH_MAX, "%s_index_original", ScanCtx.index.path);
-        store_t *source = store_create(store_path);
+        store_t *source = store_create(store_path, STORE_SIZE_TN);

        DIR *dir = opendir(args->incremental);
        if (dir == NULL) {
@@ -233,6 +244,13 @@ void sist2_scan(scan_args_t *args) {
        }
        closedir(dir);
        store_destroy(source);
+
+        snprintf(store_path, PATH_MAX, "%stags", args->incremental);
+        snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
+        mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
+        store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
+        store_copy(source_tags, dst_path);
+        store_destroy(source_tags);
    }

    store_destroy(ScanCtx.index.store);
@@ -264,6 +282,12 @@ void sist2_index(index_args_t *args) {
        LOG_FATALF("main.c", "Could not open index %s: %s", args->index_path, strerror(errno))
    }

+    char path_tmp[PATH_MAX];
+    snprintf(path_tmp, sizeof(path_tmp), "%s/tags", args->index_path);
+    mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
+    IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
+    IndexCtx.tags = store_read_all(IndexCtx.tag_store);
+
    index_func f;
    if (args->print) {
        f = print_json;
@@ -271,6 +295,16 @@ void sist2_index(index_args_t *args) {
        f = index_json;
    }

+    void (*cleanup)();
+    if (args->print) {
+        cleanup = NULL;
+    } else {
+        cleanup = elastic_cleanup;
+    }
+
+    IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE);
+    tpool_start(IndexCtx.pool);
+
    struct dirent *de;
    while ((de = readdir(dir)) != NULL) {
        if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
@@ -281,10 +315,32 @@ void sist2_index(index_args_t *args) {
    }
    closedir(dir);

+    tpool_wait(IndexCtx.pool);
+
    if (!args->print) {
-        elastic_flush();
-        destroy_indexer(args->script, desc.uuid);
+        finish_indexer(args->script, desc.uuid);
    }
+    tpool_destroy(IndexCtx.pool);
+
+    store_destroy(IndexCtx.tag_store);
+    g_hash_table_remove_all(IndexCtx.tags);
+    g_hash_table_destroy(IndexCtx.tags);
+}
+
+void sist2_exec_script(exec_args_t *args) {
+
+    LogCtx.verbose = TRUE;
+
+    char descriptor_path[PATH_MAX];
+    snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
+    index_descriptor_t desc = read_index_descriptor(descriptor_path);
+
+    IndexCtx.es_url = args->es_url;
+
+    LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
+
+    execute_update_script(args->script, desc.uuid);
+    free(args->script);
 }

 void sist2_web(web_args_t *args) {
@@ -294,6 +350,7 @@ void sist2_web(web_args_t *args) {
    WebCtx.auth_user = args->auth_user;
    WebCtx.auth_pass = args->auth_pass;
    WebCtx.auth_enabled = args->auth_enabled;
+    WebCtx.tag_auth_enabled = args->tag_auth_enabled;

    for (int i = 0; i < args->index_count; i++) {
        char *abs_path = abspath(args->indices[i]);
@@ -303,7 +360,11 @@ void sist2_web(web_args_t *args) {
        char path_tmp[PATH_MAX];

        snprintf(path_tmp, PATH_MAX, "%sthumbs", abs_path);
-        WebCtx.indices[i].store = store_create(path_tmp);
+        WebCtx.indices[i].store = store_create(path_tmp, STORE_SIZE_TN);
+
+        snprintf(path_tmp, PATH_MAX, "%stags", abs_path);
+        mkdir(path_tmp, S_IWUSR | S_IRUSR | S_IXUSR);
+        WebCtx.indices[i].tag_store = store_create(path_tmp, STORE_SIZE_TAG);

        snprintf(path_tmp, PATH_MAX, "%sdescriptor.json", abs_path);
        WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
@@ -323,10 +384,13 @@ int main(int argc, const char *argv[]) {
    scan_args_t *scan_args = scan_args_create();
    index_args_t *index_args = index_args_create();
    web_args_t *web_args = web_args_create();
+    exec_args_t *exec_args = exec_args_create();

    int arg_version = 0;

    char *common_es_url = NULL;
+    char *common_script_path = NULL;
+    int common_threads = 0;

    struct argparse_option options[] = {
            OPT_HELP(),
@@ -336,7 +400,7 @@ int main(int argc, const char *argv[]) {
            OPT_BOOLEAN(0, "very-verbose", &LogCtx.very_verbose, "Turn on debug messages"),

            OPT_GROUP("Scan options"),
-            OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
+            OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
            OPT_FLOAT('q', "quality", &scan_args->quality,
                      "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
            OPT_INTEGER(0, "size", &scan_args->size,
@@ -364,9 +428,10 @@ int main(int argc, const char *argv[]) {
                        "(see USAGE.md). DEFAULT: 2000"),

            OPT_GROUP("Index options"),
+            OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
            OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
-            OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
+            OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
            OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
            OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
                                                                      "(You must use this option the first time you use the index command)"),
@@ -375,6 +440,10 @@ int main(int argc, const char *argv[]) {
            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
            OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"),
            OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
+            OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
+
+            OPT_GROUP("Exec-script options"),
+            OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),

            OPT_END(),
    };
@@ -395,6 +464,11 @@ int main(int argc, const char *argv[]) {

    web_args->es_url = common_es_url;
    index_args->es_url = common_es_url;
+    exec_args->es_url = common_es_url;
+    index_args->script_path = common_script_path;
+    exec_args->script_path = common_script_path;
+    index_args->threads = common_threads;
+    scan_args->threads = common_threads;

    if (argc == 0) {
        argparse_usage(&argparse);
@@ -423,6 +497,14 @@ int main(int argc, const char *argv[]) {
        }
        sist2_web(web_args);

+    }  else if (strcmp(argv[0], "exec-script") == 0) {
+
+        int err = exec_args_validate(exec_args, argc, argv);
+        if (err != 0) {
+            goto end;
+        }
+        sist2_exec_script(exec_args);
+
    } else {
        fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
        argparse_usage(&argparse);
@@ -434,6 +516,7 @@ int main(int argc, const char *argv[]) {
    scan_args_destroy(scan_args);
    index_args_destroy(index_args);
    web_args_destroy(web_args);
+    exec_args_destroy(exec_args);

    return 0;
 }
--- a/src/parsing/mime.h
+++ b/src/parsing/mime.h
@@ -3,7 +3,7 @@

 #include "../sist.h"

-#define MAJOR_MIME(mime_id) (mime_id & 0x00FF0000) >> 16
+#define MAJOR_MIME(mime_id) (((mime_id) & 0x000F0000) >> 16) // fully parenthesised: safe with compound arguments and inside larger expressions

 #define MIME_EMPTY 1

@@ -31,6 +31,9 @@
 #define MARKUP_MASK 0x01000000
 #define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK

+#define RAW_MASK 0x00800000
+#define IS_RAW(mime_id) (((mime_id) & RAW_MASK) == RAW_MASK) // fully parenthesised so !IS_RAW(x) negates the whole test
+
 enum major_mime {
    MimeInvalid = 0,
    MimeModel = 1,
--- a/src/parsing/mime_generated.c
+++ b/src/parsing/mime_generated.c
@@ -108,7 +108,7 @@ enum mime {
    application_x_bzip=655460,
    application_x_bzip2=655461 | 0x08000000,
    application_x_cbr=655462,
-    application_x_cbz=655463 | 0x40000000,
+    application_x_cbz=655463,
    application_x_cdlink=655464,
    application_x_chat=655465,
    application_x_chrome_extension=655466,
@@ -315,127 +315,145 @@ enum mime {
    image_webp=524595,
    image_wmf=524596,
    image_x_3ds=524597,
-    image_x_award_bioslogo=524598,
-    image_x_cmu_raster=524599,
-    image_x_cur=524600,
-    image_x_dwg=524601,
-    image_x_eps=524602,
-    image_x_exr=524603,
-    image_x_gem=524604,
-    image_x_icns=524605,
-    image_x_icon=524606 | 0x80000000,
-    image_x_jg=524607,
-    image_x_jps=524608,
-    image_x_ms_bmp=524609,
-    image_x_niff=524610,
-    image_x_pcx=524611,
-    image_x_pict=524612,
-    image_x_portable_bitmap=524613,
-    image_x_portable_graymap=524614,
-    image_x_portable_pixmap=524615,
-    image_x_quicktime=524616,
-    image_x_rgb=524617,
-    image_x_tga=524618,
-    image_x_tiff=524619,
-    image_x_win_bitmap=524620,
-    image_x_xcf=524621 | 0x80000000,
-    image_x_xpixmap=524622 | 0x80000000,
-    image_x_xwindowdump=524623,
-    message_news=196944,
-    message_rfc822=196945,
-    model_vnd_dwf=65874,
-    model_vnd_gdl=65875,
-    model_vnd_gs_gdl=65876,
-    model_vrml=65877,
-    model_x_pov=65878,
-    text_PGP=590167,
-    text_asp=590168,
-    text_css=590169,
-    text_html=590170 | 0x01000000,
-    text_javascript=590171,
-    text_mcf=590172,
-    text_pascal=590173,
-    text_plain=590174,
-    text_richtext=590175,
-    text_rtf=590176,
-    text_scriplet=590177,
-    text_tab_separated_values=590178,
-    text_troff=590179,
-    text_uri_list=590180,
-    text_vnd_abc=590181,
-    text_vnd_fmi_flexstor=590182,
-    text_vnd_wap_wml=590183,
-    text_vnd_wap_wmlscript=590184,
-    text_webviewhtml=590185,
-    text_x_Algol68=590186,
-    text_x_asm=590187,
-    text_x_audiosoft_intra=590188,
-    text_x_awk=590189,
-    text_x_bcpl=590190,
-    text_x_c=590191,
-    text_x_c__=590192,
-    text_x_component=590193,
-    text_x_diff=590194,
-    text_x_fortran=590195,
-    text_x_java=590196,
-    text_x_la_asf=590197,
-    text_x_lisp=590198,
-    text_x_m=590199,
-    text_x_m4=590200,
-    text_x_makefile=590201,
-    text_x_ms_regedit=590202,
-    text_x_msdos_batch=590203,
-    text_x_objective_c=590204,
-    text_x_pascal=590205,
-    text_x_perl=590206,
-    text_x_php=590207,
-    text_x_po=590208,
-    text_x_python=590209,
-    text_x_ruby=590210,
-    text_x_sass=590211,
-    text_x_scss=590212,
-    text_x_server_parsed_html=590213,
-    text_x_setext=590214,
-    text_x_sgml=590215 | 0x01000000,
-    text_x_shellscript=590216,
-    text_x_speech=590217,
-    text_x_tcl=590218,
-    text_x_tex=590219,
-    text_x_uil=590220,
-    text_x_uuencode=590221,
-    text_x_vcalendar=590222,
-    text_x_vcard=590223,
-    text_xml=590224 | 0x01000000,
-    video_MP2T=393617,
-    video_animaflex=393618,
-    video_avi=393619,
-    video_avs_video=393620,
-    video_mp4=393621,
-    video_mpeg=393622,
-    video_quicktime=393623,
-    video_vdo=393624,
-    video_vivo=393625,
-    video_vnd_rn_realvideo=393626,
-    video_vosaic=393627,
-    video_webm=393628,
-    video_x_amt_demorun=393629,
-    video_x_amt_showrun=393630,
-    video_x_atomic3d_feature=393631,
-    video_x_dl=393632,
-    video_x_dv=393633,
-    video_x_fli=393634,
-    video_x_flv=393635,
-    video_x_isvideo=393636,
-    video_x_jng=393637 | 0x80000000,
-    video_x_m4v=393638,
-    video_x_matroska=393639,
-    video_x_mng=393640,
-    video_x_motion_jpeg=393641,
-    video_x_ms_asf=393642,
-    video_x_msvideo=393643,
-    video_x_qtc=393644,
-    video_x_sgi_movie=393645,
-    x_epoc_x_sisx_app=721326,
+    image_x_adobe_dng=524598 | 0x00800000,
+    image_x_award_bioslogo=524599,
+    image_x_canon_cr2=524600 | 0x00800000,
+    image_x_canon_crw=524601 | 0x00800000,
+    image_x_cmu_raster=524602,
+    image_x_cur=524603,
+    image_x_dcraw=524604 | 0x00800000,
+    image_x_dwg=524605,
+    image_x_eps=524606,
+    image_x_epson_erf=524607 | 0x00800000,
+    image_x_exr=524608,
+    image_x_fuji_raf=524609 | 0x00800000,
+    image_x_gem=524610,
+    image_x_icns=524611,
+    image_x_icon=524612 | 0x80000000,
+    image_x_jg=524613,
+    image_x_jps=524614,
+    image_x_kodak_dcr=524615 | 0x00800000,
+    image_x_kodak_k25=524616 | 0x00800000,
+    image_x_kodak_kdc=524617 | 0x00800000,
+    image_x_minolta_mrw=524618 | 0x00800000,
+    image_x_ms_bmp=524619,
+    image_x_niff=524620,
+    image_x_nikon_nef=524621 | 0x00800000,
+    image_x_olympus_orf=524622 | 0x00800000,
+    image_x_panasonic_raw=524623 | 0x00800000,
+    image_x_pcx=524624,
+    image_x_pentax_pef=524625 | 0x00800000,
+    image_x_pict=524626,
+    image_x_portable_bitmap=524627,
+    image_x_portable_graymap=524628,
+    image_x_portable_pixmap=524629,
+    image_x_quicktime=524630,
+    image_x_rgb=524631,
+    image_x_sigma_x3f=524632 | 0x00800000,
+    image_x_sony_arw=524633 | 0x00800000,
+    image_x_sony_sr2=524634 | 0x00800000,
+    image_x_sony_srf=524635 | 0x00800000,
+    image_x_tga=524636,
+    image_x_tiff=524637,
+    image_x_win_bitmap=524638,
+    image_x_xcf=524639 | 0x80000000,
+    image_x_xpixmap=524640 | 0x80000000,
+    image_x_xwindowdump=524641,
+    message_news=196962,
+    message_rfc822=196963,
+    model_vnd_dwf=65892,
+    model_vnd_gdl=65893,
+    model_vnd_gs_gdl=65894,
+    model_vrml=65895,
+    model_x_pov=65896,
+    text_PGP=590185,
+    text_asp=590186,
+    text_css=590187,
+    text_html=590188 | 0x01000000,
+    text_javascript=590189,
+    text_mcf=590190,
+    text_pascal=590191,
+    text_plain=590192,
+    text_richtext=590193,
+    text_rtf=590194,
+    text_scriplet=590195,
+    text_tab_separated_values=590196,
+    text_troff=590197,
+    text_uri_list=590198,
+    text_vnd_abc=590199,
+    text_vnd_fmi_flexstor=590200,
+    text_vnd_wap_wml=590201,
+    text_vnd_wap_wmlscript=590202,
+    text_webviewhtml=590203,
+    text_x_Algol68=590204,
+    text_x_asm=590205,
+    text_x_audiosoft_intra=590206,
+    text_x_awk=590207,
+    text_x_bcpl=590208,
+    text_x_c=590209,
+    text_x_c__=590210,
+    text_x_component=590211,
+    text_x_diff=590212,
+    text_x_fortran=590213,
+    text_x_java=590214,
+    text_x_la_asf=590215,
+    text_x_lisp=590216,
+    text_x_m=590217,
+    text_x_m4=590218,
+    text_x_makefile=590219,
+    text_x_ms_regedit=590220,
+    text_x_msdos_batch=590221,
+    text_x_objective_c=590222,
+    text_x_pascal=590223,
+    text_x_perl=590224,
+    text_x_php=590225,
+    text_x_po=590226,
+    text_x_python=590227,
+    text_x_ruby=590228,
+    text_x_sass=590229,
+    text_x_scss=590230,
+    text_x_server_parsed_html=590231,
+    text_x_setext=590232,
+    text_x_sgml=590233 | 0x01000000,
+    text_x_shellscript=590234,
+    text_x_speech=590235,
+    text_x_tcl=590236,
+    text_x_tex=590237,
+    text_x_uil=590238,
+    text_x_uuencode=590239,
+    text_x_vcalendar=590240,
+    text_x_vcard=590241,
+    text_xml=590242 | 0x01000000,
+    video_MP2T=393635,
+    video_animaflex=393636,
+    video_avi=393637,
+    video_avs_video=393638,
+    video_mp4=393639,
+    video_mpeg=393640,
+    video_quicktime=393641,
+    video_vdo=393642,
+    video_vivo=393643,
+    video_vnd_rn_realvideo=393644,
+    video_vosaic=393645,
+    video_webm=393646,
+    video_x_amt_demorun=393647,
+    video_x_amt_showrun=393648,
+    video_x_atomic3d_feature=393649,
+    video_x_dl=393650,
+    video_x_dv=393651,
+    video_x_fli=393652,
+    video_x_flv=393653,
+    video_x_isvideo=393654,
+    video_x_jng=393655 | 0x80000000,
+    video_x_m4v=393656,
+    video_x_matroska=393657,
+    video_x_mng=393658,
+    video_x_motion_jpeg=393659,
+    video_x_ms_asf=393660,
+    video_x_msvideo=393661,
+    video_x_qtc=393662,
+    video_x_sgi_movie=393663,
+    x_epoc_x_sisx_app=721344,
 };
 char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
 case application_arj: return "application/arj";
@@ -868,6 +886,24 @@ case video_x_sgi_movie: return "video/x-sgi-movie";
 case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
 case application_x_zstd_dictionary: return "application/x-zstd-dictionary";
 case application_vnd_ms_outlook: return "application/vnd.ms-outlook";
+case image_x_olympus_orf: return "image/x-olympus-orf";
+case image_x_nikon_nef: return "image/x-nikon-nef";
+case image_x_fuji_raf: return "image/x-fuji-raf";
+case image_x_panasonic_raw: return "image/x-panasonic-raw";
+case image_x_adobe_dng: return "image/x-adobe-dng";
+case image_x_canon_cr2: return "image/x-canon-cr2";
+case image_x_canon_crw: return "image/x-canon-crw";
+case image_x_dcraw: return "image/x-dcraw";
+case image_x_kodak_dcr: return "image/x-kodak-dcr";
+case image_x_kodak_k25: return "image/x-kodak-k25";
+case image_x_kodak_kdc: return "image/x-kodak-kdc";
+case image_x_minolta_mrw: return "image/x-minolta-mrw";
+case image_x_pentax_pef: return "image/x-pentax-pef";
+case image_x_sigma_x3f: return "image/x-sigma-x3f";
+case image_x_sony_arw: return "image/x-sony-arw";
+case image_x_sony_sr2: return "image/x-sony-sr2";
+case image_x_sony_srf: return "image/x-sony-srf";
+case image_x_epson_erf: return "image/x-epson-erf";
 default: return NULL;}}
 GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
 g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
@@ -1000,7 +1036,6 @@ g_hash_table_insert(ext_table, "cpio", (gpointer)application_x_cpio);
 g_hash_table_insert(ext_table, "dbf", (gpointer)application_x_dbf);
 g_hash_table_insert(ext_table, "deb", (gpointer)application_x_debian_package);
 g_hash_table_insert(ext_table, "deepv", (gpointer)application_x_deepv);
-g_hash_table_insert(ext_table, "dcr", (gpointer)application_x_director);
 g_hash_table_insert(ext_table, "dir", (gpointer)application_x_director);
 g_hash_table_insert(ext_table, "dxr", (gpointer)application_x_director);
 g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp);
@@ -1389,6 +1424,24 @@ g_hash_table_insert(ext_table, "qtc", (gpointer)video_x_qtc);
 g_hash_table_insert(ext_table, "movie", (gpointer)video_x_sgi_movie);
 g_hash_table_insert(ext_table, "mv", (gpointer)video_x_sgi_movie);
 g_hash_table_insert(ext_table, "msg", (gpointer)application_vnd_ms_outlook);
+/* Camera RAW image formats, keyed by lower-case file extension. */
+g_hash_table_insert(ext_table, "orf", (gpointer)image_x_olympus_orf);
+g_hash_table_insert(ext_table, "nef", (gpointer)image_x_nikon_nef);
+g_hash_table_insert(ext_table, "raf", (gpointer)image_x_fuji_raf);
+g_hash_table_insert(ext_table, "rw2", (gpointer)image_x_panasonic_raw);
+g_hash_table_insert(ext_table, "raw", (gpointer)image_x_panasonic_raw);
+g_hash_table_insert(ext_table, "dng", (gpointer)image_x_adobe_dng);
+g_hash_table_insert(ext_table, "cr2", (gpointer)image_x_canon_cr2);
+g_hash_table_insert(ext_table, "crw", (gpointer)image_x_canon_crw);
+g_hash_table_insert(ext_table, "dcr", (gpointer)image_x_kodak_dcr);
+g_hash_table_insert(ext_table, "k25", (gpointer)image_x_kodak_k25);
+g_hash_table_insert(ext_table, "kdc", (gpointer)image_x_kodak_kdc);
+g_hash_table_insert(ext_table, "mrw", (gpointer)image_x_minolta_mrw);
+g_hash_table_insert(ext_table, "pef", (gpointer)image_x_pentax_pef);
+g_hash_table_insert(ext_table, "x3f", (gpointer)image_x_sigma_x3f);
+g_hash_table_insert(ext_table, "arw", (gpointer)image_x_sony_arw);
+g_hash_table_insert(ext_table, "sr2", (gpointer)image_x_sony_sr2);
+g_hash_table_insert(ext_table, "srf", (gpointer)image_x_sony_srf);
+g_hash_table_insert(ext_table, "erf", (gpointer)image_x_epson_erf);
 return ext_table;}
 GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
 g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
@@ -1821,5 +1874,23 @@ g_hash_table_insert(mime_table, "video/x-sgi-movie", (gpointer)video_x_sgi_movie
 g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
 g_hash_table_insert(mime_table, "application/x-zstd-dictionary", (gpointer)application_x_zstd_dictionary);
 g_hash_table_insert(mime_table, "application/vnd.ms-outlook", (gpointer)application_vnd_ms_outlook);
+g_hash_table_insert(mime_table, "image/x-olympus-orf", (gpointer)image_x_olympus_orf);
+g_hash_table_insert(mime_table, "image/x-nikon-nef", (gpointer)image_x_nikon_nef);
+g_hash_table_insert(mime_table, "image/x-fuji-raf", (gpointer)image_x_fuji_raf);
+g_hash_table_insert(mime_table, "image/x-panasonic-raw", (gpointer)image_x_panasonic_raw);
+g_hash_table_insert(mime_table, "image/x-adobe-dng", (gpointer)image_x_adobe_dng);
+g_hash_table_insert(mime_table, "image/x-canon-cr2", (gpointer)image_x_canon_cr2);
+g_hash_table_insert(mime_table, "image/x-canon-crw", (gpointer)image_x_canon_crw);
+g_hash_table_insert(mime_table, "image/x-dcraw", (gpointer)image_x_dcraw);
+g_hash_table_insert(mime_table, "image/x-kodak-dcr", (gpointer)image_x_kodak_dcr);
+g_hash_table_insert(mime_table, "image/x-kodak-k25", (gpointer)image_x_kodak_k25);
+g_hash_table_insert(mime_table, "image/x-kodak-kdc", (gpointer)image_x_kodak_kdc);
+g_hash_table_insert(mime_table, "image/x-minolta-mrw", (gpointer)image_x_minolta_mrw);
+g_hash_table_insert(mime_table, "image/x-pentax-pef", (gpointer)image_x_pentax_pef);
+g_hash_table_insert(mime_table, "image/x-sigma-x3f", (gpointer)image_x_sigma_x3f);
+g_hash_table_insert(mime_table, "image/x-sony-arw", (gpointer)image_x_sony_arw);
+g_hash_table_insert(mime_table, "image/x-sony-sr2", (gpointer)image_x_sony_sr2);
+g_hash_table_insert(mime_table, "image/x-sony-srf", (gpointer)image_x_sony_srf);
+g_hash_table_insert(mime_table, "image/x-epson-erf", (gpointer)image_x_epson_erf);
 return mime_table;}
 #endif
--- a/src/parsing/parse.c
+++ b/src/parsing/parse.c
@@ -119,6 +119,8 @@ void parse(void *arg) {

    if (!(SHOULD_PARSE(doc.mime))) {

+    } else if (IS_RAW(doc.mime)) {
+        parse_raw(&ScanCtx.raw_ctx, &job->vfile, &doc);
    } else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
               (mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {

@@ -143,11 +145,10 @@ void parse(void *arg) {
                    (IS_ARC_FILTER(doc.mime) && should_parse_filtered_file(doc.filepath, doc.ext))
            )) {
        parse_archive(&ScanCtx.arc_ctx, &job->vfile, &doc);
-    } else if (ScanCtx.ooxml_ctx.content_size > 0 && IS_DOC(doc.mime)) {
+    } else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc.mime)) {
        parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, &doc);
-
-    } else if (is_cbr(&ScanCtx.cbr_ctx, doc.mime)) {
-        parse_cbr(&ScanCtx.cbr_ctx, &job->vfile, &doc);
+    } else if (is_cbr(&ScanCtx.comic_ctx, doc.mime) || is_cbz(&ScanCtx.comic_ctx, doc.mime)) {
+        parse_comic(&ScanCtx.comic_ctx, &job->vfile, &doc);
    } else if (IS_MOBI(doc.mime)) {
        parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
    }
--- a/src/static/css/bootstrap-colorpicker.min.css
+++ b/src/static/css/bootstrap-colorpicker.min.css
--- a/src/static/css/dark.css
+++ b/src/static/css/dark.css
@@ -121,7 +121,7 @@ body {
    background: #546b7a;
 }

-a:hover,.btn:hover {
+a:hover, .btn:hover {
    color: #fff;
 }

@@ -130,7 +130,11 @@ a:hover,.btn:hover {
 }

 .document {
-    padding: 0.5rem;
+    padding: 0.3rem;
+}
+
+.card-text:last-child {
+    margin-top: -1px;
 }

 .document p {
@@ -166,6 +170,12 @@ a:hover,.btn:hover {
    background-color: #FAAB3C;
 }

+.add-tag-button {
+    cursor: pointer;
+    color: #212529;
+    background-color: #e0e0e0;
+}
+
 .card-img-overlay {
    pointer-events: none;
    padding: 0.75rem;
@@ -191,6 +201,18 @@ a:hover,.btn:hover {
    margin-right: 3px;
 }

+.badge-delete {
+    margin-right: -2px;
+    margin-left: 2px;
+    margin-top: -1px;
+    font-family: monospace;
+    font-size: 90%;
+    background: rgba(0, 0, 0, 0.2);
+    padding: 0.1em 0.4em;
+    color: white;
+    cursor: pointer;
+}
+
 .badge-user {
    color: #212529;
    background-color: #e0e0e0;
@@ -200,7 +222,7 @@ a:hover,.btn:hover {
    display: block;
    min-width: 64px;
    max-width: 100%;
-    max-height: 175px;
+    max-height: 240px;
    margin: 0 auto 0;
    padding: 3px 3px 0;
    width: auto;
@@ -223,20 +245,6 @@ a:hover,.btn:hover {
    width: 100%;
 }

-@media screen and (min-width: 1500px) {
-    .container {
-        max-width: 1440px;
-    }
-
-    .bricklayer-column-sizer {
-        width: 20% !important;
-    }
-
-    .bricklayer-column {
-        max-width: 20%;
-    }
-}
-
@media screen and (min-width: 1800px) {
    .container {
        max-width: 1550px;
@@ -433,6 +441,7 @@ option {
    .small-btn {
        display: none;
    }
+
    .large-btn {
        display: inherit;
    }
@@ -442,6 +451,7 @@ option {
    .small-btn {
        display: inherit;
    }
+
    .large-btn {
        display: none;
    }
@@ -512,3 +522,11 @@ svg {
 #graphs-card svg text {
    fill: #eee;
 }
+
+.wholerow {
+    outline: none !important;
+}
+
+.stat > .card-body {
+    padding: 0.7em 1.25em;
+}
--- a/src/static/css/light.css
+++ b/src/static/css/light.css
@@ -70,7 +70,11 @@ body {
 }

 .document {
-    padding: 0.5rem;
+   padding: 0.3rem;
+}
+
+.card-text:last-child {
+    margin-top: -1px;
 }

 .document p {
@@ -106,11 +110,33 @@ body {
    background-color: #e0e0e0;
 }

+.badge {
+    margin-right: 3px;
+}
+
+.badge-delete {
+    margin-right: -2px;
+    margin-left: 2px;
+    margin-top: -1px;
+    font-family: monospace;
+    font-size: 90%;
+    background: rgba(0,0,0,0.2);
+    padding: 0.1em 0.4em;
+    color: white;
+    cursor: pointer;
+}
+
 .badge-text {
    color: #FFFFFF;
    background-color: #FAAB3C;
 }

+.add-tag-button {
+    cursor: pointer;
+    color: #212529;
+    background-color: #e0e0e0;
+}
+
 .card-img-overlay {
    pointer-events: none;
    padding: 0.75rem;
@@ -131,15 +157,12 @@ body {
    overflow: hidden;
 }

-.badge {
-    margin-right: 3px;
-}

 .fit {
    display: block;
    min-width: 64px;
    max-width: 100%;
-    max-height: 175px;
+    max-height: 240px;
    margin: 0 auto 0;
    padding: 3px 3px 0 3px;
    width: auto;
@@ -162,6 +185,10 @@ body {
    width: 100%;
 }

+.bricklayer {
+    /*max-width: 100%;*/
+}
+
@media screen and (max-width: 1200px) {
    .bricklayer-column {
        max-width: 100%;
@@ -375,3 +402,11 @@ mark {
    float: right;
    margin-bottom: 10px;
 }
+
+.wholerow {
+    outline: none !important;
+}
+
+.stat > .card-body {
+    padding: 0.7em 1.25em;
+}
--- a/src/static/js/5_inspire-tree.min.js
+++ b/src/static/js/5_inspire-tree.min.js
--- a/src/static/js/bootstrap-colorpicker.min.js
+++ b/src/static/js/bootstrap-colorpicker.min.js
--- a/src/static/js/dom.js
+++ b/src/static/js/dom.js
@@ -87,6 +87,7 @@ function shouldDisplayRawImage(hit) {
        hit["_source"]["mime"] &&
        !hit["_source"]["parent"] &&
        hit["_source"]["videoc"] !== "tiff" &&
+        hit["_source"]["videoc"] !== "raw" &&
        hit["_source"]["videoc"] !== "ppm";
 }

@@ -152,26 +153,44 @@ function getTags(hit, mimeCategory) {
    // User tags
    if (hit["_source"].hasOwnProperty("tag")) {
        hit["_source"]["tag"].forEach(tag => {
-            const userTag = document.createElement("span");
-            userTag.setAttribute("class", "badge badge-pill badge-user");
-
-            const tokens = tag.split("#");
-
-            if (tokens.length > 1) {
-                const bg = "#" + tokens[1];
-                const fg = lum(tokens[1]) > 40 ? "#000" : "#fff";
-                userTag.setAttribute("style", `background-color: ${bg}; color: ${fg}`);
-            }
-
-            const name = tokens[0].split(".")[tokens[0].split(".").length - 1];
-            userTag.appendChild(document.createTextNode(name));
-            tags.push(userTag);
+            tags.push(makeUserTag(tag, hit));
        })
    }

    return tags
 }

+/**
+ * Build the badge element for a single user tag.
+ *
+ * A tag string has the form "<name>#<hex color>"; the color part is optional.
+ * The badge shows only the last dot-separated segment of the name and carries
+ * the full name as its tooltip. While the badge is hovered it contains an "X"
+ * button; clicking it calls deleteTag() and removes the badge once that call
+ * resolves.
+ *
+ * @param {string} tag Raw tag string.
+ * @param {Object} hit Search hit the tag belongs to (passed through to deleteTag).
+ * @returns {HTMLSpanElement} The badge element, not yet attached to the document.
+ */
+function makeUserTag(tag, hit) {
+    const [qualifiedName, hexColor] = tag.split("#");
+
+    const badge = document.createElement("span");
+    badge.className = "badge badge-pill badge-user";
+    badge.title = qualifiedName;
+
+    // A color is only present when the tag contained a "#" separator.
+    if (hexColor !== undefined) {
+        const textColor = lum(hexColor) > 50 ? "#000" : "#fff";
+        badge.setAttribute("style", `background-color: #${hexColor}; color: ${textColor}`);
+    }
+
+    const removeButton = document.createElement("span");
+    removeButton.className = "badge badge-pill badge-delete";
+    removeButton.title = "Delete tag";
+    removeButton.textContent = "X";
+    removeButton.addEventListener("click", () => {
+        deleteTag(tag, hit).then(() => {
+            badge.remove();
+        });
+    });
+
+    // The delete button is part of the DOM only while the badge is hovered.
+    badge.addEventListener("mouseenter", () => badge.appendChild(removeButton));
+    badge.addEventListener("mouseleave", () => removeButton.remove());
+
+    // Display only the last segment of a dotted tag name.
+    const shortName = qualifiedName.split(".").pop();
+    badge.appendChild(document.createTextNode(shortName));
+
+    return badge;
+}
+
 function infoButtonCb(hit) {
    return () => {
        getDocumentInfo(hit["_id"]).then(doc => {
@@ -191,7 +210,8 @@ function infoButtonCb(hit) {

            const displayFields = new Set([
                "mime", "size", "mtime", "path", "title", "width", "height", "duration", "audioc", "videoc",
-                "bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag"
+                "bitrate", "artist", "album", "album_artist", "genre", "title", "font_name", "tag", "author",
+                "modified_by"
            ]);
            Object.keys(doc)
                .filter(key => key.startsWith("_keyword.") || key.startsWith("_text.") || displayFields.has(key) || key.startsWith("exif_"))
@@ -337,9 +357,31 @@ function createDocCard(hit) {

    docCardBody.appendChild(tagContainer);

+    attachTagContainerEventListener(tagContainer, hit);
    return docCard;
 }

+/**
+ * Give a tag container a hover-only "+Add" badge.
+ *
+ * The badge is inserted in front of the container's first <small> child while
+ * the pointer is over the container and removed again when it leaves.
+ * Clicking it clears the tag input, stores `hit` and an insertion callback in
+ * the shared currentDocToTag / currentTagCallback variables, then shows the
+ * #tagModal dialog and focuses the tag input.
+ *
+ * @param {HTMLElement} tagContainer Element holding the tag badges of one result.
+ * @param {Object} hit Search hit displayed by that result.
+ */
+function attachTagContainerEventListener(tagContainer, hit) {
+    // Reference node for insertions; undefined when the container has no <small> child.
+    const anchor = [...tagContainer.children].find(el => el.tagName === "SMALL");
+
+    const addBadge = document.createElement("span");
+    addBadge.className = "badge badge-pill add-tag-button";
+    addBadge.textContent = "+Add";
+
+    const openTagDialog = () => {
+        tagBar.value = "";
+        currentDocToTag = hit;
+        // Invoked with the new tag string once it has been saved.
+        currentTagCallback = tag => {
+            tagContainer.insertBefore(makeUserTag(tag, hit), anchor);
+        };
+        $("#tagModal").modal("show");
+        tagBar.focus();
+    };
+
+    tagContainer.addEventListener("mouseenter", () => tagContainer.insertBefore(addBadge, anchor));
+    tagContainer.addEventListener("mouseleave", () => addBadge.remove());
+    addBadge.addEventListener("click", openTagDialog);
+}
+
 function makeThumbnail(mimeCategory, hit, imgWrapper, small) {

    if (!hit["_source"].hasOwnProperty("thumbnail")) {
@@ -412,7 +454,6 @@ function createDocLine(hit) {

    if (hit["_source"].hasOwnProperty("parent")) {
        line.classList.add("sub-document");
-        isSubDocument = true;
    }

    const infoButton = makeInfoButton(hit);
@@ -485,6 +526,8 @@ function createDocLine(hit) {
    pathLine.appendChild(path);
    pathLine.appendChild(tagContainer);

+    attachTagContainerEventListener(tagContainer, hit);
+
    return line;
 }

--- a/src/static/js/search.js
+++ b/src/static/js/search.js
@@ -1,4 +1,4 @@
-const SIZE = 40;
+const SIZE = 60;
 let mimeMap = [];
 let tagMap = [];
 let mimeTree;
@@ -6,6 +6,9 @@ let tagTree;

 let searchBar = document.getElementById("searchBar");
 let pathBar = document.getElementById("pathBar");
+let tagBar = document.getElementById("tagBar");
+let currentDocToTag = null;
+let currentTagCallback = null;
 let lastDoc = null;
 let reachedEnd = false;
 let docCount = 0;
@@ -109,13 +112,112 @@ window.onload = () => {
            searchDebounced();
        }
    });
+    new autoComplete({
+        selector: '#tagBar',
+        minChars: 1,
+        delay: 200,
+        renderItem: function (item) {
+            return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item.split("#")[0] + '</div>';
+        },
+        source: async function (term, suggest) {
+            term = term.toLowerCase();
+
+            const choices = await getTagChoices();
+
+            let matches = [];
+            for (let i = 0; i < choices.length; i++) {
+                if (~choices[i].toLowerCase().indexOf(term)) {
+                    matches.push(choices[i]);
+                }
+            }
+            suggest(matches.sort());
+        },
+        onSelect: function (e, item) {
+            const name = item.split("#")[0];
+            const color = "#" + item.split("#")[1];
+            $("#tag-color").val(color);
+            $("#tag-color").trigger("keyup", color);
+            tagBar.value = name;
+            e.preventDefault();
+        }
+    });
+    [tagBar, document.getElementById("tag-color")].forEach(elem => {
+        elem.addEventListener("keyup", e => {
+            if (e.key === "Enter" && tagBar.value.length > 0) {
+                const tag = tagBar.value + document.getElementById("tag-color").value;
+                saveTag(tag, currentDocToTag).then(() => currentTagCallback(tag));
+            }
+        });
+    })
+    $("#tag-color").colorpicker({
+        format: "hex",
+        sliders: {
+            saturation: {
+                selector: '.colorpicker-saturation',
+                callLeft: 'setSaturationRatio',
+                callTop: 'setValueRatio'
+            },
+            hue: {
+                selector: '.colorpicker-hue',
+                maxLeft: 0,
+                callLeft: false,
+                callTop: 'setHueRatio'
+            }
+        }
+    });
 };

+/**
+ * Add a tag to a document by posting to "/tag/<index>".
+ *
+ * When the request succeeds the tag input is blurred, the #tagModal dialog is
+ * hidden and a confirmation toast is displayed.
+ *
+ * @param {string} tag Tag string to add (sent as `name`).
+ * @param {Object} hit Search hit of the document being tagged.
+ * @returns The result of chaining `.then()` on the $.jsonPost() call.
+ */
+function saveTag(tag, hit) {
+    const source = hit["_source"];
+    const relPath = `${source["path"]}/${source["name"]}${ext(hit)}`;
+
+    const payload = {
+        delete: false,
+        name: tag,
+        doc_id: hit["_id"],
+        relpath: relPath
+    };
+
+    const onSaved = () => {
+        tagBar.blur();
+        $("#tagModal").modal("hide");
+        $.toast({
+            heading: "Tag added",
+            text: "Tag saved to index storage and updated in ElasticSearch",
+            stack: 3,
+            bgColor: "#00a4bc",
+            textColor: "#fff",
+            position: 'bottom-right',
+            hideAfter: 3000,
+            loaderBg: "#08c7e8",
+        });
+    };
+
+    return $.jsonPost("/tag/" + source["index"], payload).then(onSaved);
+}
+
+/**
+ * Remove a tag from a document by posting to "/tag/<index>" with `delete: true`.
+ *
+ * When the request succeeds a confirmation toast is displayed; removing the
+ * badge from the page is left to the caller.
+ *
+ * @param {string} tag Tag string to remove (sent as `name`).
+ * @param {Object} hit Search hit of the document the tag belongs to.
+ * @returns The result of chaining `.then()` on the $.jsonPost() call.
+ */
+function deleteTag(tag, hit) {
+    const relPath = hit["_source"]["path"] + "/" + hit["_source"]["name"] + ext(hit);
+
+    return $.jsonPost("/tag/" + hit["_source"]["index"], {
+        delete: true,
+        name: tag,
+        doc_id: hit["_id"],
+        relpath: relPath
+    }).then(() => {
+        $.toast({
+            heading: "Tag deleted",
+            // Wording fixed: the original message was missing "from".
+            text: "Tag deleted from index storage and updated in ElasticSearch",
+            stack: 3,
+            bgColor: "#00a4bc",
+            textColor: "#fff",
+            position: 'bottom-right',
+            hideAfter: 3000,
+            loaderBg: "#08c7e8",
+        });
+    })
+}
+
 function toggleFuzzy() {
    searchDebounced();
 }

-$.jsonPost("i").then(resp => {
+$.get("i").then(resp => {

    const urlIndices = (new URLSearchParams(location.search)).get("i");
    resp["indices"].forEach(idx => {
@@ -151,7 +253,7 @@ function handleTreeClick(tree) {

        if (node.id === "any") {
            if (!node.itree.state.checked) {
-                tree.deselect();
+                tree.deselectDeep();
            }
        } else {
            tree.node("any").deselect();
@@ -234,6 +336,9 @@ $.jsonPost("es", {
        selection: {
            mode: 'checkbox'
        },
+        checkbox: {
+            autoCheckChildren: false
+        },
        data: tagMap
    });
    new InspireTreeDOM(tagTree, {
@@ -245,20 +350,70 @@ $.jsonPost("es", {
 });

 function addTag(map, tag, id, count) {
-    let tags = tag.split("#")[0].split(".");
+    // let tags = tag.split("#")[0].split(".");
+    let tags = tag.split(".");

    let child = {
        id: id,
-        text: tags.length !== 1 ? tags[0] : `${tags[0]} (${count})`,
-        children: []
+        values: [id],
+        count: count,
+        text: tags.length !== 1 ? tags[0] : `${tags[0].split("#")[0]} (${count})`,
+        name: tags[0],
+        children: [],
+        isLeaf: tags.length === 1,
+        //Overwrite base functions
+        blur: function () {
+        },
+        select: function () {
+            this.state("selected", true);
+            return this.check()
+        },
+        deselect: function () {
+            this.state("selected", false);
+            return this.uncheck()
+        },
+        uncheck: function () {
+            if (!this.isLeaf) {
+                return;
+            }
+
+            baseStateChange('checked', false, 'unchecked', this, false);
+            this.state('indeterminate', false);
+
+            if (this.hasParent()) {
+                this.getParent().refreshIndeterminateState();
+            }
+
+            this._tree.end();
+            return this;
+        },
+        check: function () {
+            if (!this.isLeaf) {
+                return;
+            }
+
+            baseStateChange('checked', true, 'checked', this, false);
+
+            if (this.hasParent()) {
+                this.getParent().refreshIndeterminateState();
+            }
+
+            this._tree.end();
+            return this;
+        }
    };

    let found = false;
    map.forEach(node => {
-        if (node.text === child.text) {
+        if (node.name.split("#")[0] === child.name.split("#")[0]) {
            found = true;
            if (tags.length !== 1) {
                addTag(node.children, tags.slice(1).join("."), id, count);
+            } else {
+                // Same name, different color
+                node.count += count;
+                node.text = `${tags[0].split("#")[0]} (${node.count})`;
+                node.values.push(id);
            }
        }
    });
@@ -310,7 +465,11 @@ function getSelectedNodes(tree) {

        //Only get children
        if (selected[i].text.indexOf("(") !== -1) {
-            selectedNodes.push(selected[i].id);
+            if (selected[i].values) {
+                selectedNodes.push(selected[i].values);
+            } else {
+                selectedNodes.push(selected[i].id);
+            }
        }
    }

@@ -373,7 +532,9 @@ function search(after = null) {

    let tags = getSelectedNodes(tagTree);
    if (!tags.includes("any")) {
-        filters.push({terms: {"tag": tags}});
+        tags.forEach(tagGroup => {
+            filters.push({terms: {"tag": tagGroup}})
+        })
    }

    if (date_min && date_max) {
@@ -617,6 +778,7 @@ function getNextDepth(node) {
                    text: `${name}/ (${bucket.doc_count})`,
                    depth: node.depth + 1,
                    index: node.index,
+                    values: [bucket.key],
                    children: true,
                }
            }).filter(x => x !== null)
@@ -647,6 +809,7 @@ function createPathTree(target) {
    selectedIndices.forEach(index => {
        pathTree.addNode({
            id: "/" + index,
+            values: ["/" + index],
            text: `/[${indexMap[index]}]`,
            index: index,
            depth: 0,
@@ -675,5 +838,34 @@ function getPathChoices() {
                }
            }
        }).then(resp => getPaths(resp["suggest"]["path"][0]["options"].map(opt => opt["_source"]["path"])));
-    })
+    });
+}
+
+
+/**
+ * Fetch tag suggestions for the current content of the tag input.
+ *
+ * Queries the "suggest-tag" completion field through the "es" endpoint and
+ * flattens the `tag` arrays of the returned documents, skipping any tag whose
+ * name part (the text before "#") was already collected.
+ *
+ * @returns {Promise<string[]>} Resolves with the collected tag strings in the
+ *     order they were first encountered.
+ */
+function getTagChoices() {
+    return new Promise(resolve => {
+        const suggestQuery = {
+            suggest: {
+                tag: {
+                    prefix: tagBar.value,
+                    completion: {
+                        field: "suggest-tag",
+                        skip_duplicates: true,
+                        size: 10000
+                    }
+                }
+            }
+        };
+
+        $.jsonPost("es", suggestQuery).then(resp => {
+            const choices = [];
+
+            resp["suggest"]["tag"][0]["options"]
+                .map(option => option["_source"]["tag"])
+                .forEach(docTags => docTags.forEach(candidate => {
+                    const candidateName = candidate.split("#")[0];
+                    const alreadyListed = choices.find(known => known.split("#")[0] === candidateName);
+                    if (!alreadyListed) {
+                        choices.push(candidate);
+                    }
+                }));
+
+            resolve(choices);
+        });
+    });
+}
--- a/src/static/js/util.js
+++ b/src/static/js/util.js
@@ -101,7 +101,8 @@ const _defaults = {
    treemapColor: "PuBuGn",
    treemapSize: "large",
    suggestPath: true,
-    fragmentSize: 100
+    fragmentSize: 100,
+    columns: 5
 };

 function loadSettings() {
@@ -118,6 +119,7 @@ function loadSettings() {
    $("#settingTreemapType").val(CONF.options.treemapType);
    $("#settingSuggestPath").prop("checked", CONF.options.suggestPath);
    $("#settingFragmentSize").val(CONF.options.fragmentSize);
+    $("#settingColumns").val(CONF.options.columns);
 }

 function Settings() {
@@ -125,6 +127,7 @@ function Settings() {

    this._onUpdate = function () {
        $("#fuzzyToggle").prop("checked", this.options.fuzzy);
+        updateColumnStyle();
    };

    this.load = function () {
@@ -161,6 +164,7 @@ function updateSettings() {
    CONF.options.treemapType = $("#settingTreemapType").val();
    CONF.options.suggestPath = $("#settingSuggestPath").prop("checked");
    CONF.options.fragmentSize = $("#settingFragmentSize").val();
+    CONF.options.columns = $("#settingColumns").val();
    CONF.save();

    if (typeof searchDebounced !== "undefined") {
@@ -203,3 +207,26 @@ function toggleTheme() {
    }
    window.location.reload();
 }
+
+/**
+ * Regenerate the contents of the element with id "style" so that, on screens
+ * at least 1500px wide, each bricklayer column is 100 / CONF.options.columns
+ * percent wide. Does nothing when the page has no such element.
+ */
+function updateColumnStyle() {
+    const style = document.getElementById("style");
+    if (style) {
+        const columnWidth = 100 / CONF.options.columns;
+
+        // The original template ended with an unmatched "}", which made the
+        // generated stylesheet invalid; braces are balanced here.
+        style.innerHTML =
+            `
+@media screen and (min-width: 1500px) {
+    .container {
+        max-width: 1440px;
+    }
+
+    .bricklayer-column-sizer {
+        width: ${columnWidth}% !important;
+    }
+
+    .bricklayer-column {
+        max-width: ${columnWidth}%;
+    }
+}
+`;
+    }
+}
--- a/src/static/search.html
+++ b/src/static/search.html
@@ -6,15 +6,17 @@
    <meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no'/>

    <link href="css" rel="stylesheet" type="text/css">
+    <style id="style"></style>
 </head>
 <body>

 <nav class="navbar navbar-expand-lg">
    <a class="navbar-brand" href="/">sist2</a>
-    <span class="badge badge-pill version">2.4.2</span>
+    <span class="badge badge-pill version">2.7.4</span>
    <span class="tagline">Lightning-fast file system indexer and search tool </span>
    <a class="btn ml-auto" href="/stats">Stats</a>
-    <button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
+    <button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings
+    </button>
    <button class="btn" title="Toggle theme" onclick="toggleTheme()">Theme</button>
 </nav>

@@ -48,8 +50,11 @@
                <div class="col">
                    <div class="input-group" style="margin-bottom: 0.5em; margin-top: 1em">
                        <div class="input-group-prepend">
-                            <button id="pathBarHelper" class="btn btn-outline-secondary" data-toggle="modal" data-target="#pathTreeModal">
-                                <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" width="20px"><path d="M288 224h224a32 32 0 0 0 32-32V64a32 32 0 0 0-32-32H400L368 0h-80a32 32 0 0 0-32 32v64H64V8a8 8 0 0 0-8-8H40a8 8 0 0 0-8 8v392a16 16 0 0 0 16 16h208v64a32 32 0 0 0 32 32h224a32 32 0 0 0 32-32V352a32 32 0 0 0-32-32H400l-32-32h-80a32 32 0 0 0-32 32v64H64V128h192v64a32 32 0 0 0 32 32zm0 96h66.74l32 32H512v128H288zm0-288h66.74l32 32H512v128H288z"/></svg>
+                            <button id="pathBarHelper" class="btn btn-outline-secondary" data-toggle="modal"
+                                    data-target="#pathTreeModal">
+                                <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" width="20px">
+                                    <path d="M288 224h224a32 32 0 0 0 32-32V64a32 32 0 0 0-32-32H400L368 0h-80a32 32 0 0 0-32 32v64H64V8a8 8 0 0 0-8-8H40a8 8 0 0 0-8 8v392a16 16 0 0 0 16 16h208v64a32 32 0 0 0 32 32h224a32 32 0 0 0 32-32V352a32 32 0 0 0-32-32H400l-32-32h-80a32 32 0 0 0-32 32v64H64V128h192v64a32 32 0 0 0 32 32zm0 96h66.74l32 32H512v128H288zm0-288h66.74l32 32H512v128H288z"/>
+                                </svg>
                            </button>
                        </div>
                        <input id="pathBar" type="search" class="form-control" placeholder="Filter path">
@@ -156,7 +161,8 @@
                        <i>fried eggs</i> and either <i>eggplant</i> or <i>potato</i>, but will ignore results
                        containing <i>frittata</i>.</p>

-                    <p>When neither <code>+</code> or <code>|</code> is specified, the default operator is <code>+</code> (and).</p>
+                    <p>When neither <code>+</code> or <code>|</code> is specified, the default operator is
+                        <code>+</code> (and).</p>
                    <p>When the <b>Fuzzy</b> option is checked, partial matches are also returned.</p>
                    <br>
                    <p>For more information, see <a target="_blank"
@@ -189,12 +195,14 @@

                    <div class="custom-control custom-checkbox">
                        <input type="checkbox" class="custom-control-input" id="settingSearchInPath">
-                        <label class="custom-control-label" for="settingSearchInPath">Enable matching query against document path</label>
+                        <label class="custom-control-label" for="settingSearchInPath">Enable matching query against
+                            document path</label>
                    </div>

                    <div class="custom-control custom-checkbox">
                        <input type="checkbox" class="custom-control-input" id="settingSuggestPath">
-                        <label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
+                        <label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter
+                            bar</label>
                    </div>

                    <br/>
@@ -209,6 +217,20 @@
                        <option value="list">List</option>
                    </select>

+                    <div class="form-group">
+                        <label for="settingColumns">Maximum column count</label>
+                        <select id="settingColumns" class="form-control form-control-sm">
+                            <option value="3">3</option>
+                            <option value="4">4</option>
+                            <option value="5">5</option>
+                            <option value="6">6</option>
+                            <option value="7">7</option>
+                            <option value="8">8</option>
+                            <option value="9">9</option>
+                            <option value="13">13</option>
+                        </select>
+                    </div>
+
                    <hr/>
                    <h4>Stats</h4>

@@ -288,6 +310,32 @@
        </div>
    </div>

+    <!-- "Add tag" dialog: a text input for the tag name and a text input for its color. -->
+    <!-- aria-labelledby must reference an element id, so the title carries a matching id. -->
+    <div class="modal" id="tagModal" tabindex="-1" role="dialog" aria-labelledby="tagModalTitle" aria-hidden="true">
+        <div class="modal-dialog modal-dialog-centered" role="document">
+            <div class="modal-content">
+                <div class="modal-header">
+                    <h5 class="modal-title" id="tagModalTitle">Add tag</h5>
+                    <button type="button" class="close" data-dismiss="modal" aria-label="Close">
+                        <span aria-hidden="true">&times;</span>
+                    </button>
+                </div>
+
+                <div class="modal-body">
+                    <div class="form-group">
+                        <div class="row">
+                            <div class="col col-8">
+                                <!-- No visible <label>, so give the input an accessible name. -->
+                                <input type="text" id="tagBar" class="form-control" aria-label="Tag name">
+                            </div>
+                            <div class="col col-4">
+                                <input type="text" id="tag-color" value="" class="form-control" aria-label="Tag color"/>
+                            </div>
+                        </div>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </div>
+
    <div id="searchResults"></div>
 </div>

--- a/src/static/stats.html
+++ b/src/static/stats.html
@@ -10,7 +10,7 @@

 <nav class="navbar navbar-expand-lg">
    <a class="navbar-brand" href="/">sist2</a>
-    <span class="badge badge-pill version">2.4.2</span>
+    <span class="badge badge-pill version">2.7.4</span>
    <span class="tagline">Lightning-fast file system indexer and search tool </span>
    <a style="margin-left: auto" class="btn" href="/">Back</a>
    <button class="btn" type="button" data-toggle="modal" data-target="#settings"
@@ -94,6 +94,19 @@
                    <option value="list">List</option>
                </select>

+                <div class="form-group">
+                    <label for="settingColumns">Maximum column count</label>
+                    <select id="settingColumns" class="form-control form-control-sm">
+                        <option value="3">3</option>
+                        <option value="4">4</option>
+                        <option value="5">5</option>
+                        <option value="6">6</option>
+                        <option value="7">7</option>
+                        <option value="8">8</option>
+                        <option value="9">9</option>
+                    </select>
+                </div>
+
                <hr/>
                <h4>Stats</h4>

--- a/src/tpool.c
+++ b/src/tpool.c
@@ -3,6 +3,8 @@
 #include "sist.h"
 #include <pthread.h>

+#define MAX_QUEUE_SIZE 10000
+
 typedef void (*thread_func_t)(void *arg);

 typedef struct tpool_work {
@@ -26,6 +28,7 @@ typedef struct tpool {
    int work_cnt;
    int done_cnt;

+    int free_arg;
    int stop;

    void (*cleanup_func)();
@@ -79,6 +82,10 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
        return 0;
    }

+    while ((pool->work_cnt - pool->done_cnt) >= MAX_QUEUE_SIZE) {
+        usleep(100000);
+    }
+
    pthread_mutex_lock(&(pool->work_mutex));
    if (pool->work_head == NULL) {
        pool->work_head = work;
@@ -121,7 +128,9 @@ static void *tpool_worker(void *arg) {
            }

            work->func(work->arg);
-            free(work->arg);
+            if (pool->free_arg) {
+                free(work->arg);
+            }
            free(work);
        }

@@ -138,8 +147,10 @@ static void *tpool_worker(void *arg) {
        pthread_mutex_unlock(&(pool->work_mutex));
    }

-    LOG_INFO("tpool.c", "Executing cleaup function")
-    pool->cleanup_func();
+    if (pool->cleanup_func != NULL) {
+        LOG_INFO("tpool.c", "Executing cleanup function")
+        pool->cleanup_func();
+    }

    pthread_cond_signal(&(pool->working_cond));
    pthread_mutex_unlock(&(pool->work_mutex));
@@ -207,13 +218,14 @@ void tpool_destroy(tpool_t *pool) {
 * Create a thread pool
 * @param thread_cnt Worker threads count
 */
-tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
+tpool_t *tpool_create(size_t thread_cnt, void cleanup_func(), int free_arg) {

    tpool_t *pool = malloc(sizeof(tpool_t));
    pool->thread_cnt = thread_cnt;
    pool->work_cnt = 0;
    pool->done_cnt = 0;
    pool->stop = 0;
+    pool->free_arg = free_arg;
    pool->cleanup_func = cleanup_func;
    pool->threads = calloc(sizeof(pthread_t), thread_cnt);

--- a/src/tpool.h
+++ b/src/tpool.h
@@ -8,7 +8,7 @@ typedef struct tpool tpool_t;

 typedef void (*thread_func_t)(void *arg);

-tpool_t *tpool_create(size_t num, void (*cleanup_func)());
+tpool_t *tpool_create(size_t num, void (*cleanup_func)(), int free_arg);
 void tpool_start(tpool_t *pool);
 void tpool_destroy(tpool_t *tm);

--- a/src/types.h
+++ b/src/types.h
@@ -10,7 +10,7 @@ typedef struct index_descriptor {
    char version[64];
    long timestamp;
    char root[PATH_MAX];
-    char rewrite_url[8196];
+    char rewrite_url[8192];
    short root_len;
    char name[1024];
    char type[64];
@@ -19,6 +19,7 @@ typedef struct index_descriptor {
 typedef struct index_t {
    struct index_descriptor desc;
    struct store_t *store;
+    struct store_t *tag_store;
    char path[PATH_MAX];
 } index_t;

--- a/src/web/serve.c
+++ b/src/web/serve.c
@@ -53,6 +53,14 @@ store_t *get_store(const char *index_id) {
    return NULL;
 }

+/**
+ * Look up the tag store belonging to an index.
+ *
+ * @param index_id Index identifier, resolved through get_index_by_id()
+ * @return The tag_store of the matching index, or NULL when no index matches
+ */
+store_t *get_tag_store(const char *index_id) {
+    index_t *match = get_index_by_id(index_id);
+    return match == NULL ? NULL : match->tag_store;
+}
+
 void search_index(struct mg_connection *nc) {
    send_response_line(nc, 200, sizeof(search_html), "Content-Type: text/html");
    mg_send(nc, search_html, sizeof(search_html));
@@ -68,6 +76,7 @@ void stats(struct mg_connection *nc) {
 void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {

    if (path->len != UUID_STR_LEN + 4) {
+        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }
@@ -78,6 +87,7 @@ void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_st

    index_t *index = get_index_by_id(arg_uuid);
    if (index == NULL) {
+        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }
@@ -101,7 +111,7 @@ void stats_files(struct mg_connection *nc, struct http_message *hm, struct mg_st
            return;
    }

-    char disposition[8196];
+    char disposition[8192];
    snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s\"", file);

    char full_path[PATH_MAX];
@@ -173,6 +183,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str

    if (path->len != UUID_STR_LEN * 2 + 2) {
        LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) path->len, path->p)
+        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }
@@ -189,6 +200,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
    int ret = uuid_parse(arg_uuid, uuid);
    if (ret != 0) {
        LOG_DEBUGF("serve.c", "Invalid thumbnail UUID: %s", arg_uuid)
+        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }
@@ -196,6 +208,7 @@ void thumbnail(struct mg_connection *nc, struct http_message *hm, struct mg_str
    store_t *store = get_store(arg_index);
    if (store == NULL) {
        LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
+        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }
@@ -214,6 +227,7 @@ void search(struct mg_connection *nc, struct http_message *hm) {

    if (hm->body.len == 0) {
        LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
+        mg_http_send_error(nc, 500, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }
@@ -242,7 +256,7 @@ int serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {

    const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;

-    char url[8196];
+    char url[8192];
    snprintf(url, sizeof(url),
             "%s%s/%s%s%s",
             idx->desc.rewrite_url, path_unescaped, name_unescaped, strlen(ext) == 0 ? "" : ".", ext);
@@ -277,7 +291,7 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s

    LOG_DEBUGF("serve.c", "Serving file from disk: %s", full_path)

-    char disposition[8196];
+    char disposition[8192];
    snprintf(disposition, sizeof(disposition), "Content-Disposition: inline; filename=\"%s%s%s\"",
             name, strlen(ext) == 0 ? "" : ".", ext);

@@ -314,6 +328,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_

    if (path->len != UUID_STR_LEN + 2) {
        LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) path->len, path->p)
+        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }
@@ -328,6 +343,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
    cJSON *index_id = cJSON_GetObjectItem(source, "index");
    if (index_id == NULL) {
        cJSON_Delete(doc);
+        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }
@@ -335,6 +351,7 @@ void document_info(struct mg_connection *nc, struct http_message *hm, struct mg_
    index_t *idx = get_index_by_id(index_id->valuestring);
    if (idx == NULL) {
        cJSON_Delete(doc);
+        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }
@@ -352,6 +369,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path

    if (path->len != UUID_STR_LEN + 2) {
        LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) path->len, path->p)
+        mg_http_send_error(nc, 404, NULL);
        nc->flags |= MG_F_SEND_AND_CLOSE;
        return;
    }
@@ -371,6 +389,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
        index_id = cJSON_GetObjectItem(source, "index");
        if (index_id == NULL) {
            cJSON_Delete(doc);
+            mg_http_send_error(nc, 404, NULL);
            nc->flags |= MG_F_SEND_AND_CLOSE;
            return;
        }
@@ -386,6 +405,7 @@ void file(struct mg_connection *nc, struct http_message *hm, struct mg_str *path
    if (idx == NULL) {
        cJSON_Delete(doc);
        nc->flags |= MG_F_SEND_AND_CLOSE;
+        mg_http_send_error(nc, 404, NULL);
        return;
    }

@@ -410,6 +430,177 @@ void status(struct mg_connection *nc) {
    nc->flags |= MG_F_SEND_AND_CLOSE;
 }

+/**
+ * Fields of a tag request body, as extracted by parse_tag_request().
+ */
+typedef struct {
+    char *name;     // "name" string: the tag to add or remove
+    int delete;     // "delete" boolean: non-zero removes the tag, zero adds it
+    char *relpath;  // "relpath" string: used by tag() as the key in the tag store
+    char *doc_id;   // "doc_id" string: used by tag() to build the <es_url>/sist2/_update/ URL
+} tag_req_t;
+
+/**
+ * Validate a parsed tag request and collect its fields.
+ *
+ * The request must be a JSON object with string members "name", "relpath" and
+ * "doc_id" and a boolean member "delete".
+ *
+ * The string fields of the result point into `json` and are NOT copied: the
+ * result is only usable until `json` is released with cJSON_Delete().
+ *
+ * @param json Parsed request body (may be NULL or of any JSON type)
+ * @return Newly allocated request that the caller must free(), or NULL when the
+ *         request is malformed or the allocation fails
+ */
+tag_req_t *parse_tag_request(cJSON *json) {
+
+    if (!cJSON_IsObject(json)) {
+        return NULL;
+    }
+
+    cJSON *arg_name = cJSON_GetObjectItem(json, "name");
+    if (arg_name == NULL || !cJSON_IsString(arg_name)) {
+        return NULL;
+    }
+
+    cJSON *arg_delete = cJSON_GetObjectItem(json, "delete");
+    if (arg_delete == NULL || !cJSON_IsBool(arg_delete)) {
+        return NULL;
+    }
+
+    cJSON *arg_relpath = cJSON_GetObjectItem(json, "relpath");
+    if (arg_relpath == NULL || !cJSON_IsString(arg_relpath)) {
+        return NULL;
+    }
+
+    cJSON *arg_doc_id = cJSON_GetObjectItem(json, "doc_id");
+    if (arg_doc_id == NULL || !cJSON_IsString(arg_doc_id)) {
+        return NULL;
+    }
+
+    tag_req_t *req = malloc(sizeof(tag_req_t));
+    if (req == NULL) {
+        // Out of memory: report it like a malformed request instead of
+        // dereferencing a NULL pointer below.
+        return NULL;
+    }
+
+    req->delete = arg_delete->valueint;
+    req->name = arg_name->valuestring;
+    req->relpath = arg_relpath->valuestring;
+    req->doc_id = arg_doc_id->valuestring;
+
+    return req;
+}
+
+/**
+ * Build the scripted-update request body for a tag change.
+ *
+ * @param buf     Destination buffer
+ * @param buf_len Size of buf in bytes
+ * @param source  Painless script source; must not contain characters that need JSON escaping
+ * @param name    Tag name as received from the client
+ * @return TRUE when the whole body fits in buf, FALSE on allocation failure or truncation
+ */
+static int tag_build_update_body(char *buf, size_t buf_len, const char *source, const char *name) {
+    // Serialise the tag through cJSON so that quotes, backslashes and control
+    // characters in the client-supplied name are escaped instead of being
+    // spliced raw into the JSON document. The result includes the surrounding
+    // double quotes, so plain names produce the same bytes as before.
+    cJSON *name_item = cJSON_CreateString(name);
+    if (name_item == NULL) {
+        return FALSE;
+    }
+    char *name_json = cJSON_PrintUnformatted(name_item);
+    cJSON_Delete(name_item);
+    if (name_json == NULL) {
+        return FALSE;
+    }
+
+    int written = snprintf(buf, buf_len,
+                           "{"
+                           "    \"script\" : {"
+                           "        \"source\": \"%s\","
+                           "        \"lang\": \"painless\","
+                           "        \"params\" : {"
+                           "            \"tag\" : %s"
+                           "        }"
+                           "    }"
+                           "}", source, name_json
+    );
+    free(name_json);
+
+    // snprintf returns the length it *wanted* to write; anything >= buf_len was truncated.
+    return written >= 0 && (size_t) written < buf_len;
+}
+
+/**
+ * Handle a request on /tag/<index id>: add or remove one tag on a document.
+ *
+ * The JSON body is validated by parse_tag_request(). The tag list stored under
+ * the request's relpath in the index's tag store is updated, and a scripted
+ * update for the document is handed to web_post_async(), whose result is kept
+ * in nc->user_data.
+ *
+ * Errors are answered with 404 (bad path / unknown index), 400 (not a POST,
+ * malformed or oversized request) or 500 (allocation failure, unreadable tag list).
+ *
+ * @param nc   Connection the request arrived on
+ * @param hm   Parsed HTTP request
+ * @param path Path component of the request URI
+ */
+void tag(struct mg_connection *nc, struct http_message *hm, struct mg_str *path) {
+    if (path->len != UUID_STR_LEN + 4) {
+        LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) path->len, path->p)
+        mg_http_send_error(nc, 404, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
+        return;
+    }
+
+    // The index id follows the 5-character "/tag/" prefix.
+    char arg_index[UUID_STR_LEN];
+    memcpy(arg_index, hm->uri.p + 5, UUID_STR_LEN);
+    *(arg_index + UUID_STR_LEN - 1) = '\0';
+
+    // Only POST requests with a body are accepted. The method bytes live in
+    // hm->method.p (comparing against &hm->method inspects the struct itself),
+    // and the request is invalid when they do NOT match "POST".
+    if (hm->body.len < 2 || hm->method.len != 4 || memcmp(hm->method.p, "POST", 4) != 0) {
+        LOG_DEBUG("serve.c", "Invalid tag request")
+        mg_http_send_error(nc, 400, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
+        return;
+    }
+
+    store_t *store = get_tag_store(arg_index);
+    if (store == NULL) {
+        LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
+        mg_http_send_error(nc, 404, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
+        return;
+    }
+
+    // cJSON_Parse needs a NUL-terminated string; the request body is not one.
+    char *body = malloc(hm->body.len + 1);
+    if (body == NULL) {
+        mg_http_send_error(nc, 500, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
+        return;
+    }
+    memcpy(body, hm->body.p, hm->body.len);
+    *(body + hm->body.len) = '\0';
+    cJSON *json = cJSON_Parse(body);
+
+    tag_req_t *arg_req = parse_tag_request(json);
+    if (arg_req == NULL) {
+        LOG_DEBUG("serve.c", "Could not parse tag request")
+        cJSON_Delete(json);
+        free(body);
+        mg_http_send_error(nc, 400, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
+        return;
+    }
+
+    cJSON *arr = NULL;
+    char *json_str = NULL;
+    const char *script = NULL;
+    int error_status = 0; // HTTP status to answer with on failure, 0 on success
+    char buf[8192];
+    char url[4096];
+
+    // Use the same key length for reading and writing (terminator included).
+    // NOTE(review): the read previously used strlen() while the write used
+    // strlen() + 1 - with a byte-exact key lookup the stored list would never
+    // be found. Confirm against store_read()/store_write().
+    const size_t key_len = strlen(arg_req->relpath) + 1;
+
+    size_t data_len = 0;
+    // NOTE(review): ownership of the buffer returned by store_read() is not
+    // visible from here - confirm whether it has to be released.
+    const char *data = store_read(store, arg_req->relpath, key_len, &data_len);
+    arr = data_len == 0 ? cJSON_CreateArray() : cJSON_Parse(data);
+    if (!cJSON_IsArray(arr)) {
+        // Allocation failure or a stored value that is not a JSON array:
+        // bail out rather than crash or overwrite it.
+        LOG_DEBUGF("serve.c", "Could not read tag list for: %s", arg_req->relpath)
+        error_status = 500;
+        goto cleanup;
+    }
+
+    if (arg_req->delete) {
+        // Drop the first element equal to the requested tag, if any.
+        cJSON *element = NULL;
+        int i = 0;
+        cJSON_ArrayForEach(element, arr) {
+            if (cJSON_IsString(element) && strcmp(element->valuestring, arg_req->name) == 0) {
+                cJSON_DeleteItemFromArray(arr, i);
+                break;
+            }
+            i++;
+        }
+        script = "if (ctx._source.tag.contains(params.tag)) { ctx._source.tag.remove(ctx._source.tag.indexOf(params.tag)) }";
+    } else {
+        cJSON_AddItemToArray(arr, cJSON_CreateString(arg_req->name));
+        script = "if(ctx._source.tag == null) {ctx._source.tag = new ArrayList()} ctx._source.tag.add(params.tag)";
+    }
+
+    // Prepare everything that can fail before producing any side effect.
+    // NOTE(review): doc_id comes from the client and is placed in the URL
+    // unvalidated - consider restricting it to the document id format.
+    int url_len = snprintf(url, sizeof(url), "%s/sist2/_update/%s", WebCtx.es_url, arg_req->doc_id);
+    if (url_len < 0 || (size_t) url_len >= sizeof(url)
+        || !tag_build_update_body(buf, sizeof(buf), script, arg_req->name)) {
+        LOG_DEBUG("serve.c", "Tag request does not fit in the request buffers")
+        error_status = 400;
+        goto cleanup;
+    }
+
+    json_str = cJSON_PrintUnformatted(arr);
+    if (json_str == NULL) {
+        error_status = 500;
+        goto cleanup;
+    }
+
+    nc->user_data = web_post_async(url, buf);
+    store_write(store, arg_req->relpath, key_len, json_str, strlen(json_str) + 1);
+
+cleanup:
+    // arg_req only borrows strings owned by json, so it is released first.
+    free(arg_req);
+    free(json_str);
+    cJSON_Delete(json);
+    cJSON_Delete(arr);
+    free(body);
+
+    if (error_status != 0) {
+        mg_http_send_error(nc, error_status, NULL);
+        nc->flags |= MG_F_SEND_AND_CLOSE;
+    }
+}
+
+/**
+ * Check the HTTP basic auth credentials of a request against WebCtx.auth_user
+ * and WebCtx.auth_pass.
+ *
+ * When the credentials are missing or wrong, a 401 response carrying a
+ * WWW-Authenticate challenge is queued and the connection is flagged to close
+ * once it has been sent.
+ *
+ * @param nc Connection the request arrived on
+ * @param hm Parsed HTTP request
+ * @return TRUE when the credentials match, FALSE when the 401 has been sent
+ */
+int validate_auth(struct mg_connection *nc, struct http_message *hm) {
+    char user[256] = {0,};
+    char pass[256] = {0,};
+
+    // Short-circuit evaluation keeps the comparisons from running when the
+    // header could not be parsed.
+    int authorized = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass)) != -1
+                     && strcmp(user, WebCtx.auth_user) == 0
+                     && strcmp(pass, WebCtx.auth_pass) == 0;
+    if (authorized) {
+        return TRUE;
+    }
+
+    mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n"
+                  "WWW-Authenticate: Basic realm=\"sist2\"\r\n"
+                  "Content-Length: 0\r\n\r\n");
+    nc->flags |= MG_F_SEND_AND_CLOSE;
+    return FALSE;
+}
+
 static void ev_router(struct mg_connection *nc, int ev, void *p) {
    struct mg_str scheme;
    struct mg_str user_info;
@@ -423,21 +614,14 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
        struct http_message *hm = (struct http_message *) p;

        if (mg_parse_uri(hm->uri, &scheme, &user_info, &host, &port, &path, &query, &fragment) != 0) {
+            mg_http_send_error(nc, 400, NULL);
            nc->flags |= MG_F_SEND_AND_CLOSE;
            return;
        }


        if (WebCtx.auth_enabled == TRUE) {
-            char user[256] = {0,};
-            char pass[256] = {0,};
-
-            int ret = mg_get_http_basic_auth(hm, user, sizeof(user), pass, sizeof(pass));
-            if (ret == -1 || strcmp(user, WebCtx.auth_user) != 0 || strcmp(pass, WebCtx.auth_pass) != 0) {
-                mg_printf(nc, "HTTP/1.1 401 Unauthorized\r\n"
-                              "WWW-Authenticate: Basic realm=\"sist2\"\r\n"
-                              "Content-Length: 0\r\n\r\n");
-                nc->flags |= MG_F_SEND_AND_CLOSE;
+            if (!validate_auth(nc, hm)) {
                return;
            }
        }
@@ -466,9 +650,17 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
            thumbnail(nc, hm, &path);
        } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/s/")))) {
            stats_files(nc, hm, &path);
+        } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/tag/")))) {
+            if (WebCtx.tag_auth_enabled == TRUE) {
+                if (!validate_auth(nc, hm)) {
+                    return;
+                }
+            }
+            tag(nc, hm, &path);
        } else if (has_prefix(&path, &((struct mg_str) MG_MK_STR("/d/")))) {
            document_info(nc, hm, &path);
        } else {
+            mg_http_send_error(nc, 404, NULL);
            nc->flags |= MG_F_SEND_AND_CLOSE;
        }

@@ -499,7 +691,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *p) {
                        free(json_str);
                        free(tmp);
                    }
-                    //todo return error code
+                    mg_http_send_error(nc, 500, NULL);
                }

                free_response(r);
--- a/src/web/static_generated.c
+++ b/src/web/static_generated.c
--- a/third-party/libscan
+++ b/third-party/libscan
Author	SHA1	Message	Date
simon987	ca994d3914	Fix bug with media files, don't encode thumbnail when not necessary	2020-07-26 11:52:48 -04:00
simon987	db2285973f	Configurable column count	2020-07-26 11:50:21 -04:00
simon987	61de9e9f14	Set timeout for HTTP get request	2020-07-25 19:55:27 -04:00
simon987	3015ef0ff4	Increase file preview file	2020-07-25 17:26:17 -04:00
simon987	b55d432841	Fix #65	2020-07-25 09:37:37 -04:00
simon987	ed90a140ce	Update README.md	2020-07-19 14:53:03 -04:00
simon987	052df82373	Fix #83	2020-07-19 13:10:30 -04:00
simon987	5676136777	Remove println that was left accidentally	2020-07-18 20:55:12 -04:00
simon987	c061613302	Fix #76	2020-07-18 19:23:43 -04:00
simon987	d0325fd9b9	Fix for simon987/sist2#85	2020-07-18 18:48:54 -04:00
simon987	e05a6f3863	Fix for #75	2020-07-18 18:46:52 -04:00
simon987	f1690a9cca	Mobi build fix	2020-07-18 13:10:45 -04:00
simon987	100a264413	Don't show MuPDF warnings unless --very-verbose is specified	2020-07-18 10:28:05 -04:00
simon987	29390bb454	Update README	2020-07-18 09:54:36 -04:00
simon987	4d43036ded	Fix simon987/sist2#78	2020-07-18 09:41:39 -04:00
simon987	0b5cdbd130	Fix #79	2020-07-18 09:36:10 -04:00
simon987	53d7695f66	Read .raw thumbnails #80 , fix media probing for some formats	2020-07-18 09:31:42 -04:00
simon987	8d53456404	fix libscan submodule	2020-07-17 20:33:50 -04:00
simon987	cbc08a7cc9	Save ebook renders as jpeg	2020-07-17 20:18:21 -04:00
simon987	e629b4d7d3	Faster comic book parsing, probably fixes #77	2020-07-17 19:10:18 -04:00
simon987	22f7073b39	mobi reading bugfix	2020-07-16 20:30:28 -04:00
simon987	1781a74960	Oops I didn't mean to push this	2020-07-16 19:23:52 -04:00
simon987	db96c95ac7	log fix #73	2020-07-16 19:19:23 -04:00
simon987	7b9fa4cc0a	Fix bad merge...	2020-07-15 21:00:51 -04:00
simon987	5cc1fa86a9	Read embedded thumbnail simon987/sist2#74	2020-07-15 20:56:25 -04:00
simon987	649689ce30	Remove warning when generating stats	2020-07-15 20:41:38 -04:00
simon987	c8536f65a8	Fix memory leak in index	2020-07-15 20:41:09 -04:00
simon987	75b5e249c1	Merge pull request #72 from dpieski/patch-1 Update USAGE.md	2020-07-15 14:37:28 -04:00
Andrew	f49e03ac79	Update USAGE.md added example for Windows to display number of logical processors. Does this same limitation apply to the new `index` threads option?	2020-07-15 13:21:02 -05:00
simon987	a6d2afc8dc	Merge pull request #71 from simon987/web-tag Web tag	2020-07-14 20:23:22 -04:00
simon987	8f8f66ba05	Update README.md	2020-07-14 20:22:03 -04:00
simon987	1d9fcf7105	Manual tagging	2020-07-13 19:18:07 -04:00
simon987	8127745f2b	wip	2020-07-13 19:16:51 -04:00
simon987	230988d6d1	frontend tags	2020-07-13 19:15:59 -04:00
simon987	13f4dbed2d	Handle 429, multi-threaded index module	2020-07-11 17:42:46 -04:00
simon987	ed15e89f45	Fix exec-script --es-url not being passed	2020-06-28 12:41:09 -04:00
simon987	c636d3d921	Set number_of_replicas to 0 by default in elasticsearch	2020-06-26 18:10:51 -04:00
simon987	7e92d4b7d1	refresh index only if user script is ran	2020-06-25 20:48:47 -04:00
simon987	8ffe780ab2	Tag tree fix for #64 , validate required argument in exec-script	2020-06-25 20:11:30 -04:00
simon987	d3c8928fe8	Update readme	2020-06-24 21:06:27 -04:00
simon987	d9f628fca4	Build fix	2020-06-21 16:53:22 -04:00
simon987	68289268c1	Add exif tag	2020-06-21 16:51:14 -04:00
simon987	649c50c465	Update README.md	2020-06-21 14:35:18 -04:00
simon987	7b49a0dc49	Build fix	2020-06-21 12:56:13 -04:00
simon987	eb559b53aa	RAW picture file support	2020-06-21 10:46:11 -04:00
simon987	6d01f9c0df	whoops	2020-06-19 22:12:19 -04:00
simon987	e724fec668	Fix web return codes	2020-06-19 21:41:17 -04:00
simon987	fe5e93b300	Update USAGE.md	2020-06-19 21:29:09 -04:00
simon987	ecad85fd7d	version bump	2020-06-19 21:10:03 -04:00
simon987	74cc898259	Fix tag display issue	2020-06-19 21:07:19 -04:00
simon987	dc2e4443c4	Add exec-script command	2020-06-19 21:07:19 -04:00
simon987	1a64431b52	Merge pull request #63 from dpieski/patch-3 Correct typos in example	2020-06-19 18:26:10 -04:00
Andrew	9bad515e06	Correct typos in example Correct typos in examples.	2020-06-19 17:22:02 -05:00
simon987	648559cedb	Update README.md	2020-06-17 13:25:20 -04:00
simon987	3e6cd9cd5c	Merge pull request #60 from dpieski/patch-2 update Usage.md	2020-06-17 13:04:46 -04:00
simon987	f249992798	Update scripting.md	2020-06-17 13:00:07 -04:00
Andrew	e9645ecdaa	update Usage.md Fixing a link.	2020-06-17 10:58:25 -05:00