Handle special characters in file paths

Fragment size setting
Fix file wordexp in some paths #59
2025-12-12 15:08:53 +00:00 · 2020-06-10 19:45:36 -04:00 · 2020-06-09 21:40:53 -04:00 · 2020-06-05 19:41:02 -04:00 · 2020-06-05 19:13:03 -04:00 · 2020-06-02 19:46:58 -04:00
25 changed files with 213 additions and 64 deletions
--- a/.github/ISSUE_TEMPLATE/issue-template.md
+++ b/.github/ISSUE_TEMPLATE/issue-template.md
@@ -9,7 +9,9 @@ assignees: ''

 sist2 version:

-Platform (please indicate if you're using Docker):
+Platform (Linux or Docker):
+
+Elasticsearch version:

 Command with arguments:  `ex: "scan ~/Documents -o ./i2 --threads 3 -q 1.0"`

--- a/README.md
+++ b/README.md
@@ -53,7 +53,7 @@ sist2 (Simple incremental search tool)
    1. *(or)* Download a [development snapshot](https://files.simon987.net/artifacts/Sist2/Build/) *(Not recommended!)*
    1. *(or)* `docker pull simon987/sist2:latest`

-1. See [Usage guide](DOCS/USAGE.md)
+1. See [Usage guide](docs/USAGE.md)
   

 \* *Windows users*: **sist2** runs under [WSL](https://en.wikipedia.org/wiki/Windows_Subsystem_for_Linux)    
@@ -61,7 +61,7 @@ sist2 (Simple incremental search tool)

 ## Example usage

-See [Usage guide](DOCS/USAGE.md) for more details
+See [Usage guide](docs/USAGE.md) for more details

 1. Scan a directory: `sist2 scan ~/Documents -o ./docs_idx`
 1. Push index to Elasticsearch: `sist2 index ./docs_idx`
@@ -91,14 +91,12 @@ they were directly in the file system. Recursive (archives inside archives)
 scan is also supported.

 **Limitations**:
-* Parsing media files with formats that require
-*seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) is not supported.
+* Support for parsing media files with formats that require *seek* (e.g. `.gif`, `.mp4` w/ fragmented metadata etc.) 
+  is limited (see `--mem-buffer` option)
 * Archive files are scanned sequentially, by a single thread. On systems where
 **sist2** is not I/O bound, scans might be faster when larger archives are split
 into smaller parts.
 
-To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
- 
 
 ### OCR

--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -40,6 +40,9 @@ Scan options
    --ocr=<str>           Tesseract language (use tesseract --list-langs to see which are installed on your machine)
    -e, --exclude=<str>   Files that match this regex will not be scanned
    --fast                Only index file names & mime type
+    --treemap-threshold=<str>     Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005
+    --mem-buffer=<int>            Maximum memory buffer size in MB for files inside archives (see USAGE.md). DEFAULT: 2000
+

 Index options
    --es-url=<str>        Elasticsearch url with port. DEFAULT=http://localhost:9200
@@ -102,6 +105,11 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
    In effect, smaller `treemap-threshold` values will yield a more detailed 
    (but also a more cluttered and harder to read) visualization. 
    
+* `--mem-buffer` Maximum memory buffer size in MB (per thread) for files inside archives. Media files 
+    larger than this limit will be read sequentially, and *seek* operations will not be supported.
+
+    To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
+
 ### Scan examples

 Simple scan
--- a/schema/mappings.json
+++ b/schema/mappings.json
@@ -10,6 +10,7 @@
    "path": {
      "type": "text",
      "analyzer": "path_analyzer",
+      "copy_to": "suggest-path",
      "fielddata": true,
      "fields": {
        "nGram": {
@@ -22,6 +23,10 @@
        }
      }
    },
+    "suggest-path": {
+      "type": "completion",
+      "analyzer": "case_insensitive_kw_analyzer"
+    },
    "mime": {
      "type": "keyword"
    },
--- a/scripts/mime.csv
+++ b/scripts/mime.csv
@@ -13,7 +13,7 @@ application/epub+zip, epub
 application/freeloader, frl
 application/futuresplash, spl
 application/groupwise, vew
-application/gzip, gz
+application/gzip, gz|tgz
 application/hta, hta
 application/i-deas, unv
 application/iges, iges|igs
@@ -429,4 +429,4 @@ video/x-qtc, qtc
 video/x-sgi-movie, movie|mv
 x-epoc/x-sisx-app,
 application/x-zstd-dictionary,
-application/vnd.ms-outlook,
+application/vnd.ms-outlook, msg
--- a/src/cli.c
+++ b/src/cli.c
@@ -14,6 +14,8 @@
 #define DEFAULT_LISTEN_ADDRESS "localhost:4090"
 #define DEFAULT_TREEMAP_THRESHOLD 0.0005

+#define DEFAULT_MAX_MEM_BUFFER 2000
+
 const char* TESS_DATAPATHS[] = {
        "/usr/share/tessdata/",
        "/usr/share/tesseract-ocr/tessdata/",
@@ -187,6 +189,10 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
        args->treemap_threshold = atof(args->treemap_threshold_str);
    }

+    if (args->max_memory_buffer == 0) {
+        args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
+    }
+
    LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
    LOG_DEBUGF("cli.c", "arg size=%d", args->size)
    LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
@@ -203,6 +209,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
    LOG_DEBUGF("cli.c", "arg exclude=%s", args->exclude_regex)
    LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
    LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
+    LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)

    return 0;
 }
--- a/src/cli.h
+++ b/src/cli.h
@@ -24,6 +24,7 @@ typedef struct scan_args {
    int fast;
    const char* treemap_threshold_str;
    double treemap_threshold;
+    int max_memory_buffer;
 } scan_args_t;

 scan_args_t *scan_args_create();
--- a/src/index/elastic.c
+++ b/src/index/elastic.c
@@ -202,9 +202,8 @@ void delete_queue(int max) {
        Indexer->line_head = tmp->next;
        if (Indexer->line_head == NULL) {
            Indexer->line_tail = NULL;
-        } else {
-            free(tmp);
        }
+        free(tmp);
        Indexer->queued -= 1;
    }
 }
--- a/src/index/static_generated.c
+++ b/src/index/static_generated.c
--- a/src/main.c
+++ b/src/main.c
@@ -22,7 +22,7 @@
 #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"


-static const char *const Version = "2.3.0";
+static const char *const Version = "2.4.2";
 static const char *const usage[] = {
        "sist2 scan [OPTION]... PATH",
        "sist2 index [OPTION]... INDEX",
@@ -127,6 +127,7 @@ void initialize_scan_context(scan_args_t *args) {
    ScanCtx.media_ctx.log = _log;
    ScanCtx.media_ctx.logf = _logf;
    ScanCtx.media_ctx.store = _store;
+    ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
    init_media();

    // OOXML
@@ -357,7 +358,10 @@ int main(int argc, const char *argv[]) {
            OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
            OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
            OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
-                                                                             "(see USAGE.md). DEFAULT: 0.0005"),
+                                                                                  "(see USAGE.md). DEFAULT: 0.0005"),
+            OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
+                        "Maximum memory buffer size per thread in MB for files inside archives "
+                        "(see USAGE.md). DEFAULT: 2000"),

            OPT_GROUP("Index options"),
            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
--- a/src/parsing/mime_generated.c
+++ b/src/parsing/mime_generated.c
@@ -885,6 +885,7 @@ g_hash_table_insert(ext_table, "frl", (gpointer)application_freeloader);
 g_hash_table_insert(ext_table, "spl", (gpointer)application_futuresplash);
 g_hash_table_insert(ext_table, "vew", (gpointer)application_groupwise);
 g_hash_table_insert(ext_table, "gz", (gpointer)application_gzip);
+g_hash_table_insert(ext_table, "tgz", (gpointer)application_gzip);
 g_hash_table_insert(ext_table, "hta", (gpointer)application_hta);
 g_hash_table_insert(ext_table, "unv", (gpointer)application_i_deas);
 g_hash_table_insert(ext_table, "iges", (gpointer)application_iges);
@@ -1387,6 +1388,7 @@ g_hash_table_insert(ext_table, "divx", (gpointer)video_x_msvideo);
 g_hash_table_insert(ext_table, "qtc", (gpointer)video_x_qtc);
 g_hash_table_insert(ext_table, "movie", (gpointer)video_x_sgi_movie);
 g_hash_table_insert(ext_table, "mv", (gpointer)video_x_sgi_movie);
+g_hash_table_insert(ext_table, "msg", (gpointer)application_vnd_ms_outlook);
 return ext_table;}
 GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
 g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
--- a/src/parsing/parse.c
+++ b/src/parsing/parse.c
@@ -78,6 +78,11 @@ void parse(void *arg) {

    if (doc.mime == 0 && !ScanCtx.fast) {
        // Get mime type with libmagic
+        if (!job->vfile.is_fs_file) {
+            LOG_WARNING(job->filepath, "Guessing mime type with libmagic inside archive files is not currently supported");
+            goto abort;
+        }
+
        bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
        if (bytes_read < 0) {

@@ -147,14 +152,13 @@ void parse(void *arg) {
        parse_mobi(&ScanCtx.mobi_ctx, &job->vfile, &doc);
    }

+    abort:
+
    //Parent meta
    if (!uuid_is_null(job->parent)) {
-        char tmp[UUID_STR_LEN];
-        uuid_unparse(job->parent, tmp);
-
        meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + UUID_STR_LEN + 1);
        meta_parent->key = MetaParent;
-        strcpy(meta_parent->str_val, tmp);
+        uuid_unparse(job->parent, meta_parent->str_val);
        APPEND_META((&doc), meta_parent)
    }

--- a/src/static/css/autocomplete.min.css
+++ b/src/static/css/autocomplete.min.css
@@ -0,0 +1,4 @@
+.autocomplete-suggestions { text-align: left; cursor: default; border: 1px solid #ccc; border-top: 0; background: #fff; box-shadow: -1px 1px 3px rgba(0,0,0,.1); position: absolute; display: none; z-index: 9999; max-height: 254px; overflow: hidden; overflow-y: auto; box-sizing: border-box; }
+.autocomplete-suggestion { position: relative; padding: 0 .6em; line-height: 23px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; font-size: 1.02em; color: #333; }
+.autocomplete-suggestion b { font-weight: normal; color: #1f8dd6; }
+.autocomplete-suggestion.selected { background: #f0f0f0; }
--- a/src/static/css/dark.css
+++ b/src/static/css/dark.css
@@ -266,6 +266,7 @@ mark {
    margin: 3px;
    white-space: normal;
    color: rgb(224, 224, 224);
+    overflow: hidden;
 }

 .irs-single, .irs-from, .irs-to {
--- a/src/static/css/light.css
+++ b/src/static/css/light.css
@@ -205,6 +205,7 @@ mark {
    margin: 3px;
    white-space: normal;
    color: #000;
+    overflow: hidden;
 }

 .irs-single, .irs-from, .irs-to {
--- a/src/static/js/auto-complete.min.js
+++ b/src/static/js/auto-complete.min.js
--- a/src/static/js/dom.js
+++ b/src/static/js/dom.js
@@ -27,18 +27,12 @@ function gifOver(thumbnail, hit) {
 }

 function getContentHighlight(hit) {
-    const re = RegExp(/<mark>/g);
-
-    const sortByMathCount = (a, b) => {
-        return b.match(re).length - a.match(re).length;
-    };
-
    if (hit.hasOwnProperty("highlight")) {
        if (hit["highlight"].hasOwnProperty("content")) {
-            return hit["highlight"]["content"].sort(sortByMathCount)[0];
+            return hit["highlight"]["content"][0];

        } else if (hit["highlight"].hasOwnProperty("content.nGram")) {
-            return hit["highlight"]["content.nGram"].sort(sortByMathCount)[0];
+            return hit["highlight"]["content.nGram"][0];
        }
    }

@@ -77,6 +71,7 @@ function shouldPlayVideo(hit) {

    return mime &&
        mime.startsWith("video/") &&
+        !("parent" in hit["_source"]) &&
        hit["_source"]["extension"] !== "mkv" &&
        hit["_source"]["extension"] !== "avi" &&
        videoc !== "hevc" &&
--- a/src/static/js/search.js
+++ b/src/static/js/search.js
@@ -74,6 +74,41 @@ function showEsError() {

 window.onload = () => {
    CONF.load();
+    new autoComplete({
+        selector: '#pathBar',
+        minChars: 1,
+        delay: 400,
+        renderItem: function (item) {
+            return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item + '</div>';
+        },
+        source: async function (term, suggest) {
+
+            if (!CONF.options.suggestPath) {
+                return []
+            }
+
+            term = term.toLowerCase();
+
+            const choices = await getPathChoices();
+
+            let matches = [];
+            for (let i = 0; i < choices.length; i++) {
+                if (~choices[i].toLowerCase().indexOf(term)) {
+                    matches.push(choices[i]);
+                }
+            }
+            suggest(matches.sort());
+        },
+        onSelect: function () {
+            searchDebounced();
+        }
+    });
+    searchBar.addEventListener("keyup", searchDebounced);
+    pathBar.addEventListener("keyup", e => {
+        if (e.key === "Enter") {
+            searchDebounced();
+        }
+    });
 };

 function toggleFuzzy() {
@@ -105,10 +140,7 @@ $.jsonPost("i").then(resp => {
 });

 function getDocumentInfo(id) {
-    return $.getJSON("d/" + id).fail(e => {
-        console.log(e);
-        showEsError();
-    })
+    return $.getJSON("d/" + id).fail(showEsError)
 }

 function handleTreeClick(tree) {
@@ -332,24 +364,24 @@ function search(after = null) {

    let path = pathBar.value.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
    if (path !== "") {
-        filters.push([{term: {path: path}}])
+        filters.push({term: {path: path}})
    }
    let mimeTypes = getSelectedNodes(mimeTree);
    if (!mimeTypes.includes("any")) {
-        filters.push([{terms: {"mime": mimeTypes}}]);
+        filters.push({terms: {"mime": mimeTypes}});
    }

    let tags = getSelectedNodes(tagTree);
    if (!tags.includes("any")) {
-        filters.push([{terms: {"tag": tags}}]);
+        filters.push({terms: {"tag": tags}});
    }

    if (date_min && date_max) {
-        filters.push([{range: {mtime: {gte: date_min, lte: date_max}}}])
+        filters.push({range: {mtime: {gte: date_min, lte: date_max}}})
    } else if (date_min) {
-        filters.push([{range: {mtime: {gte: date_min}}}])
+        filters.push({range: {mtime: {gte: date_min}}})
    } else if (date_max) {
-        filters.push([{range: {mtime: {lte: date_max}}}])
+        filters.push({range: {mtime: {lte: date_max}}})
    }

    let q = {
@@ -385,6 +417,9 @@ function search(after = null) {
        q.highlight = {
            pre_tags: ["<mark>"],
            post_tags: ["</mark>"],
+            fragment_size: CONF.options.fragmentSize,
+            number_of_fragments: 1,
+            order: "score",
            fields: {
                content: {},
                // "content.nGram": {},
@@ -441,8 +476,6 @@ let searchDebounced = _.debounce(function () {
    search()
 }, 500);

-searchBar.addEventListener("keyup", searchDebounced);
-pathBar.addEventListener("keyup", searchDebounced);

 //Size slider
 $("#sizeSlider").ionRangeSlider({
@@ -607,7 +640,8 @@ function createPathTree(target) {
    let pathTree = new InspireTree({
        data: function (node, resolve, reject) {
            return getNextDepth(node);
-        }
+        },
+        sort: "text"
    });

    selectedIndices.forEach(index => {
@@ -627,3 +661,19 @@ function createPathTree(target) {
    pathTree.on("node.click", handlePathTreeClick(pathTree));
 }

+function getPathChoices() {
+    return new Promise(getPaths => {
+        $.jsonPost("es", {
+            suggest: {
+                path: {
+                    prefix: pathBar.value,
+                    completion: {
+                        field: "suggest-path",
+                        skip_duplicates: true,
+                        size: 10000
+                    }
+                }
+            }
+        }).then(resp => getPaths(resp["suggest"]["path"][0]["options"].map(opt => opt["_source"]["path"])));
+    })
+}
--- a/src/static/js/util.js
+++ b/src/static/js/util.js
@@ -100,6 +100,8 @@ const _defaults = {
    treemapGroupingDepth: 3,
    treemapColor: "PuBuGn",
    treemapSize: "large",
+    suggestPath: true,
+    fragmentSize: 100
 };

 function loadSettings() {
@@ -114,6 +116,8 @@ function loadSettings() {
    $("#settingTreemapColor").val(CONF.options.treemapColor);
    $("#settingTreemapSize").val(CONF.options.treemapSize);
    $("#settingTreemapType").val(CONF.options.treemapType);
+    $("#settingSuggestPath").prop("checked", CONF.options.suggestPath);
+    $("#settingFragmentSize").val(CONF.options.fragmentSize);
 }

 function Settings() {
@@ -155,6 +159,8 @@ function updateSettings() {
    CONF.options.treemapColor = $("#settingTreemapColor").val();
    CONF.options.treemapSize = $("#settingTreemapSize").val();
    CONF.options.treemapType = $("#settingTreemapType").val();
+    CONF.options.suggestPath = $("#settingSuggestPath").prop("checked");
+    CONF.options.fragmentSize = $("#settingFragmentSize").val();
    CONF.save();

    if (typeof searchDebounced !== "undefined") {
--- a/src/static/search.html
+++ b/src/static/search.html
@@ -11,7 +11,7 @@

 <nav class="navbar navbar-expand-lg">
    <a class="navbar-brand" href="/">sist2</a>
-    <span class="badge badge-pill version">2.3.0</span>
+    <span class="badge badge-pill version">2.4.2</span>
    <span class="tagline">Lightning-fast file system indexer and search tool </span>
    <a class="btn ml-auto" href="/stats">Stats</a>
    <button class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
@@ -192,6 +192,17 @@
                        <label class="custom-control-label" for="settingSearchInPath">Enable matching query against document path</label>
                    </div>

+                    <div class="custom-control custom-checkbox">
+                        <input type="checkbox" class="custom-control-input" id="settingSuggestPath">
+                        <label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
+                    </div>
+
+                    <br/>
+                    <div class="form-group">
+                        <input type="number" class="form-control" id="settingFragmentSize">
+                        <label for="settingFragmentSize">Highlight context size in characters</label>
+                    </div>
+
                    <label for="settingDisplay">Display</label>
                    <select id="settingDisplay" class="form-control form-control-sm">
                        <option value="grid">Grid</option>
--- a/src/static/stats.html
+++ b/src/static/stats.html
@@ -10,7 +10,7 @@

 <nav class="navbar navbar-expand-lg">
    <a class="navbar-brand" href="/">sist2</a>
-    <span class="badge badge-pill version">2.3.0</span>
+    <span class="badge badge-pill version">2.4.2</span>
    <span class="tagline">Lightning-fast file system indexer and search tool </span>
    <a style="margin-left: auto" class="btn" href="/">Back</a>
    <button class="btn" type="button" data-toggle="modal" data-target="#settings"
@@ -77,6 +77,17 @@
                        path</label>
                </div>

+                <div class="custom-control custom-checkbox">
+                    <input type="checkbox" class="custom-control-input" id="settingSuggestPath">
+                    <label class="custom-control-label" for="settingSuggestPath">Enable auto-complete in path filter bar</label>
+                </div>
+
+                <br/>
+                <div class="form-group">
+                    <input type="number" class="form-control" id="settingFragmentSize">
+                    <label for="settingFragmentSize">Highlight context size in characters</label>
+                </div>
+
                <label for="settingDisplay">Display</label>
                <select id="settingDisplay" class="form-control form-control-sm">
                    <option value="grid">Grid</option>
--- a/src/stats.c
+++ b/src/stats.c
@@ -24,6 +24,10 @@ typedef struct {

 void fill_tables(cJSON *document, UNUSED(const char uuid_str[UUID_STR_LEN])) {

+    if (cJSON_GetObjectItem(document, "parent") != NULL) {
+        return;
+    }
+
    const char *json_path = cJSON_GetObjectItem(document, "path")->valuestring;
    char *path = malloc(strlen(json_path) + 1);
    strcpy(path, json_path);
@@ -167,7 +171,7 @@ int merge_up(double thresh) {

    int size = g_hash_table_size(FlatTree);

-    LOG_DEBUGF("stats.h", "Merge up iteration (%d merged, %d in tree)", count, size)
+    LOG_DEBUGF("stats.c", "Merge up iteration (%d merged, %d in tree)", count, size)
    return count;
 }

@@ -184,9 +188,9 @@ void csv_escape(char *dst, const char *str) {
        return;
    }

-    while (*ptr++ != 0) {
-        char c = *ptr;
-
+    *out++ = '"';
+    char c;
+    while ((c = *ptr++) != 0) {
        if (c == '"') {
            *out++ = '"';
            *out++ = '"';
@@ -194,6 +198,8 @@ void csv_escape(char *dst, const char *str) {
            *out++ = c;
        }
    }
+    *out++ = '"';
+    *out = '\0';
 }

 int open_or_exit(const char *path) {
--- a/src/util.c
+++ b/src/util.c
@@ -26,10 +26,11 @@ dyn_buffer_t url_escape(char *str) {
 }

 char *abspath(const char *path) {
-    wordexp_t w;
-    wordexp(path, &w, 0);

-    char *abs = realpath(w.we_wordv[0], NULL);
+    char *expanded = expandpath(path);
+
+    char *abs = realpath(expanded, NULL);
+    free(expanded);
    if (abs == NULL) {
        return NULL;
    }
@@ -38,16 +39,46 @@ char *abspath(const char *path) {
        strcat(abs, "/");
    }

-    wordfree(&w);
    return abs;
 }

-char *expandpath(const char *path) {
-    wordexp_t w;
-    wordexp(path, &w, 0);
+void shell_escape(char *dst, const char *src) {
+    const char *ptr = src;
+    char *out = dst;
+    while ((*ptr)) {
+        char c = *ptr++;

-    char *expanded = malloc(strlen(w.we_wordv[0]) + 2);
-    strcpy(expanded, w.we_wordv[0]);
+        if (c == '&' || c == '\n' || c == '|' || c == ';' || c == '<' ||
+            c == '>' || c == '(' || c == ')' || c == '{' || c == '}') {
+            *out++ = '\\';
+        }
+        *out++ = c;
+    }
+    *out = 0;
+}
+
+char *expandpath(const char *path) {
+    char tmp[PATH_MAX * 2];
+
+    shell_escape(tmp, path);
+
+    wordexp_t w;
+    wordexp(tmp, &w, 0);
+
+    if (w.we_wordv == NULL) {
+        return NULL;
+    }
+
+    *tmp = '\0';
+    for (int i = 0; i < w.we_wordc; i++) {
+        strcat(tmp, w.we_wordv[i]);
+        if (i != w.we_wordc - 1) {
+            strcat(tmp, " ");
+        }
+    }
+
+    char *expanded = malloc(strlen(tmp) + 2);
+    strcpy(expanded, tmp);
    strcat(expanded, "/");

    wordfree(&w);
@@ -152,7 +183,7 @@ void str_escape(char *dst, const char *str) {
                    break;
                }

-                cur += sprintf(cur, "%c%02X", ESCAPE_CHAR, (unsigned char)tmp[i]);
+                cur += sprintf(cur, "%c%02X", ESCAPE_CHAR, (unsigned char) tmp[i]);
            }
            continue;
        }
@@ -198,12 +229,12 @@ void str_unescape(char *dst, const char *str) {
            char next = *ptr;

            if (next == ESCAPE_CHAR) {
-                *cur++ = (char)c;
+                *cur++ = (char) c;
                ptr += 1;
            } else {
                tmp[0] = *(ptr);
                tmp[1] = *(ptr + 1);
-                *cur++ = (char)strtol(tmp, NULL, 16);
+                *cur++ = (char) strtol(tmp, NULL, 16);
                ptr += 2;
            }
        } else {
--- a/src/web/static_generated.c
+++ b/src/web/static_generated.c
--- a/third-party/libscan
+++ b/third-party/libscan
Author	SHA1	Message	Date
simon987	046edea0e2	Handle special characters in file paths	2020-06-10 19:45:36 -04:00
simon987	a011b7e97b	Fragment size setting	2020-06-09 21:40:53 -04:00
simon987	8c1c1697e0	Fix file wordexp in some paths #59	2020-06-05 19:41:02 -04:00
simon987	018b49fa4c	Fix csv_escape #58	2020-06-05 19:13:03 -04:00
simon987	27b4e6403e	Re-enable path autocomplete #54	2020-06-02 19:46:58 -04:00
simon987	13fdbd9e69	Fix for ES 7.7 #54	2020-06-01 18:14:34 -04:00
simon987	5e7fdaf8dd	Update issue-template.md	2020-06-01 10:45:43 -04:00
simon987	19d5c8ac9f	Update issue-template.md	2020-05-29 18:19:21 -04:00
simon987	99497049a8	Merge pull request #53 from dpieski/patch-1 Update README	2020-05-29 18:16:13 -04:00
Andrew	1a3181d78b	Update README changed case of path in a link to the usage guide to fix 404 error.	2020-05-29 15:37:20 -05:00
simon987	449aa77c8f	Fix for unknown mime inside archives	2020-05-25 17:36:04 -04:00
simon987	3058c55510	Memory leak fix #37	2020-05-24 15:42:42 -04:00
simon987	dedf9287b2	Fix name separation in `--archive list` mode	2020-05-24 14:36:59 -04:00
simon987	ab199b0c0c	Remove arc_reset() function because seek() inside archive doesn't work	2020-05-24 14:18:31 -04:00
simon987	c4fbae123e	Better support for media files inside archives	2020-05-24 14:10:23 -04:00
simon987	dd2397ef5c	handle .tgz #44 , ignore files inside archives for stats page	2020-05-24 10:10:28 -04:00