Fixed some bugs. Started work on autocomplete.

2025-04-24 12:35:51 +00:00 · 2018-04-11 13:46:05 -04:00 · 2018-04-11 13:46:05 -04:00 · 8b55c3b681
commit 8b55c3b681
parent 410261da41
16 changed files with 318 additions and 201 deletions
--- a/config.py
+++ b/config.py
@ -5,5 +5,6 @@ default_options = {
    "TextFileContentLenght": "16384",
    "MimeGuesser": "extension",  # extension, content
    "CheckSumCalculators": "",  # md5, sha1, sha256
-
 }
+
+index_every = 50000
--- a/crawler.py
+++ b/crawler.py
@ -10,7 +10,7 @@ from search import Search
 from thumbnail import ThumbnailGenerator
 from storage import Directory
 import shutil
-
+import config

 class RunningTask:

@ -26,9 +26,11 @@ class RunningTask:

 class Crawler:

-    def __init__(self, enabled_parsers: list, mime_guesser: MimeGuesser=ContentMimeGuesser()):
+    def __init__(self, enabled_parsers: list, mime_guesser: MimeGuesser=ContentMimeGuesser(), indexer=None, dir_id=0):
        self.documents = []
        self.enabled_parsers = enabled_parsers
+        self.indexer = indexer
+        self.dir_id = dir_id

        for parser in self.enabled_parsers:
            if parser.is_default:
@ -44,6 +46,8 @@ class Crawler:

    def crawl(self, root_dir: str, counter: Value=None):

+        document_counter = 0
+
        for root, dirs, files in os.walk(root_dir):

            for filename in files:
@ -53,6 +57,13 @@ class Crawler:

                parser = self.ext_map.get(mime, self.default_parser)

+                document_counter += 1
+                if document_counter >= config.index_every:
+                    document_counter = 0
+
+                    self.indexer.index(self.documents, self.dir_id)
+                    self.documents.clear()
+
                try:
                    if counter:
                        counter.value += 1
@ -64,6 +75,9 @@ class Crawler:
                except FileNotFoundError:
                    continue  # File was deleted

+        if self.indexer is not None:
+            self.indexer.index(self.documents, self.dir_id)
+
    def countFiles(self, root_dir: str):
        count = 0

@ -123,11 +137,9 @@ class TaskManager:
                     MediaFileParser(chksum_calcs),
                     TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLenght"))),
                     PictureFileParser(chksum_calcs)],
-                    mime_guesser)
+                    mime_guesser, self.indexer, directory.id)
        c.crawl(directory.path, counter)

-        # todo: create indexer inside the crawler and index every X files
-        Indexer("changeme").index(c.documents, directory.id)
        done.value = 1

    def execute_thumbnails(self, directory: Directory, total_files: Value, counter: Value, done: Value):
--- a/indexer.py
+++ b/indexer.py
@ -61,17 +61,26 @@ class Indexer:
        self.es.indices.create(index=self.index_name)
        self.es.indices.close(index=self.index_name)

-        self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"path_tokenizer":{"type":"path_hierarchy"}}}}', index=self.index_name)
-        self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"my_nGram_tokenizer":{"type":"nGram","min_gram":3,"max_gram":4}}}}')
-        self.es.indices.put_settings(body='{"analysis":{"analyzer":{"path_analyser":{"tokenizer":"path_tokenizer"}}}}')
-        self.es.indices.put_settings(body='{"analysis":{"analyzer":{"my_nGram":{"tokenizer":"my_nGram_tokenizer"}}}}')
+        self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"path_tokenizer":{"type":"path_hierarchy"}}}}',
+                                     index=self.index_name)
+        self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"my_nGram_tokenizer":{"type":"nGram","min_gram":3,"max_gram":4}}}}',
+                                     index=self.index_name)
+        self.es.indices.put_settings(body='{"analysis":{"analyzer":{"path_analyser":{"tokenizer":"path_tokenizer"}}}}',
+                                     index=self.index_name)
+        self.es.indices.put_settings(body='{"analysis":{"analyzer":{"my_nGram":{"tokenizer":"my_nGram_tokenizer", "filter": ["lowercase"]}}}}',
+                                     index=self.index_name)

        self.es.indices.put_mapping(body='{"properties": {'
                                    '"path": {"type": "text", "analyzer": "path_analyser", "copy_to": "suggest-path"},'
                                    '"suggest-path": {"type": "completion", "analyzer": "keyword"},'
                                    '"mime": {"type": "keyword"},'
                                    '"directory": {"type": "keyword"},'
-                                    '"name": {"analyzer": "my_nGram", "type": "text"}'
+                                    '"name": {"analyzer": "my_nGram", "type": "text"},'
+                                    '"album": {"analyzer": "my_nGram", "type": "text"},'
+                                    '"artist": {"analyzer": "my_nGram", "type": "text"},'
+                                    '"title": {"analyzer": "my_nGram", "type": "text"},'
+                                    '"genre": {"analyzer": "my_nGram", "type": "text"},'
+                                    '"album_artist": {"analyzer": "my_nGram", "type": "text"}'
                                    '}}', doc_type="file", index=self.index_name)

        self.es.indices.open(index=self.index_name)
--- a/parsing.py
+++ b/parsing.py
@ -5,6 +5,7 @@ import mimetypes
 import subprocess
 import json
 import chardet
+import html
 from PIL import Image

 class MimeGuesser:
@ -150,55 +151,55 @@ class MediaFileParser(GenericFileParser):
        super().__init__(checksum_calculators)

        self.mime_types = [
-            "video/3gpp",
-            "video/mp4",
-            "video/mpeg",
-            "video/ogg",
-            "video/quicktime",
-            "video/webm",
-            "video/x-flv",
-            "video/x-mng",
-            "video/x-ms-asf",
-            "video/x-ms-wmv",
-            "video/x-msvideo",
-            "audio/basic",
-            "auido/L24",
-            "audio/mid",
-            "audio/mpeg",
-            "audio/mp4",
-            "audio/x-aiff",
-            "audio/ogg",
-            "audio/vorbis"
-            "audio/x-realaudio",
-            "audio/x-wav"
+            "video/3gpp",  "video/mp4", "video/mpeg", "video/ogg", "video/quicktime",
+            "video/webm", "video/x-flv", "video/x-mng", "video/x-ms-asf",
+            "video/x-ms-wmv", "video/x-msvideo", "audio/basic", "auido/L24",
+            "audio/mid", "audio/mpeg", "audio/mp4", "audio/x-aiff",
+            "audio/ogg", "audio/vorbis" "audio/x-realaudio", "audio/x-wav",
+            "audio/flac", "audio/x-monkeys-audio", "audio/wav", "audio/wave",
+            "audio/x-wav", "audio/x-ms-wma"
        ]

    def parse(self, full_path: str):
        info = super().parse(full_path)

-        print("video/audio : " + full_path)
+        p = subprocess.Popen(["ffprobe", "-v", "quiet", "-print_format", "json=c=1", "-show_format", full_path],
+                             stdout=subprocess.PIPE)
+        out, err = p.communicate()

-        result = subprocess.run(["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", full_path],
-                                stdout=subprocess.PIPE)
+        try:
+            metadata = json.loads(out.decode("utf-8"))

-        metadata = json.loads(result.stdout.decode("utf-8"))
+            if "format" in metadata:

-        if "format" in metadata:
+                if "bit_rate" in metadata["format"]:
+                    info["bit_rate"] = int(metadata["format"]["bit_rate"])

-            if "bit_rate" in metadata["format"]:
-                info["bit_rate"] = int(metadata["format"]["bit_rate"])
+                if "nb_streams" in metadata["format"]:
+                    info["nb_streams"] = int(metadata["format"]["nb_streams"])

-            if "nb_streams" in metadata["format"]:
-                info["nb_streams"] = int(metadata["format"]["nb_streams"])
+                if "duration" in metadata["format"]:
+                    info["duration"] = float(metadata["format"]["duration"])

-            if "duration" in metadata["format"]:
-                info["duration"] = float(metadata["format"]["duration"])
+                if "format_name" in metadata["format"]:
+                    info["format_name"] = metadata["format"]["format_name"]

-            if "format_name" in metadata["format"]:
-                info["format_name"] = metadata["format"]["format_name"]
+                if "format_long_name" in metadata["format"]:
+                    info["format_long_name"] = metadata["format"]["format_long_name"]

-            if "format_long_name" in metadata["format"]:
-                info["format_long_name"] = metadata["format"]["format_long_name"]
+                if "tags" in metadata["format"]:
+                    if "genre" in metadata["format"]["tags"]:
+                        info["genre"] = metadata["format"]["tags"]["genre"]
+                    if "title" in metadata["format"]["tags"]:
+                        info["title"] = metadata["format"]["tags"]["title"]
+                    if "album" in metadata["format"]["tags"]:
+                        info["album"] = metadata["format"]["tags"]["album"]
+                    if "album_artist" in metadata["format"]["tags"]:
+                        info["album_artist"] = metadata["format"]["tags"]["album_artist"]
+
+        except json.decoder.JSONDecodeError:
+            print("json decode error:" + full_path)
+            pass

        return info

@ -211,60 +212,25 @@ class PictureFileParser(GenericFileParser):
        super().__init__(checksum_calculators)

        self.mime_types = [
-            "image/bmp",
-            "image/cgm",
-            "image/cis-cod",
-            "image/g3fax",
-            "image/gif",
-            "image/ief",
-            "image/jpeg",
-            "image/ktx",
-            "image/pipeg",
-            "image/pjpeg",
-            "image/png",
-            "image/prs.btif",
-            "image/svg+xml",
-            "image/tiff",
-            "image/vnd.adobe.photoshop",
-            "image/vnd.dece.graphic",
-            "image/vnd.djvu",
-            "image/vnd.dvb.subtitle",
-            "image/vnd.dwg",
-            "image/vnd.dxf",
-            "image/vnd.fastbidsheet",
-            "image/vnd.fpx",
-            "image/vnd.fst",
-            "image/vnd.fujixerox.edmics-mmr",
-            "image/vnd.fujixerox.edmics-rlc",
-            "image/vnd.ms-modi",
-            "image/vnd.net-fpx",
-            "image/vnd.wap.wbmp",
-            "image/vnd.xiff",
-            "image/webp",
-            "image/x-citrix-jpeg",
-            "image/x-citrix-png",
-            "image/x-cmu-raster",
-            "image/x-cmx",
-            "image/x-freehand",
-            "image/x-icon",
-            "image/x-pcx",
-            "image/x-pict",
-            "image/x-png",
-            "image/x-portable-anymap",
-            "image/x-portable-bitmap",
-            "image/x-portable-graymap",
-            "image/x-portable-pixmap",
-            "image/x-rgb",
-            "image/x-xbitmap",
-            "image/x-xpixmap",
-            "image/x-xwindowdump"
+            "image/bmp", "image/cgm",  "image/cis-cod", "image/g3fax", "image/gif",
+            "image/ief", "image/jpeg", "image/ktx", "image/pipeg",  "image/pjpeg",
+            "image/png", "image/prs.btif", "image/svg+xml", "image/tiff",
+            "image/vnd.adobe.photoshop", "image/vnd.dece.graphic", "image/vnd.djvu",
+            "image/vnd.dvb.subtitle", "image/vnd.dwg", "image/vnd.dxf",
+            "image/vnd.fastbidsheet", "image/vnd.fpx", "image/vnd.fst",
+            "image/vnd.fujixerox.edmics-mmr", "image/vnd.fujixerox.edmics-rlc",
+            "image/vnd.ms-modi", "image/vnd.net-fpx", "image/vnd.wap.wbmp",
+            "image/vnd.xiff", "image/webp", "image/x-citrix-jpeg", "image/x-citrix-png",
+            "image/x-cmu-raster", "image/x-cmx", "image/x-icon",
+            "image/x-pcx", "image/x-pict", "image/x-png", "image/x-portable-bitmap",
+            "image/x-portable-graymap", "image/x-portable-pixmap",
+            "image/x-rgb", "image/x-xbitmap", "image/x-xpixmap", "image/x-xwindowdump"
        ]

    def parse(self, full_path: str):

        info = super().parse(full_path)

-        print("picture")

        try:
            with open(full_path, "rb") as image_file:
@ -274,8 +240,7 @@ class PictureFileParser(GenericFileParser):
                    info["format"] = image.format
                    info["width"] = image.width
                    info["height"] = image.height
-        except OSError as e:
-            print(e.strerror)
+        except (OSError, ValueError) as e:
            pass

        return info
@ -290,58 +255,40 @@ class TextFileParser(GenericFileParser):
        self.content_lenght = content_lenght

        self.mime_types = [
-            "text/asp",
-            "text/css",
-            "text/ecmascript",
-            "text/html",
-            "text/javascript",
-            "text/mcf",
-            "text/pascal",
-            "text/plain",
-            "text/richtext",
-            "text/scriplet",
-            "text/sgml",
-            "text/tab-separated-values",
-            "text/uri-list",
-            "text/vnd.abc",
-            "text/vnd.fmi.flexstor",
-            "text/vnd.rn-realtext",
-            "text/vnd.wap.wml",
-            "text/vnd.wap.wmlscript",
-            "text/webviewhtml",
-            "text/x-asm",
-            "text/x-audiosoft-intra",
-            "text/x-c",
-            "text/x-component",
-            "text/x-fortran",
-            "text/x-h",
-            "text/x-java-source",
-            "text/x-la-asf",
-            "text/x-m",
-            "text/x-pascal",
-            "text/x-script",
-            "text/x-script.csh",
-            "text/x-script.elisp",
-            "text/x-script.guile",
-            "text/x-script.ksh",
-            "text/x-script.lisp",
-            "text/x-script.perl",
-            "text/x-script.perl-module",
-            "text/x-script.phyton",
-            "text/x-script.rexx",
-            "text/x-script.scheme",
-            "text/x-script.sh",
-            "text/x-script.tcl",
-            "text/x-script.tcsh",
-            "text/x-script.zsh",
-            "text/x-server-parsed-html",
-            "text/x-setext",
-            "text/x-sgml",
-            "text/x-speech",
-            "text/x-uil",
-            "text/x-uuencode",
-            "text/x-vcalendar",
-            "text/xml"
+            "text/asp", "text/css", "text/ecmascript", "text/html", "text/javascript",
+            "text/mcf",  "text/pascal", "text/plain",  "text/richtext", "text/scriplet",
+            "text/sgml",  "text/tab-separated-values", "text/uri-list", "text/vnd.abc",
+            "text/vnd.fmi.flexstor", "text/vnd.rn-realtext", "text/vnd.wap.wml",
+            "text/vnd.wap.wmlscript", "text/webviewhtml", "text/x-asm", "text/x-audiosoft-intra",
+            "text/x-c", "text/x-component", "text/x-fortran", "text/x-h", "text/x-java-source",
+            "text/x-la-asf",  "text/x-m", "text/x-pascal", "text/x-script",
+            "text/x-script.csh", "text/x-script.elisp", "text/x-script.guile",
+            "text/x-script.ksh", "text/x-script.lisp",  "text/x-script.perl",
+            "text/x-script.perl-module", "text/x-script.phyton", "text/x-script.rexx",
+            "text/x-script.scheme", "text/x-script.sh", "text/x-script.tcl",
+            "text/x-script.tcsh", "text/x-script.zsh", "text/x-server-parsed-html",
+            "text/x-setext", "text/x-sgml", "text/x-speech", "text/x-uil",
+            "text/x-uuencode", "text/x-vcalendar", "text/xml"
+        ]
+
+        self.encodings = [
+            'ascii', 'big5', 'big5hkscs', 'cp037', 'cp273', 'cp424', 'cp437',
+            'cp500', 'cp720',  'cp737', 'cp775', 'cp850', 'cp852', 'cp855',
+            'cp856', 'cp857',  'cp858', 'cp860', 'cp861',  'cp862', 'cp863',
+            'cp864', 'cp865',  'cp866', 'cp869', 'cp874', 'cp875',  'cp932',
+            'cp949', 'cp950',  'cp1006', 'cp1026', 'cp1125', 'cp1140',
+            'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
+            'cp1256', 'cp1257', 'cp1258', 'cp65001', 'euc_jp', 'euc_jis_2004',
+            'euc_jisx0213', 'euc_kr', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022_jp',
+            'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004', 'iso2022_jp_3',
+            'iso2022_jp_ext', 'iso2022_kr', 'latin_1', 'iso8859_2', 'iso8859_3',
+            'iso8859_4', 'iso8859_5', 'iso8859_6', 'iso8859_7', 'iso8859_8',
+            'iso8859_9', 'iso8859_10', 'iso8859_11', 'iso8859_13', 'iso8859_14',
+            'iso8859_15', 'iso8859_16', 'johab', 'koi8_r', 'koi8_t', 'koi8_u',
+            'kz1048', 'mac_cyrillic', 'mac_greek', 'mac_iceland', 'mac_latin2',
+            'mac_roman', 'mac_turkish', 'ptcp154', 'shift_jis', 'shift_jis_2004',
+            'shift_jisx0213', 'utf_32', 'utf_32_be', 'utf_32_le', 'utf_16', 'utf_16_be',
+            'utf_16_le', 'utf_7', 'utf_8', 'utf_8_sig'
        ]

    def parse(self, full_path: str):
@ -355,12 +302,11 @@ class TextFileParser(GenericFileParser):
            chardet.detect(raw_content)
            encoding = chardet.detect(raw_content)["encoding"]

-            if encoding is not None:
-
-                print(full_path)
-                print(encoding)
+            if encoding is not None and encoding in self.encodings:

                info["encoding"] = encoding
-                info["content"] = raw_content.decode(encoding, "ignore")
+                content = raw_content.decode(encoding, "ignore")
+
+                info["content"] = html.escape(content)

        return info
--- a/search.py
+++ b/search.py
@ -22,7 +22,7 @@ class Search:
    def get_all_documents(self, dir_id: int):

        return helpers.scan(client=self.es,
-                            query={"_source": {"includes": ["path", "name"]},
+                            query={"_source": {"includes": ["path", "name", "mime", "extension"]},
                                   "query": {"term": {"directory": dir_id}}},
                            index=self.index_name)

@ -58,7 +58,8 @@ class Search:
        page = self.es.search(body={"query":
            {"multi_match": {
                "query": query,
-                "fields": ["name", "content"]
+                "fields": ["name", "content", "album", "artist", "title", "genre", "album_artist"],
+                "operator": "and"
            }},
            "sort": [
                "_score"
@ -74,10 +75,14 @@ class Search:
                    "prefix": query,
                    "completion": {
                        "field": "suggest-path",
-                        "skip_duplicates": True
+                        "skip_duplicates": True,
+                        "size": 4000
                    }
                }
            },
+            "aggs": {
+                "total_size": {"sum": {"field": "size"}}
+            },
            "size": 40}, index=self.index_name, scroll="3m")

        return page
--- a/spec/TextFileParser_spec.py
+++ b/spec/TextFileParser_spec.py
@ -6,10 +6,10 @@ class TextFileParserTest(TestCase):

    def test_parse_csv(self):

-        parser = TextFileParser([], 12345)
+        parser = TextFileParser([], 1234)

        info = parser.parse("test_files/text.csv")

        self.assertTrue(info["content"].startswith("rosbagTimestamp,header,seq,stamp,secs,nsecs,"))
-        self.assertEqual(len(info["content"]), 12345)
+        self.assertEqual(len(info["content"]), 1309)  # Size is larger because of html escaping
        self.assertEqual(info["encoding"], "ascii")
--- a/spec/ThumbnailGenerator_spec.py
+++ b/spec/ThumbnailGenerator_spec.py
@ -11,7 +11,7 @@ class ThumbnailGeneratorTest(TestCase):

        generator = ThumbnailGenerator(300)
        # Original image is 420x315
-        generator.generate("test_folder/sample_1.jpg", "test_thumb1.jpg")
+        generator.generate("test_folder/sample_1.jpg", "test_thumb1.jpg", "image/JPEG")

        img = Image.open("test_thumb1.jpg")
        width, height = img.size
--- a/static/css/auto-complete.css
+++ b/static/css/auto-complete.css
@ -0,0 +1,9 @@
+.autocomplete-suggestions {
+    text-align: left; cursor: default; border: 1px solid #ccc; border-top: 0; background: #fff; box-shadow: -1px 1px 3px rgba(0,0,0,.1);
+
+    /* core styles should not be changed */
+    position: absolute; display: none; z-index: 9999; max-height: 254px; overflow: hidden; overflow-y: auto; box-sizing: border-box;
+}
+.autocomplete-suggestion { position: relative; padding: 0 .6em; line-height: 23px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; font-size: 1.02em; color: #333; }
+.autocomplete-suggestion b { font-weight: normal; color: #1f8dd6; }
+.autocomplete-suggestion.selected { background: #f0f0f0; }
--- a/static/css/bootstrap-slider.min.css
+++ b/static/css/bootstrap-slider.min.css
--- a/static/css/bootstrap.min.js
+++ b/static/css/bootstrap.min.js
--- a/static/js/auto-complete.min.js
+++ b/static/js/auto-complete.min.js
--- a/static/js/bootstrap-slider.min.js
+++ b/static/js/bootstrap-slider.min.js
--- a/templates/layout.html
+++ b/templates/layout.html
@ -8,9 +8,13 @@
    <!-- Demo Dependencies -->
    <script src="/static/js/popper.min.js" type="text/javascript"></script>
    <script src="/static/js/jquery.min.js" type="text/javascript"></script>
-    <script src="/static/js/bootstrap.min.js" type="text/javascript"></script>
+    <script src="/static/css/bootstrap.min.js" type="text/javascript"></script>
    <script src="/static/js/Chart.min.js" type="text/javascript"></script>
    <link rel="stylesheet" href="/static/css/bootstrap.min.css">
+    <link rel="stylesheet" href="/static/css/auto-complete.css">
+    <script src="/static/js/auto-complete.min.js" type="text/javascript"></script>
+
+

    <link href="/static/css/fontawesome-all.min.css" rel="stylesheet" type="text/css">

@ -22,6 +26,7 @@
        .info-table pre {
            padding: 6px;
            margin: 4px;
+            white-space: unset;
        }

        .info-table td {
--- a/templates/search.html
+++ b/templates/search.html
@ -29,6 +29,11 @@
            background-color: #AA99C9;
        }

+        .badge-audio {
+            color: #FFFFFF;
+            background-color: #00ADEF;
+        }
+
        .badge-resolution {
            color: #212529;
            background-color: #FFC107;
@ -92,6 +97,16 @@
        .hl {
            color: red;
        }
+
+        .content-div {
+            font-family: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;
+            font-size: 13px;
+            padding: 1em;
+            background-color: #f5f5f5;
+            border: 1px solid #ccc;
+            border-radius: 4px;
+            margin: 3px;
+        }
    </style>

    <div class="container">
@ -99,17 +114,32 @@
        <div class="card">
            {#            <div class="card-header">An excellent form</div>#}
            <div class="card-body">
-                <div class="input-group mb-2">
-                    <div class="input-group-prepend">
-                        <div class="input-group-text"></div>
-                    </div>
-                    <input id="searchBar" type="search" class="form-control" placeholder="Search">
-
-                </div>
+                <input id="pathBar" type="search" class="form-control" placeholder="Path">
+                <input id="searchBar" type="search" class="form-control" placeholder="Search">

            </div>
        </div>

+        <script>
+
+            new autoComplete({
+                selector: '#pathBar',
+                minChars: 1,
+                source: function(term, suggest) {
+                    term = term.toLowerCase();
+                    var choices = pathAutoComplete;
+
+                    var matches = [];
+                    for (var i=0; i<choices.length; i++) {
+                        if (~choices[i].toLowerCase().indexOf(term)) {
+                            matches.push(choices[i]);
+                        }
+                    }
+                    suggest(matches);
+                }
+            });
+        </script>
+
        <div id="searchResults">

        </div>
@ -130,7 +160,12 @@

                var stat = document.createElement("p");
                stat.appendChild(document.createTextNode(searchResult["hits"]["total"] + " results in " + searchResult["took"] + "ms"));
+
+                var sizeStat = document.createElement("span");
+                sizeStat.appendChild(document.createTextNode(humanFileSize(searchResult["aggregations"]["total_size"]["value"])));
+
                statsCardBody.appendChild(stat);
+                statsCardBody.appendChild(sizeStat);
                statsCard.appendChild(statsCardBody);

                return statsCard;
@ -284,9 +319,15 @@
                //Title
                var title = document.createElement("p");
                title.setAttribute("class", "file-title");
-                var extention = hit["_source"].hasOwnProperty("extension") && hit["_source"]["extension"] !== null ? "." + hit["_source"]["extension"] : "";
-                title.insertAdjacentHTML('afterbegin', hit["highlight"]["name"] + extention);
-                title.setAttribute("title", hit["_source"]["path"]);
+                var extension = hit["_source"].hasOwnProperty("extension") && hit["_source"]["extension"] !== "" ? "." + hit["_source"]["extension"] : "";
+
+                if (hit.hasOwnProperty("highlight") && hit["highlight"].hasOwnProperty("name")) {
+                    title.insertAdjacentHTML('afterbegin', hit["highlight"]["name"] + extension);
+                } else {
+                    title.appendChild(document.createTextNode(hit["_source"]["name"]));
+                }
+
+                title.setAttribute("title", hit["_source"]["path"] + hit["_source"]["name"] + extension);
                docCard.appendChild(title);

                var tagContainer = document.createElement("div");
@ -323,7 +364,9 @@
                            //Resolution
                            var resolutionBadge = document.createElement("span");
                            resolutionBadge.setAttribute("class", "badge badge-resolution");
-                            resolutionBadge.appendChild(document.createTextNode(hit["_source"]["width"] + "x" + hit["_source"]["height"]));
+                            if (hit["_source"].hasOwnProperty("width")) {
+                                resolutionBadge.appendChild(document.createTextNode(hit["_source"]["width"] + "x" + hit["_source"]["height"]));
+                            }
                            thumbnailOverlay.appendChild(resolutionBadge);

                            var format = hit["_source"]["format"];
@ -365,6 +408,13 @@
                            formatTag.appendChild(document.createTextNode(format));
                            tags.push(formatTag);

+                            break;
+                        case "audio":
+                            formatTag = document.createElement("span");
+                            formatTag.setAttribute("class", "badge badge-pill badge-audio");
+                            formatTag.appendChild(document.createTextNode(hit["_source"]["format_name"]));
+                            tags.push(formatTag);
+
                            break;
                    }

@ -372,31 +422,38 @@
                    if (hit.hasOwnProperty("highlight") && hit["highlight"].hasOwnProperty("content")) {

                        var contentDiv = document.createElement("div");
-                        contentDiv.innerHTML = hit["highlight"]["content"][0];
+                        contentDiv.setAttribute("class", "content-div bg-light");
+                        contentDiv.insertAdjacentHTML('afterbegin', hit["highlight"]["content"][0]);
                        docCard.appendChild(contentDiv);
                    }

-                    //Size tag
-                    var sizeTag = document.createElement("small");
-                    sizeTag.appendChild(document.createTextNode(humanFileSize(hit["_source"]["size"])));
-                    sizeTag.setAttribute("class", "text-muted");
+                    //Audio
+                    if (mimeCategory === "audio") {
+
+                    }
+
+                    if (thumbnail !== null) {
+                        imgWrapper.appendChild(thumbnail);
+                        docCard.appendChild(imgWrapper);
+                    }
+                    if (thumbnailOverlay !== null) {
+                        imgWrapper.appendChild(thumbnailOverlay);
+                    }

                    for (var i = 0; i < tags.length; i++) {
                        tagContainer.appendChild(tags[i]);
                    }

-                    tagContainer.appendChild(sizeTag);

-                    if (thumbnail !== null) {
-                        imgWrapper.appendChild(thumbnail);
-                        docCard.appendChild(imgWrapper);
-
-                    }
-                    if (thumbnailOverlay !== null) {
-                        imgWrapper.appendChild(thumbnailOverlay);
-                    }
                }

+                //Size tag
+                var sizeTag = document.createElement("small");
+                sizeTag.appendChild(document.createTextNode(humanFileSize(hit["_source"]["size"])));
+                sizeTag.setAttribute("class", "text-muted");
+                tagContainer.appendChild(sizeTag);
+
+
                //Download button
                downloadPopover(docCard, hit["_id"]);

@ -463,6 +520,8 @@
                }
            });

+            var pathAutoComplete;
+
            searchBar.addEventListener("keyup", function () {

                //Clear old search results
@ -487,11 +546,21 @@
                        //Search stats
                        searchResults.appendChild(makeStatsCard(searchResult));

+                        //Autocomplete
+                        if (searchResult.hasOwnProperty("suggest") && searchResult["suggest"].hasOwnProperty("path")) {
+                            pathAutoComplete = [];
+                            for (var i = 0; i < searchResult["suggest"]["path"][0]["options"].length; i++) {
+                                pathAutoComplete.push(searchResult["suggest"]["path"][0]["options"][i].text)
+                            }
+                        }
+
+
                        //Setup page
                        var resultContainer = makeResultContainer();
                        searchResults.appendChild(resultContainer);

                        //Insert search results (hits)
+                        docCount = 0;
                        insertHits(resultContainer, searchResult["hits"]["hits"]);

                        //Initialise download/view button popover
@ -505,5 +574,4 @@
        </script>
    </div>

-
 {% endblock body %}
--- a/templates/task.html
+++ b/templates/task.html
@ -79,14 +79,22 @@

                        try {

-                            var bar = document.getElementById("task-bar-" + currentTask.id);
-                            bar.setAttribute("style", "width: " + percent + "%;");
-                            document.getElementById("task-label-" + currentTask.id).innerHTML = currentTask.parsed + " / " + currentTask.total + "  (" + percent.toFixed(2) + "%)";
+                            if (currentTask.total === 0) {

-                            if (percent === 100) {
-                                bar.classList.add("bg-success")
+                                document.getElementById("task-label-" + currentTask.id).innerHTML = "Calculating file count...";
+
+                            } else {
+                                var bar = document.getElementById("task-bar-" + currentTask.id);
+                                bar.setAttribute("style", "width: " + percent + "%;");
+                                document.getElementById("task-label-" + currentTask.id).innerHTML = currentTask.parsed + " / " + currentTask.total + "  (" + percent.toFixed(2) + "%)";
+
+                                if (percent === 100) {
+                                    bar.classList.add("bg-success")
+                                }
                            }

+
+
                        } catch (e) {
                            window.reload();
                        }
--- a/thumbnail.py
+++ b/thumbnail.py
@ -9,18 +9,18 @@ class ThumbnailGenerator:

    def __init__(self, size, quality=85, color="FF00FF"):
        self.size = (size, size)
-        self.mime_guesser = ContentMimeGuesser()
        self.quality = quality
        self.color = tuple(bytes.fromhex(color))

-    def generate(self, path, dest_path):
+    def generate(self, path, dest_path, mime):

-        mime = self.mime_guesser.guess_mime(path)
+        if mime is None:
+            return

        if mime.startswith("image"):
+
            try:
                self.generate_image(path, dest_path)
-                pass
            except OSError:
                print("Not an image " + path)

@ -36,16 +36,17 @@ class ThumbnailGenerator:
            except Exception as e:
                print("Couldn't make thumbnail for " + path)

-    def generate_all(self, docs, dest_path, counter: Value=None):
+    def generate_all(self, docs, dest_path,  counter: Value=None):

        os.makedirs(dest_path, exist_ok=True)

        for doc in docs:

-            full_path = os.path.join(doc["_source"]["path"], doc["_source"]["name"])
+            extension = "" if doc["_source"]["extension"] == "" else "." + doc["_source"]["extension"]
+            full_path = os.path.join(doc["_source"]["path"], doc["_source"]["name"] + extension)

-            if os.path.isfile(full_path):
-                self.generate(full_path, os.path.join(dest_path, doc["_id"]))
+            if os.path.isfile(full_path) and "mime" in doc["_source"]:
+                self.generate(full_path, os.path.join(dest_path, doc["_id"]), doc["_source"]["mime"])

            if counter is not None:
                counter.value += 1
@ -54,6 +55,11 @@ class ThumbnailGenerator:
        with open(path, "rb") as image_file:
            with Image.open(image_file) as image:

+                # https://stackoverflow.com/questions/43978819
+                if image.mode == "I;16":
+                    image.mode = "I"
+                    image.point(lambda i: i * (1. / 256)).convert('L')
+
                image.thumbnail(self.size, Image.BICUBIC)
                canvas = Image.new("RGB", image.size, self.color)

@ -68,4 +74,3 @@ class ThumbnailGenerator:

                canvas.save(dest_path, "JPEG", quality=self.quality, optimize=True)
                canvas.close()
-