diff --git a/config.py b/config.py
index ac844e6..25f5d3d 100644
--- a/config.py
+++ b/config.py
@@ -21,7 +21,7 @@ nGramMax = 3
 elasticsearch_url = "http://localhost:9200"
 
 # Password hashing
-bcrypt_rounds = 14
+bcrypt_rounds = 13
 
 # sqlite3 database path
 db_path = "./local_storage.db"
@@ -29,7 +29,7 @@ db_path = "./local_storage.db"
 allow_guests = True
 
 # Number of threads used for parsing
-parse_threads = 8
+parse_threads = 32
 
 # Number of threads used for thumbnail generation
 tn_threads = 32
diff --git a/crawler.py b/crawler.py
index c14bc4d..be72d5e 100644
--- a/crawler.py
+++ b/crawler.py
@@ -115,6 +115,8 @@ class Crawler:
                 doc = parser.parse(full_path)
                 doc["mime"] = mime
                 out_q.put(doc)
+            except Exception:  # a parser failure on one file must not kill the worker
+                pass
             finally:
                 in_q.task_done()
 
@@ -123,7 +125,7 @@
         if self.indexer is None:
             while True:
                 try:
-                    doc = out_q.get(timeout=10)
+                    doc = out_q.get(timeout=120)
                     if doc is None:
                         break
                 except Empty:
                     break
@@ -134,10 +136,11 @@
 
         while True:
             try:
-                doc = out_q.get(timeout=10)
+                doc = out_q.get(timeout=120)
                 if doc is None:
                     break
            except Empty:
+                print("outq empty")
                 break
 
             try:
@@ -171,7 +174,6 @@ class TaskManager:
         directory = self.storage.dirs()[task.dir_id]
 
         if task.type == Task.INDEX:
-            c = Crawler([])
             self.current_process = Process(target=self.execute_crawl, args=(directory,
                                                                             self.current_task.parsed_files,
                                                                             self.current_task.done,
@@ -236,8 +238,9 @@ class TaskManager:
 
     def check_new_task(self):
         if self.current_task is None:
-            for i in sorted(self.storage.tasks(), reverse=True):
-                self.start_task(self.storage.tasks()[i])
+            tasks = self.storage.tasks()
+            if len(tasks) > 0:
+                self.start_task(tasks[sorted(tasks)[0]])
         else:
             if self.current_task.done.value == 1:
                 self.current_process.terminate()
diff --git a/indexer.py b/indexer.py
index 19613a2..2b881ab 100644
--- a/indexer.py
+++ b/indexer.py
@@ -57,7 +57,8 @@ class Indexer:
             "analysis": {"tokenizer": {"path_tokenizer": {"type": "path_hierarchy"}}}},
             index=self.index_name)
         self.es.indices.put_settings(body={
-            "analysis": {"tokenizer": {"my_nGram_tokenizer": {"type": "nGram", "min_gram": config.nGramMin, "max_gram": config.nGramMax}}}},
+            "analysis": {"tokenizer": {
+                "my_nGram_tokenizer": {"type": "nGram", "min_gram": config.nGramMin, "max_gram": config.nGramMax}}}},
             index=self.index_name)
         self.es.indices.put_settings(body={
             "analysis": {"analyzer": {"path_analyser": {"tokenizer": "path_tokenizer", "filter": ["lowercase"]}}}},
@@ -83,7 +84,9 @@
                 "mtime": {"type": "integer"},
                 "size": {"type": "long"},
                 "directory": {"type": "short"},
-                "name": {"analyzer": "my_nGram", "type": "text"},
+                "name": {"analyzer": "content_analyser", "type": "text",
+                         "fields": {"nGram": {"type": "text", "analyzer": "my_nGram"}}
+                         },
                 "album": {"analyzer": "my_nGram", "type": "text"},
                 "artist": {"analyzer": "my_nGram", "type": "text"},
                 "title": {"analyzer": "my_nGram", "type": "text"},
diff --git a/run.py b/run.py
index 34a735a..09a6f67 100644
--- a/run.py
+++ b/run.py
@@ -1,4 +1,5 @@
 import json
+import logging
 import os
 import shutil
 from io import BytesIO
@@ -19,6 +20,10 @@
 app = Flask(__name__)
 app.secret_key = "A very secret key"
 storage = LocalStorage(config.db_path)
 
+# Disable flask logging
+flaskLogger = logging.getLogger('werkzeug')
+flaskLogger.setLevel(logging.ERROR)
+
 tm = TaskManager(storage)
 search = Search("changeme")
diff --git a/search.py b/search.py
index 81e21ec..6402107 100644
--- a/search.py
+++ b/search.py
@@ -128,9 +128,9 @@ class Search:
                     condition: {
                         "multi_match": {
                             "query": query,
-                            "fields": ["name", "content", "album", "artist", "title", "genre",
-                                       "album_artist", "font_name"],
-                            "operator": "and"
+                            "fields": ["name^3", "name.nGram^2", "content", "album^4", "artist^4", "title^4", "genre",
+                                       "album_artist^4", "font_name^2"],
+                            "operator": "or"
                         }
                     },
                     "filter": filters
@@ -141,15 +141,16 @@
             ],
             "highlight": {
                 "fields": {
-                    "content": {"pre_tags": ["<span class='hl'>"], "post_tags": ["</span>"]},
-                    "name": {"pre_tags": ["<span class='hl'>"], "post_tags": ["</span>"]},
-                    "font_name": {"pre_tags": ["<span class='hl'>"], "post_tags": ["</span>"]},
+                    "content": {"pre_tags": ["<mark>"], "post_tags": ["</mark>"]},
+                    "name": {"pre_tags": ["<mark>"], "post_tags": ["</mark>"]},
+                    "name.nGram": {"pre_tags": ["<mark>"], "post_tags": ["</mark>"]},
+                    "font_name": {"pre_tags": ["<mark>"], "post_tags": ["</mark>"]},
                 }
             },
             "aggs": {
                 "total_size": {"sum": {"field": "size"}}
             },
-            "size": 40}, index=self.index_name, scroll="30m")
+            "size": 40}, index=self.index_name, scroll="15m")
 
         return page
 
@@ -189,14 +190,18 @@
         return None
 
     def delete_directory(self, dir_id):
-
-        try:
-            self.es.delete_by_query(body={"query": {
-                "bool": {
-                    "filter": {"term": {"directory": dir_id}}
-                }
-            }}, index=self.index_name)
-        except elasticsearch.exceptions.ConflictError:
-            print("Error: multiple delete tasks at the same time")
+        while True:
+            try:
+                self.es.delete_by_query(body={"query": {
+                    "bool": {
+                        "filter": {"term": {"directory": dir_id}}
+                    }
+                }}, index=self.index_name, request_timeout=60)
+                break
+            except elasticsearch.exceptions.ConflictError:
+                print("Error: multiple delete tasks at the same time")
+            except Exception as e:
+                print(e)
+
 
diff --git a/static/css/search.css b/static/css/search.css
index 3fdaa96..4a1b761 100644
--- a/static/css/search.css
+++ b/static/css/search.css
@@ -94,8 +94,10 @@ body {overflow-y:scroll;}
     }
 }
 
-.hl {
+mark {
     background: #fff217;
+    border-radius: 0;
+    padding: 1px 0;
 }
 
 .content-div {
diff --git a/static/js/search.js b/static/js/search.js
index 2bdfb16..42e7360 100644
--- a/static/js/search.js
+++ b/static/js/search.js
@@ -206,6 +206,8 @@ function createDocCard(hit) {
 
     if (hit.hasOwnProperty("highlight") && hit["highlight"].hasOwnProperty("name")) {
         title.insertAdjacentHTML('afterbegin', hit["highlight"]["name"] + extension);
+    } else if (hit.hasOwnProperty("highlight") && hit["highlight"].hasOwnProperty("name.nGram")) {
+        title.insertAdjacentHTML('afterbegin', hit["highlight"]["name.nGram"] + extension);
     } else {
         title.appendChild(document.createTextNode(hit["_source"]["name"] + extension));
    }
@@ -491,7 +493,6 @@ function search() {
         }
     }
 
-    //Setup page
     let resultContainer = makeResultContainer();
     searchResults.appendChild(resultContainer);
 