diff --git a/config.py b/config.py
index ac844e6..25f5d3d 100644
--- a/config.py
+++ b/config.py
@@ -21,7 +21,7 @@ nGramMax = 3
elasticsearch_url = "http://localhost:9200"
# Password hashing
-bcrypt_rounds = 14
+bcrypt_rounds = 13
# sqlite3 database path
db_path = "./local_storage.db"
@@ -29,7 +29,7 @@ db_path = "./local_storage.db"
allow_guests = True
# Number of threads used for parsing
-parse_threads = 8
+parse_threads = 32
# Number of threads used for thumbnail generation
tn_threads = 32
diff --git a/crawler.py b/crawler.py
index c14bc4d..be72d5e 100644
--- a/crawler.py
+++ b/crawler.py
@@ -115,6 +115,8 @@ class Crawler:
doc = parser.parse(full_path)
doc["mime"] = mime
out_q.put(doc)
+            except Exception:
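+                # swallow parser errors so one bad file doesn't kill the worker;
+                # the finally block below still marks the queue item as done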
+ pass
finally:
in_q.task_done()
@@ -123,7 +125,7 @@ class Crawler:
if self.indexer is None:
while True:
try:
- doc = out_q.get(timeout=10)
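+                    # slow parsers can leave the output queue empty for a while;
+                    # wait longer before assuming the crawl is finished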
+ doc = out_q.get(timeout=120)
if doc is None:
break
except Empty:
@@ -134,10 +136,11 @@ class Crawler:
while True:
try:
- doc = out_q.get(timeout=10)
+ doc = out_q.get(timeout=120)
if doc is None:
break
except Empty:
+ print("outq empty")
break
try:
@@ -171,7 +174,6 @@ class TaskManager:
directory = self.storage.dirs()[task.dir_id]
if task.type == Task.INDEX:
- c = Crawler([])
self.current_process = Process(target=self.execute_crawl, args=(directory,
self.current_task.parsed_files,
self.current_task.done,
@@ -236,8 +238,9 @@ class TaskManager:
def check_new_task(self):
if self.current_task is None:
- for i in sorted(self.storage.tasks(), reverse=True):
- self.start_task(self.storage.tasks()[i])
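+            # start at most one queued task per check, lowest task id first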
+ tasks = self.storage.tasks()
+ if len(tasks) > 0:
+                self.start_task(tasks[min(tasks)])
else:
if self.current_task.done.value == 1:
self.current_process.terminate()
diff --git a/indexer.py b/indexer.py
index 19613a2..2b881ab 100644
--- a/indexer.py
+++ b/indexer.py
@@ -57,7 +57,8 @@ class Indexer:
"analysis": {"tokenizer": {"path_tokenizer": {"type": "path_hierarchy"}}}},
index=self.index_name)
self.es.indices.put_settings(body={
- "analysis": {"tokenizer": {"my_nGram_tokenizer": {"type": "nGram", "min_gram": config.nGramMin, "max_gram": config.nGramMax}}}},
+ "analysis": {"tokenizer": {
+ "my_nGram_tokenizer": {"type": "nGram", "min_gram": config.nGramMin, "max_gram": config.nGramMax}}}},
index=self.index_name)
self.es.indices.put_settings(body={
"analysis": {"analyzer": {"path_analyser": {"tokenizer": "path_tokenizer", "filter": ["lowercase"]}}}},
@@ -83,7 +84,9 @@ class Indexer:
"mtime": {"type": "integer"},
"size": {"type": "long"},
"directory": {"type": "short"},
- "name": {"analyzer": "my_nGram", "type": "text"},
+ "name": {"analyzer": "content_analyser", "type": "text",
+ "fields": {"nGram": {"type": "text", "analyzer": "my_nGram"}}
+ },
"album": {"analyzer": "my_nGram", "type": "text"},
"artist": {"analyzer": "my_nGram", "type": "text"},
"title": {"analyzer": "my_nGram", "type": "text"},
diff --git a/run.py b/run.py
index 34a735a..09a6f67 100644
--- a/run.py
+++ b/run.py
@@ -1,4 +1,5 @@
import json
+import logging
import os
import shutil
from io import BytesIO
@@ -19,6 +20,10 @@ app = Flask(__name__)
app.secret_key = "A very secret key"
storage = LocalStorage(config.db_path)
+# Suppress werkzeug request logging (only errors are printed)
+flaskLogger = logging.getLogger('werkzeug')
+flaskLogger.setLevel(logging.ERROR)
+
tm = TaskManager(storage)
search = Search("changeme")
diff --git a/search.py b/search.py
index 81e21ec..6402107 100644
--- a/search.py
+++ b/search.py
@@ -128,9 +128,9 @@ class Search:
condition: {
"multi_match": {
"query": query,
- "fields": ["name", "content", "album", "artist", "title", "genre",
- "album_artist", "font_name"],
- "operator": "and"
+ "fields": ["name^3", "name.nGram^2", "content", "album^4", "artist^4", "title^4", "genre",
+ "album_artist^4", "font_name^2"],
+ "operator": "or"
}
},
"filter": filters
@@ -141,15 +141,16 @@ class Search:
],
"highlight": {
"fields": {
- "content": {"pre_tags": [""], "post_tags": [""]},
- "name": {"pre_tags": [""], "post_tags": [""]},
- "font_name": {"pre_tags": [""], "post_tags": [""]},
+ "content": {"pre_tags": [""], "post_tags": [""]},
+ "name": {"pre_tags": [""], "post_tags": [""]},
+ "name.nGram": {"pre_tags": [""], "post_tags": [""]},
+ "font_name": {"pre_tags": [""], "post_tags": [""]},
}
},
"aggs": {
"total_size": {"sum": {"field": "size"}}
},
- "size": 40}, index=self.index_name, scroll="30m")
+ "size": 40}, index=self.index_name, scroll="15m")
return page
@@ -189,14 +190,18 @@ class Search:
return None
def delete_directory(self, dir_id):
-
- try:
- self.es.delete_by_query(body={"query": {
- "bool": {
- "filter": {"term": {"directory": dir_id}}
- }
- }}, index=self.index_name)
- except elasticsearch.exceptions.ConflictError:
- print("Error: multiple delete tasks at the same time")
+ while True:
+ try:
+ self.es.delete_by_query(body={"query": {
+ "bool": {
+ "filter": {"term": {"directory": dir_id}}
+ }
+ }}, index=self.index_name, request_timeout=60)
+ break
+ except elasticsearch.exceptions.ConflictError:
+ print("Error: multiple delete tasks at the same time")
+ except Exception as e:
+ print(e)
diff --git a/static/css/search.css b/static/css/search.css
index 3fdaa96..4a1b761 100644
--- a/static/css/search.css
+++ b/static/css/search.css
@@ -94,8 +94,10 @@ body {overflow-y:scroll;}
}
}
-.hl {
+mark {
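+    /* style for the <mark> tags emitted by search highlighting */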
background: #fff217;
+ border-radius: 0;
+ padding: 1px 0;
}
.content-div {
diff --git a/static/js/search.js b/static/js/search.js
index 2bdfb16..42e7360 100644
--- a/static/js/search.js
+++ b/static/js/search.js
@@ -206,6 +206,8 @@ function createDocCard(hit) {
if (hit.hasOwnProperty("highlight") && hit["highlight"].hasOwnProperty("name")) {
title.insertAdjacentHTML('afterbegin', hit["highlight"]["name"] + extension);
+ } else if (hit.hasOwnProperty("highlight") && hit["highlight"].hasOwnProperty("name.nGram")) {
+ title.insertAdjacentHTML('afterbegin', hit["highlight"]["name.nGram"] + extension);
} else {
title.appendChild(document.createTextNode(hit["_source"]["name"] + extension));
}
@@ -491,7 +493,6 @@ function search() {
}
}
-
//Setup page
let resultContainer = makeResultContainer();
searchResults.appendChild(resultContainer);