multithreading fixes and optimizations

This commit is contained in:
simon987 2019-04-05 19:59:21 -04:00
parent 60349cf954
commit 2e353c9787
7 changed files with 46 additions and 27 deletions

View File

@ -21,7 +21,7 @@ nGramMax = 3
elasticsearch_url = "http://localhost:9200" elasticsearch_url = "http://localhost:9200"
# Password hashing # Password hashing
bcrypt_rounds = 14 bcrypt_rounds = 13
# sqlite3 database path # sqlite3 database path
db_path = "./local_storage.db" db_path = "./local_storage.db"
@ -29,7 +29,7 @@ db_path = "./local_storage.db"
allow_guests = True allow_guests = True
# Number of threads used for parsing # Number of threads used for parsing
parse_threads = 8 parse_threads = 32
# Number of threads used for thumbnail generation # Number of threads used for thumbnail generation
tn_threads = 32 tn_threads = 32

View File

@ -115,6 +115,8 @@ class Crawler:
doc = parser.parse(full_path) doc = parser.parse(full_path)
doc["mime"] = mime doc["mime"] = mime
out_q.put(doc) out_q.put(doc)
except:
pass
finally: finally:
in_q.task_done() in_q.task_done()
@ -123,7 +125,7 @@ class Crawler:
if self.indexer is None: if self.indexer is None:
while True: while True:
try: try:
doc = out_q.get(timeout=10) doc = out_q.get(timeout=120)
if doc is None: if doc is None:
break break
except Empty: except Empty:
@ -134,10 +136,11 @@ class Crawler:
while True: while True:
try: try:
doc = out_q.get(timeout=10) doc = out_q.get(timeout=120)
if doc is None: if doc is None:
break break
except Empty: except Empty:
print("outq empty")
break break
try: try:
@ -171,7 +174,6 @@ class TaskManager:
directory = self.storage.dirs()[task.dir_id] directory = self.storage.dirs()[task.dir_id]
if task.type == Task.INDEX: if task.type == Task.INDEX:
c = Crawler([])
self.current_process = Process(target=self.execute_crawl, args=(directory, self.current_process = Process(target=self.execute_crawl, args=(directory,
self.current_task.parsed_files, self.current_task.parsed_files,
self.current_task.done, self.current_task.done,
@ -236,8 +238,9 @@ class TaskManager:
def check_new_task(self): def check_new_task(self):
if self.current_task is None: if self.current_task is None:
for i in sorted(self.storage.tasks(), reverse=True): tasks = self.storage.tasks()
self.start_task(self.storage.tasks()[i]) if len(tasks) > 0:
self.start_task(tasks[sorted(tasks)[0]])
else: else:
if self.current_task.done.value == 1: if self.current_task.done.value == 1:
self.current_process.terminate() self.current_process.terminate()

View File

@ -57,7 +57,8 @@ class Indexer:
"analysis": {"tokenizer": {"path_tokenizer": {"type": "path_hierarchy"}}}}, "analysis": {"tokenizer": {"path_tokenizer": {"type": "path_hierarchy"}}}},
index=self.index_name) index=self.index_name)
self.es.indices.put_settings(body={ self.es.indices.put_settings(body={
"analysis": {"tokenizer": {"my_nGram_tokenizer": {"type": "nGram", "min_gram": config.nGramMin, "max_gram": config.nGramMax}}}}, "analysis": {"tokenizer": {
"my_nGram_tokenizer": {"type": "nGram", "min_gram": config.nGramMin, "max_gram": config.nGramMax}}}},
index=self.index_name) index=self.index_name)
self.es.indices.put_settings(body={ self.es.indices.put_settings(body={
"analysis": {"analyzer": {"path_analyser": {"tokenizer": "path_tokenizer", "filter": ["lowercase"]}}}}, "analysis": {"analyzer": {"path_analyser": {"tokenizer": "path_tokenizer", "filter": ["lowercase"]}}}},
@ -83,7 +84,9 @@ class Indexer:
"mtime": {"type": "integer"}, "mtime": {"type": "integer"},
"size": {"type": "long"}, "size": {"type": "long"},
"directory": {"type": "short"}, "directory": {"type": "short"},
"name": {"analyzer": "my_nGram", "type": "text"}, "name": {"analyzer": "content_analyser", "type": "text",
"fields": {"nGram": {"type": "text", "analyzer": "my_nGram"}}
},
"album": {"analyzer": "my_nGram", "type": "text"}, "album": {"analyzer": "my_nGram", "type": "text"},
"artist": {"analyzer": "my_nGram", "type": "text"}, "artist": {"analyzer": "my_nGram", "type": "text"},
"title": {"analyzer": "my_nGram", "type": "text"}, "title": {"analyzer": "my_nGram", "type": "text"},

5
run.py
View File

@ -1,4 +1,5 @@
import json import json
import logging
import os import os
import shutil import shutil
from io import BytesIO from io import BytesIO
@ -19,6 +20,10 @@ app = Flask(__name__)
app.secret_key = "A very secret key" app.secret_key = "A very secret key"
storage = LocalStorage(config.db_path) storage = LocalStorage(config.db_path)
# Disable flask logging
flaskLogger = logging.getLogger('werkzeug')
flaskLogger.setLevel(logging.ERROR)
tm = TaskManager(storage) tm = TaskManager(storage)
search = Search("changeme") search = Search("changeme")

View File

@ -128,9 +128,9 @@ class Search:
condition: { condition: {
"multi_match": { "multi_match": {
"query": query, "query": query,
"fields": ["name", "content", "album", "artist", "title", "genre", "fields": ["name^3", "name.nGram^2", "content", "album^4", "artist^4", "title^4", "genre",
"album_artist", "font_name"], "album_artist^4", "font_name^2"],
"operator": "and" "operator": "or"
} }
}, },
"filter": filters "filter": filters
@ -141,15 +141,16 @@ class Search:
], ],
"highlight": { "highlight": {
"fields": { "fields": {
"content": {"pre_tags": ["<span class='hl'>"], "post_tags": ["</span>"]}, "content": {"pre_tags": ["<mark>"], "post_tags": ["</mark>"]},
"name": {"pre_tags": ["<span class='hl'>"], "post_tags": ["</span>"]}, "name": {"pre_tags": ["<mark>"], "post_tags": ["</mark>"]},
"font_name": {"pre_tags": ["<span class='hl'>"], "post_tags": ["</span>"]}, "name.nGram": {"pre_tags": ["<mark>"], "post_tags": ["</mark>"]},
"font_name": {"pre_tags": ["<mark>"], "post_tags": ["</mark>"]},
} }
}, },
"aggs": { "aggs": {
"total_size": {"sum": {"field": "size"}} "total_size": {"sum": {"field": "size"}}
}, },
"size": 40}, index=self.index_name, scroll="30m") "size": 40}, index=self.index_name, scroll="15m")
return page return page
@ -189,14 +190,18 @@ class Search:
return None return None
def delete_directory(self, dir_id): def delete_directory(self, dir_id):
while True:
try: try:
self.es.delete_by_query(body={"query": { self.es.delete_by_query(body={"query": {
"bool": { "bool": {
"filter": {"term": {"directory": dir_id}} "filter": {"term": {"directory": dir_id}}
} }
}}, index=self.index_name) }}, index=self.index_name, request_timeout=60)
break
except elasticsearch.exceptions.ConflictError: except elasticsearch.exceptions.ConflictError:
print("Error: multiple delete tasks at the same time") print("Error: multiple delete tasks at the same time")
except Exception as e:
print(e)

View File

@ -94,8 +94,10 @@ body {overflow-y:scroll;}
} }
} }
.hl { mark {
background: #fff217; background: #fff217;
border-radius: 0;
padding: 1px 0;
} }
.content-div { .content-div {

View File

@ -206,6 +206,8 @@ function createDocCard(hit) {
if (hit.hasOwnProperty("highlight") && hit["highlight"].hasOwnProperty("name")) { if (hit.hasOwnProperty("highlight") && hit["highlight"].hasOwnProperty("name")) {
title.insertAdjacentHTML('afterbegin', hit["highlight"]["name"] + extension); title.insertAdjacentHTML('afterbegin', hit["highlight"]["name"] + extension);
} else if (hit.hasOwnProperty("highlight") && hit["highlight"].hasOwnProperty("name.nGram")) {
title.insertAdjacentHTML('afterbegin', hit["highlight"]["name.nGram"] + extension);
} else { } else {
title.appendChild(document.createTextNode(hit["_source"]["name"] + extension)); title.appendChild(document.createTextNode(hit["_source"]["name"] + extension));
} }
@ -491,7 +493,6 @@ function search() {
} }
} }
//Setup page //Setup page
let resultContainer = makeResultContainer(); let resultContainer = makeResultContainer();
searchResults.appendChild(resultContainer); searchResults.appendChild(resultContainer);