ES 7.X support. Bug fixes. UI tweaks. Config fixes

This commit is contained in:
simon 2019-05-26 11:31:28 -04:00
parent 64b743870f
commit 980babc5cc
12 changed files with 139 additions and 102 deletions

View File

@ -5,7 +5,8 @@ default_options = {
"ThumbnailColor": "FF00FF", "ThumbnailColor": "FF00FF",
"TextFileContentLength": "2000", "TextFileContentLength": "2000",
"PdfFileContentLength": "2000", "PdfFileContentLength": "2000",
"SpreadsheetContentLength": "2000", "DocxContentLength": "2000",
"SpreadSheetContentLength": "2000",
"EbookContentLength": "2000", "EbookContentLength": "2000",
"MimeGuesser": "extension", # extension, content "MimeGuesser": "extension", # extension, content
"CheckSumCalculators": "", # md5, sha1, sha256 "CheckSumCalculators": "", # md5, sha1, sha256
@ -19,6 +20,7 @@ index_every = 10000
nGramMin = 3 nGramMin = 3
nGramMax = 3 nGramMax = 3
elasticsearch_url = "http://localhost:9200" elasticsearch_url = "http://localhost:9200"
elasticsearch_index = "sist"
# Password hashing # Password hashing
bcrypt_rounds = 13 bcrypt_rounds = 13
@ -41,4 +43,4 @@ try:
except: except:
cairosvg = False cairosvg = False
VERSION = "1.1a" VERSION = "1.2a"

View File

@ -11,7 +11,7 @@ import config
from indexer import Indexer from indexer import Indexer
from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser, MediaFileParser, TextFileParser, \ from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser, MediaFileParser, TextFileParser, \
PictureFileParser, Sha1CheckSumCalculator, Sha256CheckSumCalculator, ContentMimeGuesser, MimeGuesser, FontParser, \ PictureFileParser, Sha1CheckSumCalculator, Sha256CheckSumCalculator, ContentMimeGuesser, MimeGuesser, FontParser, \
PdfFileParser, DocxParser, EbookParser PdfFileParser, DocxParser, EbookParser, SpreadSheetParser
from search import Search from search import Search
from storage import Directory from storage import Directory
from storage import Task, LocalStorage from storage import Task, LocalStorage
@ -154,6 +154,7 @@ class Crawler:
pass pass
finally: finally:
out_q.task_done() out_q.task_done()
if self.documents:
self.indexer.index(self.documents, self.dir_id) self.indexer.index(self.documents, self.dir_id)
@ -162,7 +163,7 @@ class TaskManager:
self.current_task = None self.current_task = None
self.storage = storage self.storage = storage
self.current_process = None self.current_process = None
self.indexer = Indexer("changeme") self.indexer = Indexer(config.elasticsearch_index)
scheduler = BackgroundScheduler() scheduler = BackgroundScheduler()
scheduler.add_job(self.check_new_task, "interval", seconds=0.5) scheduler.add_job(self.check_new_task, "interval", seconds=0.5)
@ -188,34 +189,54 @@ class TaskManager:
def execute_crawl(self, directory: Directory, counter: Value, done: Value, total_files: Value): def execute_crawl(self, directory: Directory, counter: Value, done: Value, total_files: Value):
Search("changeme").delete_directory(directory.id) Search(config.elasticsearch_index).delete_directory(directory.id)
chksum_calcs = self.make_checksums_list(directory)
mime_guesser = ExtensionMimeGuesser() if directory.get_option("MimeGuesser") == "extension" \
else ContentMimeGuesser()
c = Crawler(self.make_parser_list(chksum_calcs, directory), mime_guesser, self.indexer, directory.id)
c.crawl(directory.path, counter, total_files)
done.value = 1
@staticmethod
def make_checksums_list(directory):
chksum_calcs = [] chksum_calcs = []
for arg in directory.get_option("CheckSumCalculators").split(","): for arg in directory.get_option("CheckSumCalculators").split(","):
if arg.strip() == "md5": if arg.strip() == "md5":
chksum_calcs.append(Md5CheckSumCalculator()) chksum_calcs.append(Md5CheckSumCalculator())
elif arg.strip() == "sha1": elif arg.strip() == "sha1":
chksum_calcs.append(Sha1CheckSumCalculator()) chksum_calcs.append(Sha1CheckSumCalculator())
elif arg.strip() == "sha256": elif arg.strip() == "sha256":
chksum_calcs.append(Sha256CheckSumCalculator()) chksum_calcs.append(Sha256CheckSumCalculator())
return chksum_calcs
mime_guesser = ExtensionMimeGuesser() if directory.get_option("MimeGuesser") == "extension" \ @staticmethod
else ContentMimeGuesser() def make_parser_list(chksum_calcs, directory):
p = [p.strip() for p in directory.get_option("FileParsers").split(",")]
c = Crawler([GenericFileParser(chksum_calcs, directory.path), parsers = [GenericFileParser(chksum_calcs, directory.path)]
MediaFileParser(chksum_calcs, directory.path), if "media" in p:
TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path), parsers.append(MediaFileParser(chksum_calcs, directory.path))
PictureFileParser(chksum_calcs, directory.path), if "text" in p:
FontParser(chksum_calcs, directory.path), parsers.append(
PdfFileParser(chksum_calcs, int(directory.get_option("PdfFileContentLength")), directory.path), TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path))
DocxParser(chksum_calcs, int(directory.get_option("SpreadsheetContentLength")), directory.path), if "picture" in p:
EbookParser(chksum_calcs, int(directory.get_option("EbookContentLength")), directory.path)], parsers.append(PictureFileParser(chksum_calcs, directory.path))
mime_guesser, self.indexer, directory.id) if "font" in p:
c.crawl(directory.path, counter, total_files) parsers.append(FontParser(chksum_calcs, directory.path))
if "pdf" in p:
done.value = 1 parsers.append(
PdfFileParser(chksum_calcs, int(directory.get_option("PdfFileContentLength")), directory.path))
if "docx" in p:
parsers.append(DocxParser(chksum_calcs, int(directory.get_option("DocxContentLength")), directory.path))
if "spreadsheet" in p:
parsers.append(
SpreadSheetParser(chksum_calcs, int(directory.get_option("SpreadSheetContentLength")), directory.path))
if "ebook" in p:
parsers.append(EbookParser(chksum_calcs, int(directory.get_option("EbookContentLength")), directory.path))
return parsers
def execute_thumbnails(self, directory: Directory, total_files: Value, counter: Value, done: Value): def execute_thumbnails(self, directory: Directory, total_files: Value, counter: Value, done: Value):
@ -223,7 +244,7 @@ class TaskManager:
if os.path.exists(dest_path): if os.path.exists(dest_path):
shutil.rmtree(dest_path) shutil.rmtree(dest_path)
docs = Search("changeme").get_all_documents(directory.id) docs = Search(config.elasticsearch_index).get_all_documents(directory.id)
tn_generator = ThumbnailGenerator(int(directory.get_option("ThumbnailSize")), tn_generator = ThumbnailGenerator(int(directory.get_option("ThumbnailSize")),
int(directory.get_option("ThumbnailQuality")), int(directory.get_option("ThumbnailQuality")),

View File

@ -13,7 +13,7 @@ class Indexer:
self.index_name = index self.index_name = index
self.es = elasticsearch.Elasticsearch() self.es = elasticsearch.Elasticsearch()
requests.head("http://localhost:9200") requests.head(config.elasticsearch_url)
if self.es.indices.exists(self.index_name): if self.es.indices.exists(self.index_name):
print("Index is already setup") print("Index is already setup")
else: else:
@ -93,7 +93,7 @@ class Indexer:
"genre": {"analyzer": "my_nGram", "type": "text"}, "genre": {"analyzer": "my_nGram", "type": "text"},
"album_artist": {"analyzer": "my_nGram", "type": "text"}, "album_artist": {"analyzer": "my_nGram", "type": "text"},
"content": {"analyzer": "content_analyser", "type": "text"}, "content": {"analyzer": "content_analyser", "type": "text"},
}}, doc_type="file", index=self.index_name) }}, doc_type="file", index=self.index_name, include_type_name=True)
self.es.indices.open(index=self.index_name) self.es.indices.open(index=self.index_name)

Binary file not shown.

2
run.py
View File

@ -25,7 +25,7 @@ flaskLogger = logging.getLogger('werkzeug')
flaskLogger.setLevel(logging.ERROR) flaskLogger.setLevel(logging.ERROR)
tm = TaskManager(storage) tm = TaskManager(storage)
search = Search("changeme") search = Search(config.elasticsearch_index)
def get_dir_size(path): def get_dir_size(path):

View File

@ -1,10 +1,12 @@
import json import json
import os import os
import elasticsearch import elasticsearch
import requests import requests
import config
from elasticsearch import helpers from elasticsearch import helpers
import config
class Search: class Search:
@ -14,9 +16,8 @@ class Search:
try: try:
requests.head(config.elasticsearch_url) requests.head(config.elasticsearch_url)
print("elasticsearch is already running")
except: except:
print("elasticsearch is not running") print("elasticsearch is not running!")
self.search_iterator = None self.search_iterator = None
@ -33,7 +34,6 @@ class Search:
info = requests.get("http://localhost:9200/" + self.index_name + "/_stats") info = requests.get("http://localhost:9200/" + self.index_name + "/_stats")
if info.status_code == 200: if info.status_code == 200:
parsed_info = json.loads(info.text) parsed_info = json.loads(info.text)
return int(parsed_info["indices"][self.index_name]["total"]["store"]["size_in_bytes"]) return int(parsed_info["indices"][self.index_name]["total"]["store"]["size_in_bytes"])
@ -171,6 +171,7 @@ class Search:
path_list = [] path_list = []
if "suggest" in suggestions:
for option in suggestions["suggest"]["path"][0]["options"]: for option in suggestions["suggest"]["path"][0]["options"]:
path_list.append(option["_source"]["path"]) path_list.append(option["_source"]["path"])
@ -202,6 +203,3 @@ class Search:
print("Error: multiple delete tasks at the same time") print("Error: multiple delete tasks at the same time")
except Exception as e: except Exception as e:
print(e) print(e)

View File

@ -73,7 +73,9 @@ function makeStatsCard(searchResult) {
statsCardBody.setAttribute("class", "card-body"); statsCardBody.setAttribute("class", "card-body");
let stat = document.createElement("p"); let stat = document.createElement("p");
stat.appendChild(document.createTextNode(searchResult["hits"]["total"] + " results in " + searchResult["took"] + "ms")); const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
stat.appendChild(document.createTextNode(totalHits + " results in " + searchResult["took"] + "ms"));
let sizeStat = document.createElement("span"); let sizeStat = document.createElement("span");
sizeStat.appendChild(document.createTextNode(humanFileSize(searchResult["aggregations"]["total_size"]["value"]))); sizeStat.appendChild(document.createTextNode(humanFileSize(searchResult["aggregations"]["total_size"]["value"])));
@ -124,9 +126,15 @@ function humanTime (sec_num) {
let minutes = Math.floor((sec_num - (hours * 3600)) / 60); let minutes = Math.floor((sec_num - (hours * 3600)) / 60);
let seconds = sec_num - (hours * 3600) - (minutes * 60); let seconds = sec_num - (hours * 3600) - (minutes * 60);
if (hours < 10) {hours = "0" + hours;} if (hours < 10) {
if (minutes < 10) {minutes = "0" + minutes;} hours = "0" + hours;
if (seconds < 10) {seconds = "0" + seconds;} }
if (minutes < 10) {
minutes = "0" + minutes;
}
if (seconds < 10) {
seconds = "0" + seconds;
}
return hours + ":" + minutes + ":" + seconds; return hours + ":" + minutes + ":" + seconds;
} }
@ -302,11 +310,13 @@ function createDocCard(hit) {
break; break;
case "image": { case "image": {
if (format !== undefined) {
let formatTag = document.createElement("span"); let formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-image"); formatTag.setAttribute("class", "badge badge-pill badge-image");
formatTag.appendChild(document.createTextNode(format)); formatTag.appendChild(document.createTextNode(format));
tags.push(formatTag); tags.push(formatTag);
} }
}
break; break;
case "audio": { case "audio": {
if (hit["_source"].hasOwnProperty("format_long_name")) { if (hit["_source"].hasOwnProperty("format_long_name")) {
@ -315,9 +325,7 @@ function createDocCard(hit) {
formatTag.appendChild(document.createTextNode(hit["_source"]["format_long_name"])); formatTag.appendChild(document.createTextNode(hit["_source"]["format_long_name"]));
tags.push(formatTag); tags.push(formatTag);
} }
} }
break; break;
case "text": { case "text": {
let formatTag = document.createElement("span"); let formatTag = document.createElement("span");
@ -387,7 +395,9 @@ function createDocCard(hit) {
function makePageIndicator(searchResult) { function makePageIndicator(searchResult) {
let pageIndicator = document.createElement("div"); let pageIndicator = document.createElement("div");
pageIndicator.appendChild(document.createTextNode(docCount + " / " +searchResult["hits"]["total"])); const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
pageIndicator.appendChild(document.createTextNode(docCount + " / " + totalHits));
return pageIndicator; return pageIndicator;
} }
@ -572,6 +582,7 @@ function updateDirectories() {
searchQueued = true; searchQueued = true;
} }
document.getElementById("directories").addEventListener("change", updateDirectories); document.getElementById("directories").addEventListener("change", updateDirectories);
updateDirectories(); updateDirectories();
searchQueued = false; searchQueued = false;

View File

@ -1,7 +1,9 @@
import sqlite3
import os import os
import flask_bcrypt import sqlite3
import time import time
import flask_bcrypt
import config import config
@ -50,6 +52,7 @@ class Directory:
""" """
Data structure to hold directory information Data structure to hold directory information
""" """
def __init__(self, path: str, enabled: bool, options: list, name: str): def __init__(self, path: str, enabled: bool, options: list, name: str):
self.id = None self.id = None
self.path = path self.path = path
@ -77,7 +80,6 @@ class Directory:
class Task: class Task:
INDEX = 1 INDEX = 1
GEN_THUMBNAIL = 2 GEN_THUMBNAIL = 2
@ -385,7 +387,6 @@ class LocalStorage:
conn.close() conn.close()
for db_task in tasks: for db_task in tasks:
task = Task(db_task[2], db_task[1], db_task[3], db_task[4], db_task[0]) task = Task(db_task[2], db_task[1], db_task[3], db_task[4], db_task[0])
self.cached_tasks[task.id] = task self.cached_tasks[task.id] = task

View File

@ -44,7 +44,7 @@
<td>{{ directories[dir].name }}</td> <td>{{ directories[dir].name }}</td>
<td style="word-break: break-all"><pre>{{ directories[dir].path }}</pre></td> <td style="word-break: break-all"><pre>{{ directories[dir].path }}</pre></td>
<td><i class="far {{ "fa-check-square" if directories[dir].enabled else "fa-square" }}"></i></td> <td><i class="far {{ "fa-check-square" if directories[dir].enabled else "fa-square" }}"></i></td>
<td><a href="directory/{{ dir }}" class="btn btn-primary"><i class="fas fa-cog"></i> </a> Manage</td> <td><a href="directory/{{ dir }}" class="btn btn-primary"><i class="fas fa-cog"></i> Manage</a> </td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>

View File

@ -59,6 +59,7 @@
<div class="collapse navbar-collapse" id="navbarSupportedContent"> <div class="collapse navbar-collapse" id="navbarSupportedContent">
<ul class="navbar-nav mr-auto"> <ul class="navbar-nav mr-auto">
{% if session["username"] %}
<li class="nav-item"> <li class="nav-item">
<a class="nav-link {% if "directory" == active_page %}active{% endif %}" href="/directory">Directories</a> <a class="nav-link {% if "directory" == active_page %}active{% endif %}" href="/directory">Directories</a>
</li> </li>
@ -71,6 +72,7 @@
<li class="nav-item"> <li class="nav-item">
<a class="nav-link {% if "dashboard" == active_page %}active{% endif %}" href="/dashboard">Dashboard</a> <a class="nav-link {% if "dashboard" == active_page %}active{% endif %}" href="/dashboard">Dashboard</a>
</li> </li>
{% endif %}
</ul> </ul>
{% if session["username"] %} {% if session["username"] %}

View File

@ -81,7 +81,7 @@
if (currentTask.total === 0) { if (currentTask.total === 0) {
document.getElementById("task-label-" + currentTask.id).innerHTML = "Calculating file count..."; document.getElementById("task-label-" + currentTask.id).innerHTML = "Initializing...";
} else { } else {
let bar = document.getElementById("task-bar-" + currentTask.id); let bar = document.getElementById("task-bar-" + currentTask.id);
@ -90,6 +90,8 @@
if (percent === 100) { if (percent === 100) {
bar.classList.add("bg-success") bar.classList.add("bg-success")
} else {
bar.classList.remove("bg-success")
} }
} }

View File

@ -36,7 +36,7 @@ class ThumbnailGenerator:
p.terminate() p.terminate()
print("Timed out: " + path) print("Timed out: " + path)
else: else:
self.generate_image("tmp", dest_path) self.generate_image(tmpfile, dest_path)
except Exception: except Exception:
print("Couldn't make thumbnail for " + path) print("Couldn't make thumbnail for " + path)