ES 7.X support. Bug fixes. UI tweaks. Config fixes

This commit is contained in:
simon 2019-05-26 11:31:28 -04:00
parent 64b743870f
commit 980babc5cc
12 changed files with 139 additions and 102 deletions

View File

@ -5,7 +5,8 @@ default_options = {
"ThumbnailColor": "FF00FF",
"TextFileContentLength": "2000",
"PdfFileContentLength": "2000",
"SpreadsheetContentLength": "2000",
"DocxContentLength": "2000",
"SpreadSheetContentLength": "2000",
"EbookContentLength": "2000",
"MimeGuesser": "extension", # extension, content
"CheckSumCalculators": "", # md5, sha1, sha256
@ -19,6 +20,7 @@ index_every = 10000
nGramMin = 3
nGramMax = 3
elasticsearch_url = "http://localhost:9200"
elasticsearch_index = "sist"
# Password hashing
bcrypt_rounds = 13
@ -41,4 +43,4 @@ try:
except:
cairosvg = False
VERSION = "1.1a"
VERSION = "1.2a"

View File

@ -11,7 +11,7 @@ import config
from indexer import Indexer
from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser, MediaFileParser, TextFileParser, \
PictureFileParser, Sha1CheckSumCalculator, Sha256CheckSumCalculator, ContentMimeGuesser, MimeGuesser, FontParser, \
PdfFileParser, DocxParser, EbookParser
PdfFileParser, DocxParser, EbookParser, SpreadSheetParser
from search import Search
from storage import Directory
from storage import Task, LocalStorage
@ -53,7 +53,7 @@ class Crawler:
self.mime_guesser = mime_guesser
def crawl(self, root_dir: str, counter: Value = None, total_files = None):
def crawl(self, root_dir: str, counter: Value = None, total_files=None):
in_q = Queue(50000) # TODO: get from config?
out_q = Queue()
@ -154,7 +154,8 @@ class Crawler:
pass
finally:
out_q.task_done()
self.indexer.index(self.documents, self.dir_id)
if self.documents:
self.indexer.index(self.documents, self.dir_id)
class TaskManager:
@ -162,7 +163,7 @@ class TaskManager:
self.current_task = None
self.storage = storage
self.current_process = None
self.indexer = Indexer("changeme")
self.indexer = Indexer(config.elasticsearch_index)
scheduler = BackgroundScheduler()
scheduler.add_job(self.check_new_task, "interval", seconds=0.5)
@ -188,34 +189,54 @@ class TaskManager:
def execute_crawl(self, directory: Directory, counter: Value, done: Value, total_files: Value):
Search("changeme").delete_directory(directory.id)
Search(config.elasticsearch_index).delete_directory(directory.id)
chksum_calcs = self.make_checksums_list(directory)
mime_guesser = ExtensionMimeGuesser() if directory.get_option("MimeGuesser") == "extension" \
else ContentMimeGuesser()
c = Crawler(self.make_parser_list(chksum_calcs, directory), mime_guesser, self.indexer, directory.id)
c.crawl(directory.path, counter, total_files)
done.value = 1
@staticmethod
def make_checksums_list(directory):
    """Build the checksum calculators requested by a directory's options.

    Reads the comma-separated ``CheckSumCalculators`` option and creates one
    calculator per recognised entry (``md5``, ``sha1``, ``sha256``), in the
    order the entries are listed. Entries that match none of these names
    (including empty ones) are ignored.

    :param directory: object exposing ``get_option(key)``
    :return: list of checksum calculator instances (possibly empty)
    """
    # Strip each entry once up front instead of once per comparison.
    requested = [entry.strip() for entry in directory.get_option("CheckSumCalculators").split(",")]

    calculators = []
    for algorithm in requested:
        if algorithm == "md5":
            calculators.append(Md5CheckSumCalculator())
        elif algorithm == "sha1":
            calculators.append(Sha1CheckSumCalculator())
        elif algorithm == "sha256":
            calculators.append(Sha256CheckSumCalculator())
    return calculators
mime_guesser = ExtensionMimeGuesser() if directory.get_option("MimeGuesser") == "extension" \
else ContentMimeGuesser()
c = Crawler([GenericFileParser(chksum_calcs, directory.path),
MediaFileParser(chksum_calcs, directory.path),
TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path),
PictureFileParser(chksum_calcs, directory.path),
FontParser(chksum_calcs, directory.path),
PdfFileParser(chksum_calcs, int(directory.get_option("PdfFileContentLength")), directory.path),
DocxParser(chksum_calcs, int(directory.get_option("SpreadsheetContentLength")), directory.path),
EbookParser(chksum_calcs, int(directory.get_option("EbookContentLength")), directory.path)],
mime_guesser, self.indexer, directory.id)
c.crawl(directory.path, counter, total_files)
done.value = 1
@staticmethod
def make_parser_list(chksum_calcs, directory):
    """Build the list of file parsers enabled for a directory.

    A ``GenericFileParser`` is always included first. The remaining parsers
    are added, in a fixed order, when their name appears in the
    comma-separated ``FileParsers`` option: ``media``, ``text``, ``picture``,
    ``font``, ``pdf``, ``docx``, ``spreadsheet``, ``ebook``.

    Content-length options are read and converted with ``int()`` only for
    parsers that are actually enabled.

    :param chksum_calcs: checksum calculators handed to every parser
    :param directory: object exposing ``get_option(key)`` and ``path``
    :return: list of parser instances
    """
    enabled = {name.strip() for name in directory.get_option("FileParsers").split(",")}

    def content_length(option_key):
        # Looked up lazily so options of disabled parsers are never read.
        return int(directory.get_option(option_key))

    parsers = [GenericFileParser(chksum_calcs, directory.path)]

    if "media" in enabled:
        parsers.append(MediaFileParser(chksum_calcs, directory.path))

    if "text" in enabled:
        parsers.append(TextFileParser(chksum_calcs, content_length("TextFileContentLength"), directory.path))

    if "picture" in enabled:
        parsers.append(PictureFileParser(chksum_calcs, directory.path))

    if "font" in enabled:
        parsers.append(FontParser(chksum_calcs, directory.path))

    if "pdf" in enabled:
        parsers.append(PdfFileParser(chksum_calcs, content_length("PdfFileContentLength"), directory.path))

    if "docx" in enabled:
        parsers.append(DocxParser(chksum_calcs, content_length("DocxContentLength"), directory.path))

    if "spreadsheet" in enabled:
        parsers.append(
            SpreadSheetParser(chksum_calcs, content_length("SpreadSheetContentLength"), directory.path))

    if "ebook" in enabled:
        parsers.append(EbookParser(chksum_calcs, content_length("EbookContentLength"), directory.path))

    return parsers
def execute_thumbnails(self, directory: Directory, total_files: Value, counter: Value, done: Value):
@ -223,7 +244,7 @@ class TaskManager:
if os.path.exists(dest_path):
shutil.rmtree(dest_path)
docs = Search("changeme").get_all_documents(directory.id)
docs = Search(config.elasticsearch_index).get_all_documents(directory.id)
tn_generator = ThumbnailGenerator(int(directory.get_option("ThumbnailSize")),
int(directory.get_option("ThumbnailQuality")),

View File

@ -13,7 +13,7 @@ class Indexer:
self.index_name = index
self.es = elasticsearch.Elasticsearch()
requests.head("http://localhost:9200")
requests.head(config.elasticsearch_url)
if self.es.indices.exists(self.index_name):
print("Index is already setup")
else:
@ -93,7 +93,7 @@ class Indexer:
"genre": {"analyzer": "my_nGram", "type": "text"},
"album_artist": {"analyzer": "my_nGram", "type": "text"},
"content": {"analyzer": "content_analyser", "type": "text"},
}}, doc_type="file", index=self.index_name)
}}, doc_type="file", index=self.index_name, include_type_name=True)
self.es.indices.open(index=self.index_name)

Binary file not shown.

2
run.py
View File

@ -25,7 +25,7 @@ flaskLogger = logging.getLogger('werkzeug')
flaskLogger.setLevel(logging.ERROR)
tm = TaskManager(storage)
search = Search("changeme")
search = Search(config.elasticsearch_index)
def get_dir_size(path):

View File

@ -1,10 +1,12 @@
import json
import os
import elasticsearch
import requests
import config
from elasticsearch import helpers
import config
class Search:
@ -14,9 +16,8 @@ class Search:
try:
requests.head(config.elasticsearch_url)
print("elasticsearch is already running")
except:
print("elasticsearch is not running")
print("elasticsearch is not running!")
self.search_iterator = None
@ -33,7 +34,6 @@ class Search:
info = requests.get("http://localhost:9200/" + self.index_name + "/_stats")
if info.status_code == 200:
parsed_info = json.loads(info.text)
return int(parsed_info["indices"][self.index_name]["total"]["store"]["size_in_bytes"])
@ -171,8 +171,9 @@ class Search:
path_list = []
for option in suggestions["suggest"]["path"][0]["options"]:
path_list.append(option["_source"]["path"])
if "suggest" in suggestions:
for option in suggestions["suggest"]["path"][0]["options"]:
path_list.append(option["_source"]["path"])
return path_list
@ -202,6 +203,3 @@ class Search:
print("Error: multiple delete tasks at the same time")
except Exception as e:
print(e)

View File

@ -24,7 +24,7 @@ new InspireTreeDOM(tree, {
tree.select();
tree.node("any").deselect();
tree.on("node.click", function(event, node, handler) {
tree.on("node.click", function (event, node, handler) {
event.preventTreeDefault();
if (node.id === "any") {
@ -44,23 +44,23 @@ new autoComplete({
selector: '#pathBar',
minChars: 1,
delay: 75,
renderItem: function (item){
renderItem: function (item) {
return '<div class="autocomplete-suggestion" data-val="' + item + '">' + item + '</div>';
},
source: async function(term, suggest) {
source: async function (term, suggest) {
term = term.toLowerCase();
const choices = await getPathChoices();
let matches = [];
for (let i=0; i<choices.length; i++) {
for (let i = 0; i < choices.length; i++) {
if (~choices[i].toLowerCase().indexOf(term)) {
matches.push(choices[i]);
}
}
suggest(matches);
},
onSelect: function() {
onSelect: function () {
searchQueued = true;
}
});
@ -73,7 +73,9 @@ function makeStatsCard(searchResult) {
statsCardBody.setAttribute("class", "card-body");
let stat = document.createElement("p");
stat.appendChild(document.createTextNode(searchResult["hits"]["total"] + " results in " + searchResult["took"] + "ms"));
const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
stat.appendChild(document.createTextNode(totalHits + " results in " + searchResult["took"] + "ms"));
let sizeStat = document.createElement("span");
sizeStat.appendChild(document.createTextNode(humanFileSize(searchResult["aggregations"]["total_size"]["value"])));
@ -97,20 +99,20 @@ function makeResultContainer() {
*/
function humanFileSize(bytes) {
    // A size of exactly zero is rendered with the placeholder "? B".
    if (bytes === 0) {
        return "? B"
    }

    const step = 1000;

    // Values below one step are shown as a raw byte count.
    if (Math.abs(bytes) < step) {
        return bytes + ' B';
    }

    const unitNames = ['kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'];

    // Scale down once unconditionally, then keep going until the value
    // fits under one step or the largest unit has been reached.
    let scaled = bytes / step;
    let unitIndex = 0;
    while (Math.abs(scaled) >= step && unitIndex < unitNames.length - 1) {
        scaled /= step;
        unitIndex += 1;
    }

    return scaled.toFixed(1) + ' ' + unitNames[unitIndex];
}
@ -118,15 +120,21 @@ function humanFileSize(bytes) {
/**
* https://stackoverflow.com/questions/6312993
*/
function humanTime(sec_num) {
    // Work on whole seconds only.
    const totalSeconds = Math.floor(sec_num);

    const hours = Math.floor(totalSeconds / 3600);
    const minutes = Math.floor((totalSeconds - (hours * 3600)) / 60);
    const seconds = totalSeconds - (hours * 3600) - (minutes * 60);

    // Prefix a "0" to any component below ten; larger values pass through.
    const pad = function (part) {
        return part < 10 ? "0" + part : part;
    };

    return pad(hours) + ":" + pad(minutes) + ":" + pad(seconds);
}
@ -134,7 +142,7 @@ function humanTime (sec_num) {
function initPopover() {
$('[data-toggle="popover"]').popover({
trigger: "focus",
delay: { "show": 0, "hide": 100 },
delay: {"show": 0, "hide": 100},
placement: "bottom",
html: true
});
@ -152,7 +160,7 @@ function gifOver(thumbnail, documentId) {
thumbnail.mouseStayedOver = true;
window.setTimeout(function() {
window.setTimeout(function () {
if (thumbnail.mouseStayedOver) {
thumbnail.removeEventListener('mouseover', callee, false);
@ -163,7 +171,7 @@ function gifOver(thumbnail, documentId) {
});
thumbnail.addEventListener("mouseout", function() {
thumbnail.addEventListener("mouseout", function () {
//Reset timer
thumbnail.mouseStayedOver = false;
thumbnail.setAttribute("src", "/thumb/" + documentId);
@ -173,10 +181,10 @@ function gifOver(thumbnail, documentId) {
function downloadPopover(element, documentId) {
    // Markup for the two popover buttons: download and open in a new tab.
    const downloadButton = '<a class="btn btn-sm btn-primary" href="/dl/' + documentId + '"><i class="fas fa-download"></i> Download</a>';
    const viewButton = '<a class="btn btn-sm btn-success" style="margin-left:3px;" href="/file/' + documentId + '" target="_blank"><i class="fas fa-eye"></i> View</a>';

    element.setAttribute("data-content", downloadButton + viewButton);
    element.setAttribute("data-toggle", "popover");

    // Hovering focuses the element.
    element.addEventListener("mouseover", function () {
        element.focus();
    });
}
@ -242,7 +250,7 @@ function createDocCard(hit) {
thumbnail.setAttribute("controls", "");
thumbnail.setAttribute("preload", "none");
thumbnail.setAttribute("poster", "/thumb/" + hit["_id"]);
thumbnail.addEventListener("dblclick", function() {
thumbnail.addEventListener("dblclick", function () {
thumbnail.webkitRequestFullScreen();
});
@ -272,7 +280,7 @@ function createDocCard(hit) {
var format = hit["_source"]["format_name"];
//Hover
if(format === "GIF") {
if (format === "GIF") {
gifOver(thumbnail, hit["_id"]);
}
break;
@ -302,10 +310,12 @@ function createDocCard(hit) {
break;
case "image": {
let formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-image");
formatTag.appendChild(document.createTextNode(format));
tags.push(formatTag);
if (format !== undefined) {
let formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-image");
formatTag.appendChild(document.createTextNode(format));
tags.push(formatTag);
}
}
break;
case "audio": {
@ -315,9 +325,7 @@ function createDocCard(hit) {
formatTag.appendChild(document.createTextNode(hit["_source"]["format_long_name"]));
tags.push(formatTag);
}
}
break;
case "text": {
let formatTag = document.createElement("span");
@ -387,13 +395,15 @@ function createDocCard(hit) {
function makePageIndicator(searchResult) {
let pageIndicator = document.createElement("div");
pageIndicator.appendChild(document.createTextNode(docCount + " / " +searchResult["hits"]["total"]));
const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
pageIndicator.appendChild(document.createTextNode(docCount + " / " + totalHits));
return pageIndicator;
}
function insertHits(resultContainer, hits) {
for (let i = 0 ; i < hits.length; i++) {
for (let i = 0; i < hits.length; i++) {
resultContainer.appendChild(createDocCard(hits[i]));
docCount++;
}
@ -409,7 +419,7 @@ window.addEventListener("scroll", function () {
//load next page
let xhttp = new XMLHttpRequest();
xhttp.onreadystatechange = function() {
xhttp.onreadystatechange = function () {
if (this.readyState === 4 && this.status === 200) {
let searchResult = JSON.parse(this.responseText);
@ -449,7 +459,7 @@ function getSelectedMimeTypes() {
for (let i = 0; i < selected.length; i++) {
if(selected[i].id === "any") {
if (selected[i].id === "any") {
return "any"
}
@ -468,7 +478,7 @@ function search() {
searchQueued = false;
//Clear old search results
let searchResults = document.getElementById("searchResults");
let searchResults = document.getElementById("searchResults");
while (searchResults.firstChild) {
searchResults.removeChild(searchResults.firstChild);
}
@ -476,7 +486,7 @@ function search() {
let query = searchBar.value;
let xhttp = new XMLHttpRequest();
xhttp.onreadystatechange = function() {
xhttp.onreadystatechange = function () {
if (this.readyState === 4 && this.status === 200) {
let searchResult = JSON.parse(this.responseText);
@ -542,7 +552,7 @@ $("#sizeSlider").ionRangeSlider({
drag_interval: true,
prettify: function (num) {
if(num === 0) {
if (num === 0) {
return "0 B"
} else if (num >= 3684) {
return humanFileSize(num * num * num) + "+";
@ -550,11 +560,11 @@ $("#sizeSlider").ionRangeSlider({
return humanFileSize(num * num * num)
},
onChange: function(e) {
onChange: function (e) {
size_min = (e.from * e.from * e.from);
size_max = (e.to * e.to * e.to);
if (e.to >= 3684) {
if (e.to >= 3684) {
size_max = 10000000000000;
}
@ -566,12 +576,13 @@ $("#sizeSlider").ionRangeSlider({
function updateDirectories() {
    // Rebuild the selected directory id list from the <select> element.
    selectedDirs = [];

    $('#directories').find('option:selected').each(function (_index, option) {
        selectedDirs.push(parseInt($(option).val()));
    });

    // Flag that a new search should be run with the updated selection.
    searchQueued = true;
}
document.getElementById("directories").addEventListener("change", updateDirectories);
updateDirectories();
searchQueued = false;
@ -581,7 +592,7 @@ function getPathChoices() {
return new Promise(getPaths => {
let xhttp = new XMLHttpRequest();
xhttp.onreadystatechange = function() {
xhttp.onreadystatechange = function () {
if (this.readyState === 4 && this.status === 200) {
getPaths(JSON.parse(xhttp.responseText))
}

View File

@ -1,7 +1,9 @@
import sqlite3
import os
import flask_bcrypt
import sqlite3
import time
import flask_bcrypt
import config
@ -39,7 +41,7 @@ class Option:
Data structure to hold a directory option
"""
def __init__(self, key: str, value: str, dir_id: int=None, opt_id: int = None):
def __init__(self, key: str, value: str, dir_id: int = None, opt_id: int = None):
self.key = key
self.value = value
self.id = opt_id
@ -50,6 +52,7 @@ class Directory:
"""
Data structure to hold directory information
"""
def __init__(self, path: str, enabled: bool, options: list, name: str):
self.id = None
self.path = path
@ -59,7 +62,7 @@ class Directory:
def __str__(self):
return self.path + " | enabled: " + str(self.enabled) + " | opts: " + str(self.options)
def get_option(self, key):
for option in self.options:
@ -77,7 +80,6 @@ class Directory:
class Task:
INDEX = 1
GEN_THUMBNAIL = 2
@ -268,7 +270,7 @@ class LocalStorage:
c = conn.cursor()
c.execute("UPDATE User SET is_admin=? WHERE username=?", (user.admin, user.username))
c.execute("DELETE FROM User_canRead_Directory WHERE username=?", (user.username, ))
c.execute("DELETE FROM User_canRead_Directory WHERE username=?", (user.username,))
conn.commit()
for access in user.readable_directories:
@ -332,7 +334,7 @@ class LocalStorage:
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
c.execute("DELETE FROM Option WHERE id=?", (opt_id, ))
c.execute("DELETE FROM Option WHERE id=?", (opt_id,))
conn.commit()
conn.close()
@ -385,7 +387,6 @@ class LocalStorage:
conn.close()
for db_task in tasks:
task = Task(db_task[2], db_task[1], db_task[3], db_task[4], db_task[0])
self.cached_tasks[task.id] = task
@ -402,7 +403,7 @@ class LocalStorage:
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
c.execute("DELETE FROM Task WHERE id=?", (task_id, ))
c.execute("DELETE FROM Task WHERE id=?", (task_id,))
conn.commit()
c.close()
@ -438,4 +439,4 @@ class LocalStorage:
for access in accesses:
access_list.append(access[1])
return access_list
return access_list

View File

@ -44,7 +44,7 @@
<td>{{ directories[dir].name }}</td>
<td style="word-break: break-all"><pre>{{ directories[dir].path }}</pre></td>
<td><i class="far {{ "fa-check-square" if directories[dir].enabled else "fa-square" }}"></i></td>
<td><a href="directory/{{ dir }}" class="btn btn-primary"><i class="fas fa-cog"></i> </a> Manage</td>
<td><a href="directory/{{ dir }}" class="btn btn-primary"><i class="fas fa-cog"></i> Manage</a> </td>
</tr>
{% endfor %}
</tbody>

View File

@ -59,18 +59,20 @@
<div class="collapse navbar-collapse" id="navbarSupportedContent">
<ul class="navbar-nav mr-auto">
<li class="nav-item">
<a class="nav-link {% if "directory" == active_page %}active{% endif %}" href="/directory">Directories</a>
</li>
<li class="nav-item">
<a class="nav-link {% if "task" == active_page %}active{% endif %}" href="/task">Tasks</a>
</li>
<li class="nav-item">
<a class="nav-link {% if "user" == active_page %}active{% endif %}" href="/user">Users</a>
</li>
<li class="nav-item">
<a class="nav-link {% if "dashboard" == active_page %}active{% endif %}" href="/dashboard">Dashboard</a>
</li>
{% if session["username"] %}
<li class="nav-item">
<a class="nav-link {% if "directory" == active_page %}active{% endif %}" href="/directory">Directories</a>
</li>
<li class="nav-item">
<a class="nav-link {% if "task" == active_page %}active{% endif %}" href="/task">Tasks</a>
</li>
<li class="nav-item">
<a class="nav-link {% if "user" == active_page %}active{% endif %}" href="/user">Users</a>
</li>
<li class="nav-item">
<a class="nav-link {% if "dashboard" == active_page %}active{% endif %}" href="/dashboard">Dashboard</a>
</li>
{% endif %}
</ul>
{% if session["username"] %}

View File

@ -81,7 +81,7 @@
if (currentTask.total === 0) {
document.getElementById("task-label-" + currentTask.id).innerHTML = "Calculating file count...";
document.getElementById("task-label-" + currentTask.id).innerHTML = "Initializing...";
} else {
let bar = document.getElementById("task-bar-" + currentTask.id);
@ -90,6 +90,8 @@
if (percent === 100) {
bar.classList.add("bg-success")
} else {
bar.classList.remove("bg-success")
}
}

View File

@ -36,7 +36,7 @@ class ThumbnailGenerator:
p.terminate()
print("Timed out: " + path)
else:
self.generate_image("tmp", dest_path)
self.generate_image(tmpfile, dest_path)
except Exception:
print("Couldn't make thumbnail for " + path)