diff --git a/config.py b/config.py
index 25f5d3d..2c1d543 100644
--- a/config.py
+++ b/config.py
@@ -5,7 +5,8 @@ default_options = {
"ThumbnailColor": "FF00FF",
"TextFileContentLength": "2000",
"PdfFileContentLength": "2000",
- "SpreadsheetContentLength": "2000",
+ "DocxContentLength": "2000",
+ "SpreadSheetContentLength": "2000",
"EbookContentLength": "2000",
"MimeGuesser": "extension", # extension, content
"CheckSumCalculators": "", # md5, sha1, sha256
@@ -19,6 +20,7 @@ index_every = 10000
nGramMin = 3
nGramMax = 3
elasticsearch_url = "http://localhost:9200"
+elasticsearch_index = "sist"
# Password hashing
bcrypt_rounds = 13
@@ -41,4 +43,4 @@ try:
except:
cairosvg = False
-VERSION = "1.1a"
+VERSION = "1.2a"
diff --git a/crawler.py b/crawler.py
index d2e7785..6b64b8e 100644
--- a/crawler.py
+++ b/crawler.py
@@ -11,7 +11,7 @@ import config
from indexer import Indexer
from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser, MediaFileParser, TextFileParser, \
PictureFileParser, Sha1CheckSumCalculator, Sha256CheckSumCalculator, ContentMimeGuesser, MimeGuesser, FontParser, \
- PdfFileParser, DocxParser, EbookParser
+ PdfFileParser, DocxParser, EbookParser, SpreadSheetParser
from search import Search
from storage import Directory
from storage import Task, LocalStorage
@@ -53,7 +53,7 @@ class Crawler:
self.mime_guesser = mime_guesser
- def crawl(self, root_dir: str, counter: Value = None, total_files = None):
+ def crawl(self, root_dir: str, counter: Value = None, total_files=None):
in_q = Queue(50000) # TODO: get from config?
out_q = Queue()
@@ -154,7 +154,8 @@ class Crawler:
pass
finally:
out_q.task_done()
- self.indexer.index(self.documents, self.dir_id)
+ if self.documents:
+ self.indexer.index(self.documents, self.dir_id)
class TaskManager:
@@ -162,7 +163,7 @@ class TaskManager:
self.current_task = None
self.storage = storage
self.current_process = None
- self.indexer = Indexer("changeme")
+ self.indexer = Indexer(config.elasticsearch_index)
scheduler = BackgroundScheduler()
scheduler.add_job(self.check_new_task, "interval", seconds=0.5)
@@ -188,34 +189,54 @@ class TaskManager:
def execute_crawl(self, directory: Directory, counter: Value, done: Value, total_files: Value):
+ """
+ Run one crawl of a directory and hand the results to this manager's indexer.
+
+ Calls delete_directory() on the configured search index for the directory id first,
+ then builds the checksum calculators, MIME guesser and parser list from the
+ directory's options and runs a Crawler over directory.path.
+
+ :param directory: directory to crawl; its options drive the crawler setup
+ :param counter: shared value passed through to Crawler.crawl()
+ :param done: shared flag, set to 1 once Crawler.crawl() has returned
+ :param total_files: shared value passed through to Crawler.crawl()
+ """
- Search("changeme").delete_directory(directory.id)
+ Search(config.elasticsearch_index).delete_directory(directory.id)
+ chksum_calcs = self.make_checksums_list(directory)
+
+ # Any "MimeGuesser" value other than "extension" selects the content-based guesser
+ mime_guesser = ExtensionMimeGuesser() if directory.get_option("MimeGuesser") == "extension" \
+ else ContentMimeGuesser()
+
+ c = Crawler(self.make_parser_list(chksum_calcs, directory), mime_guesser, self.indexer, directory.id)
+ c.crawl(directory.path, counter, total_files)
+
+ # Signal completion to whoever shares the 'done' value
+ done.value = 1
+
+ @staticmethod
+ def make_checksums_list(directory):
+ """
+ Build the checksum calculators selected by the "CheckSumCalculators" option.
+
+ The option value is split on commas; the entries "md5", "sha1" and "sha256"
+ (surrounding whitespace ignored) each add one calculator, anything else is skipped.
+
+ :param directory: object whose get_option() is queried (execute_crawl passes its Directory)
+ :return: list of calculator instances, in the order the names appear in the option
+ """
chksum_calcs = []
-
for arg in directory.get_option("CheckSumCalculators").split(","):
-
if arg.strip() == "md5":
chksum_calcs.append(Md5CheckSumCalculator())
elif arg.strip() == "sha1":
chksum_calcs.append(Sha1CheckSumCalculator())
elif arg.strip() == "sha256":
chksum_calcs.append(Sha256CheckSumCalculator())
+ return chksum_calcs
- mime_guesser = ExtensionMimeGuesser() if directory.get_option("MimeGuesser") == "extension" \
- else ContentMimeGuesser()
-
- c = Crawler([GenericFileParser(chksum_calcs, directory.path),
- MediaFileParser(chksum_calcs, directory.path),
- TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path),
- PictureFileParser(chksum_calcs, directory.path),
- FontParser(chksum_calcs, directory.path),
- PdfFileParser(chksum_calcs, int(directory.get_option("PdfFileContentLength")), directory.path),
- DocxParser(chksum_calcs, int(directory.get_option("SpreadsheetContentLength")), directory.path),
- EbookParser(chksum_calcs, int(directory.get_option("EbookContentLength")), directory.path)],
- mime_guesser, self.indexer, directory.id)
- c.crawl(directory.path, counter, total_files)
-
- done.value = 1
+    @staticmethod
+    def make_parser_list(chksum_calcs, directory):
+        """
+        Build the file parsers enabled by the directory's "FileParsers" option.
+
+        The option is read as a comma-separated list of parser names (whitespace around
+        each name is ignored). A GenericFileParser is always first in the result; the
+        parsers named in the option follow in a fixed order, regardless of how the
+        option itself is ordered.
+
+        :param chksum_calcs: checksum calculators handed to every parser
+        :param directory: directory whose path and options configure the parsers
+        :return: list of parser instances
+        """
+        root = directory.path
+        enabled = {name.strip() for name in directory.get_option("FileParsers").split(",")}
+
+        def content_length(option_key):
+            # Option values are strings; the parsers receive the length as an int
+            return int(directory.get_option(option_key))
+
+        # (name accepted in "FileParsers", factory). Factories are only invoked for
+        # enabled parsers, so a content-length option is only read when it is needed.
+        optional_parsers = (
+            ("media", lambda: MediaFileParser(chksum_calcs, root)),
+            ("text", lambda: TextFileParser(chksum_calcs, content_length("TextFileContentLength"), root)),
+            ("picture", lambda: PictureFileParser(chksum_calcs, root)),
+            ("font", lambda: FontParser(chksum_calcs, root)),
+            ("pdf", lambda: PdfFileParser(chksum_calcs, content_length("PdfFileContentLength"), root)),
+            ("docx", lambda: DocxParser(chksum_calcs, content_length("DocxContentLength"), root)),
+            ("spreadsheet",
+             lambda: SpreadSheetParser(chksum_calcs, content_length("SpreadSheetContentLength"), root)),
+            ("ebook", lambda: EbookParser(chksum_calcs, content_length("EbookContentLength"), root)),
+        )
+
+        parsers = [GenericFileParser(chksum_calcs, root)]
+        for name, build in optional_parsers:
+            if name in enabled:
+                parsers.append(build())
+        return parsers
def execute_thumbnails(self, directory: Directory, total_files: Value, counter: Value, done: Value):
@@ -223,7 +244,7 @@ class TaskManager:
if os.path.exists(dest_path):
shutil.rmtree(dest_path)
- docs = Search("changeme").get_all_documents(directory.id)
+ docs = Search(config.elasticsearch_index).get_all_documents(directory.id)
tn_generator = ThumbnailGenerator(int(directory.get_option("ThumbnailSize")),
int(directory.get_option("ThumbnailQuality")),
diff --git a/indexer.py b/indexer.py
index 2b881ab..6033073 100644
--- a/indexer.py
+++ b/indexer.py
@@ -13,7 +13,7 @@ class Indexer:
self.index_name = index
self.es = elasticsearch.Elasticsearch()
- requests.head("http://localhost:9200")
+ requests.head(config.elasticsearch_url)
if self.es.indices.exists(self.index_name):
print("Index is already setup")
else:
@@ -93,7 +93,7 @@ class Indexer:
"genre": {"analyzer": "my_nGram", "type": "text"},
"album_artist": {"analyzer": "my_nGram", "type": "text"},
"content": {"analyzer": "content_analyser", "type": "text"},
- }}, doc_type="file", index=self.index_name)
+ }}, doc_type="file", index=self.index_name, include_type_name=True)
self.es.indices.open(index=self.index_name)
diff --git a/local_storage.db b/local_storage.db
index 15bcfe6..5209cf2 100644
Binary files a/local_storage.db and b/local_storage.db differ
diff --git a/run.py b/run.py
index 09a6f67..c2da694 100644
--- a/run.py
+++ b/run.py
@@ -25,7 +25,7 @@ flaskLogger = logging.getLogger('werkzeug')
flaskLogger.setLevel(logging.ERROR)
tm = TaskManager(storage)
-search = Search("changeme")
+search = Search(config.elasticsearch_index)
def get_dir_size(path):
diff --git a/search.py b/search.py
index 6402107..640c7ec 100644
--- a/search.py
+++ b/search.py
@@ -1,10 +1,12 @@
import json
import os
+
import elasticsearch
import requests
-import config
from elasticsearch import helpers
+import config
+
class Search:
@@ -14,9 +16,8 @@ class Search:
try:
requests.head(config.elasticsearch_url)
- print("elasticsearch is already running")
except:
- print("elasticsearch is not running")
+ print("elasticsearch is not running!")
self.search_iterator = None
@@ -33,7 +34,6 @@ class Search:
info = requests.get("http://localhost:9200/" + self.index_name + "/_stats")
if info.status_code == 200:
-
parsed_info = json.loads(info.text)
return int(parsed_info["indices"][self.index_name]["total"]["store"]["size_in_bytes"])
@@ -171,8 +171,9 @@ class Search:
path_list = []
- for option in suggestions["suggest"]["path"][0]["options"]:
- path_list.append(option["_source"]["path"])
+ if "suggest" in suggestions:
+ for option in suggestions["suggest"]["path"][0]["options"]:
+ path_list.append(option["_source"]["path"])
return path_list
@@ -202,6 +203,3 @@ class Search:
print("Error: multiple delete tasks at the same time")
except Exception as e:
print(e)
-
-
-
diff --git a/static/js/search.js b/static/js/search.js
index 0e54d82..03f6b28 100644
--- a/static/js/search.js
+++ b/static/js/search.js
@@ -24,7 +24,7 @@ new InspireTreeDOM(tree, {
tree.select();
tree.node("any").deselect();
-tree.on("node.click", function(event, node, handler) {
+tree.on("node.click", function (event, node, handler) {
event.preventTreeDefault();
if (node.id === "any") {
@@ -44,23 +44,23 @@ new autoComplete({
selector: '#pathBar',
minChars: 1,
delay: 75,
- renderItem: function (item){
+ renderItem: function (item) {
return '
' + item + '
';
},
- source: async function(term, suggest) {
+ source: async function (term, suggest) {
term = term.toLowerCase();
const choices = await getPathChoices();
let matches = [];
- for (let i=0; i= thresh && u < units.length - 1);
+ } while (Math.abs(bytes) >= thresh && u < units.length - 1);
return bytes.toFixed(1) + ' ' + units[u];
}
@@ -118,15 +120,21 @@ function humanFileSize(bytes) {
/**
* https://stackoverflow.com/questions/6312993
*/
-function humanTime (sec_num) {
+function humanTime(sec_num) {
+ // Formats a duration given in seconds as "hours:minutes:seconds".
+ // Fractional seconds are dropped; hours are not wrapped at 24.
sec_num = Math.floor(sec_num);
- let hours = Math.floor(sec_num / 3600);
+ let hours = Math.floor(sec_num / 3600);
let minutes = Math.floor((sec_num - (hours * 3600)) / 60);
let seconds = sec_num - (hours * 3600) - (minutes * 60);
- if (hours < 10) {hours = "0" + hours;}
- if (minutes < 10) {minutes = "0" + minutes;}
- if (seconds < 10) {seconds = "0" + seconds;}
+ // Prefix "0" to every component below 10 so each shows at least two digits
+ if (hours < 10) {
+ hours = "0" + hours;
+ }
+ if (minutes < 10) {
+ minutes = "0" + minutes;
+ }
+ if (seconds < 10) {
+ seconds = "0" + seconds;
+ }
return hours + ":" + minutes + ":" + seconds;
}
@@ -134,7 +142,7 @@ function humanTime (sec_num) {
function initPopover() {
$('[data-toggle="popover"]').popover({
trigger: "focus",
- delay: { "show": 0, "hide": 100 },
+ delay: {"show": 0, "hide": 100},
placement: "bottom",
html: true
});
@@ -152,7 +160,7 @@ function gifOver(thumbnail, documentId) {
thumbnail.mouseStayedOver = true;
- window.setTimeout(function() {
+ window.setTimeout(function () {
if (thumbnail.mouseStayedOver) {
thumbnail.removeEventListener('mouseover', callee, false);
@@ -163,7 +171,7 @@ function gifOver(thumbnail, documentId) {
});
- thumbnail.addEventListener("mouseout", function() {
+ thumbnail.addEventListener("mouseout", function () {
//Reset timer
thumbnail.mouseStayedOver = false;
thumbnail.setAttribute("src", "/thumb/" + documentId);
@@ -173,10 +181,10 @@ function gifOver(thumbnail, documentId) {
function downloadPopover(element, documentId) {
element.setAttribute("data-content",
- ' Download' +
- ' View');
+ ' Download' +
+ ' View');
element.setAttribute("data-toggle", "popover");
- element.addEventListener("mouseover", function() {
+ element.addEventListener("mouseover", function () {
element.focus();
});
}
@@ -242,7 +250,7 @@ function createDocCard(hit) {
thumbnail.setAttribute("controls", "");
thumbnail.setAttribute("preload", "none");
thumbnail.setAttribute("poster", "/thumb/" + hit["_id"]);
- thumbnail.addEventListener("dblclick", function() {
+ thumbnail.addEventListener("dblclick", function () {
thumbnail.webkitRequestFullScreen();
});
@@ -272,7 +280,7 @@ function createDocCard(hit) {
var format = hit["_source"]["format_name"];
//Hover
- if(format === "GIF") {
+ if (format === "GIF") {
gifOver(thumbnail, hit["_id"]);
}
break;
@@ -302,10 +310,12 @@ function createDocCard(hit) {
break;
case "image": {
- let formatTag = document.createElement("span");
- formatTag.setAttribute("class", "badge badge-pill badge-image");
- formatTag.appendChild(document.createTextNode(format));
- tags.push(formatTag);
+ if (format !== undefined) {
+ let formatTag = document.createElement("span");
+ formatTag.setAttribute("class", "badge badge-pill badge-image");
+ formatTag.appendChild(document.createTextNode(format));
+ tags.push(formatTag);
+ }
}
break;
case "audio": {
@@ -315,9 +325,7 @@ function createDocCard(hit) {
formatTag.appendChild(document.createTextNode(hit["_source"]["format_long_name"]));
tags.push(formatTag);
}
-
}
-
break;
case "text": {
let formatTag = document.createElement("span");
@@ -387,13 +395,15 @@ function createDocCard(hit) {
function makePageIndicator(searchResult) {
+ // Returns a new div whose text is "<docCount> / <total hit count>".
let pageIndicator = document.createElement("div");
- pageIndicator.appendChild(document.createTextNode(docCount + " / " +searchResult["hits"]["total"]));
+ // hits.total is accepted either as a plain value or as an object carrying the count
+ // in "value". NOTE(review): presumably the object form is the Elasticsearch 7+
+ // response shape - confirm against the server version in use.
+ const totalHits = searchResult["hits"]["total"].hasOwnProperty("value")
+ ? searchResult["hits"]["total"]["value"] : searchResult["hits"]["total"];
+ pageIndicator.appendChild(document.createTextNode(docCount + " / " + totalHits));
return pageIndicator;
}
function insertHits(resultContainer, hits) {
- for (let i = 0 ; i < hits.length; i++) {
+ for (let i = 0; i < hits.length; i++) {
resultContainer.appendChild(createDocCard(hits[i]));
docCount++;
}
@@ -409,7 +419,7 @@ window.addEventListener("scroll", function () {
//load next page
let xhttp = new XMLHttpRequest();
- xhttp.onreadystatechange = function() {
+ xhttp.onreadystatechange = function () {
if (this.readyState === 4 && this.status === 200) {
let searchResult = JSON.parse(this.responseText);
@@ -449,7 +459,7 @@ function getSelectedMimeTypes() {
for (let i = 0; i < selected.length; i++) {
- if(selected[i].id === "any") {
+ if (selected[i].id === "any") {
return "any"
}
@@ -468,7 +478,7 @@ function search() {
searchQueued = false;
//Clear old search results
- let searchResults = document.getElementById("searchResults");
+ let searchResults = document.getElementById("searchResults");
while (searchResults.firstChild) {
searchResults.removeChild(searchResults.firstChild);
}
@@ -476,7 +486,7 @@ function search() {
let query = searchBar.value;
let xhttp = new XMLHttpRequest();
- xhttp.onreadystatechange = function() {
+ xhttp.onreadystatechange = function () {
if (this.readyState === 4 && this.status === 200) {
let searchResult = JSON.parse(this.responseText);
@@ -542,7 +552,7 @@ $("#sizeSlider").ionRangeSlider({
drag_interval: true,
prettify: function (num) {
- if(num === 0) {
+ if (num === 0) {
return "0 B"
} else if (num >= 3684) {
return humanFileSize(num * num * num) + "+";
@@ -550,11 +560,11 @@ $("#sizeSlider").ionRangeSlider({
return humanFileSize(num * num * num)
},
- onChange: function(e) {
+ onChange: function (e) {
size_min = (e.from * e.from * e.from);
size_max = (e.to * e.to * e.to);
- if (e.to >= 3684) {
+ if (e.to >= 3684) {
size_max = 10000000000000;
}
@@ -566,12 +576,13 @@ $("#sizeSlider").ionRangeSlider({
function updateDirectories() {
+ // Rebuilds selectedDirs from the options currently selected in #directories
+ // (each option value parsed as an integer) and flags a search as queued.
let selected = $('#directories').find('option:selected');
selectedDirs = [];
- $(selected).each(function(){
+ $(selected).each(function () {
selectedDirs.push(parseInt($(this).val()));
});
searchQueued = true;
}
+
document.getElementById("directories").addEventListener("change", updateDirectories);
updateDirectories();
searchQueued = false;
@@ -581,7 +592,7 @@ function getPathChoices() {
return new Promise(getPaths => {
let xhttp = new XMLHttpRequest();
- xhttp.onreadystatechange = function() {
+ xhttp.onreadystatechange = function () {
if (this.readyState === 4 && this.status === 200) {
getPaths(JSON.parse(xhttp.responseText))
}
diff --git a/storage.py b/storage.py
index 06c8933..88f626c 100644
--- a/storage.py
+++ b/storage.py
@@ -1,7 +1,9 @@
-import sqlite3
import os
-import flask_bcrypt
+import sqlite3
import time
+
+import flask_bcrypt
+
import config
@@ -39,7 +41,7 @@ class Option:
Data structure to hold a directory option
"""
- def __init__(self, key: str, value: str, dir_id: int=None, opt_id: int = None):
+ def __init__(self, key: str, value: str, dir_id: int = None, opt_id: int = None):
self.key = key
self.value = value
self.id = opt_id
@@ -50,6 +52,7 @@ class Directory:
"""
Data structure to hold directory information
"""
+
def __init__(self, path: str, enabled: bool, options: list, name: str):
self.id = None
self.path = path
@@ -59,7 +62,7 @@ class Directory:
def __str__(self):
return self.path + " | enabled: " + str(self.enabled) + " | opts: " + str(self.options)
-
+
def get_option(self, key):
for option in self.options:
@@ -77,7 +80,6 @@ class Directory:
class Task:
-
INDEX = 1
GEN_THUMBNAIL = 2
@@ -268,7 +270,7 @@ class LocalStorage:
c = conn.cursor()
c.execute("UPDATE User SET is_admin=? WHERE username=?", (user.admin, user.username))
- c.execute("DELETE FROM User_canRead_Directory WHERE username=?", (user.username, ))
+ c.execute("DELETE FROM User_canRead_Directory WHERE username=?", (user.username,))
conn.commit()
for access in user.readable_directories:
@@ -332,7 +334,7 @@ class LocalStorage:
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
- c.execute("DELETE FROM Option WHERE id=?", (opt_id, ))
+ c.execute("DELETE FROM Option WHERE id=?", (opt_id,))
conn.commit()
conn.close()
@@ -385,7 +387,6 @@ class LocalStorage:
conn.close()
for db_task in tasks:
-
task = Task(db_task[2], db_task[1], db_task[3], db_task[4], db_task[0])
self.cached_tasks[task.id] = task
@@ -402,7 +403,7 @@ class LocalStorage:
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
- c.execute("DELETE FROM Task WHERE id=?", (task_id, ))
+ c.execute("DELETE FROM Task WHERE id=?", (task_id,))
conn.commit()
c.close()
@@ -438,4 +439,4 @@ class LocalStorage:
for access in accesses:
access_list.append(access[1])
- return access_list
\ No newline at end of file
+ return access_list
diff --git a/templates/directory.html b/templates/directory.html
index 8a07fdb..1bf5866 100644
--- a/templates/directory.html
+++ b/templates/directory.html
@@ -44,7 +44,7 @@
{{ directories[dir].name }} |
{{ directories[dir].path }} |
|
- Manage |
+ Manage |
{% endfor %}
diff --git a/templates/layout.html b/templates/layout.html
index c2e55a4..27c0057 100644
--- a/templates/layout.html
+++ b/templates/layout.html
@@ -59,18 +59,20 @@
{% if session["username"] %}
diff --git a/templates/task.html b/templates/task.html
index 38a472b..debeb8c 100644
--- a/templates/task.html
+++ b/templates/task.html
@@ -81,7 +81,7 @@
if (currentTask.total === 0) {
- document.getElementById("task-label-" + currentTask.id).innerHTML = "Calculating file count...";
+ document.getElementById("task-label-" + currentTask.id).innerHTML = "Initializing...";
} else {
let bar = document.getElementById("task-bar-" + currentTask.id);
@@ -90,6 +90,8 @@
if (percent === 100) {
bar.classList.add("bg-success")
+ } else {
+ bar.classList.remove("bg-success")
}
}
diff --git a/thumbnail.py b/thumbnail.py
index e9e4a6d..11db5d5 100644
--- a/thumbnail.py
+++ b/thumbnail.py
@@ -36,7 +36,7 @@ class ThumbnailGenerator:
p.terminate()
print("Timed out: " + path)
else:
- self.generate_image("tmp", dest_path)
+ self.generate_image(tmpfile, dest_path)
except Exception:
print("Couldn't make thumbnail for " + path)