mirror of
https://github.com/simon987/Simple-Incremental-Search-Tool.git
synced 2025-10-24 04:26:52 +00:00
Bug fixes
This commit is contained in:
parent
6b754b4bb4
commit
b963b667b8
22
crawler.py
22
crawler.py
@ -29,11 +29,13 @@ class RunningTask:
|
|||||||
|
|
||||||
class Crawler:
|
class Crawler:
|
||||||
|
|
||||||
def __init__(self, enabled_parsers: list, mime_guesser: MimeGuesser=ContentMimeGuesser(), indexer=None, dir_id=0):
|
def __init__(self, enabled_parsers: list, mime_guesser: MimeGuesser=ContentMimeGuesser(), indexer=None, dir_id=0,
|
||||||
|
root_dir="/"):
|
||||||
self.documents = []
|
self.documents = []
|
||||||
self.enabled_parsers = enabled_parsers
|
self.enabled_parsers = enabled_parsers
|
||||||
self.indexer = indexer
|
self.indexer = indexer
|
||||||
self.dir_id = dir_id
|
self.dir_id = dir_id
|
||||||
|
self.root_dir = root_dir
|
||||||
|
|
||||||
for parser in self.enabled_parsers:
|
for parser in self.enabled_parsers:
|
||||||
if parser.is_default:
|
if parser.is_default:
|
||||||
@ -136,14 +138,14 @@ class TaskManager:
|
|||||||
mime_guesser = ExtensionMimeGuesser() if directory.get_option("MimeGuesser") == "extension" \
|
mime_guesser = ExtensionMimeGuesser() if directory.get_option("MimeGuesser") == "extension" \
|
||||||
else ContentMimeGuesser()
|
else ContentMimeGuesser()
|
||||||
|
|
||||||
c = Crawler([GenericFileParser(chksum_calcs),
|
c = Crawler([GenericFileParser(chksum_calcs, directory.path),
|
||||||
MediaFileParser(chksum_calcs),
|
MediaFileParser(chksum_calcs, directory.path),
|
||||||
TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength"))),
|
TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path),
|
||||||
PictureFileParser(chksum_calcs),
|
PictureFileParser(chksum_calcs, directory.path),
|
||||||
FontParser(chksum_calcs),
|
FontParser(chksum_calcs, directory.path),
|
||||||
PdfFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength"))), # todo get content len from other opt
|
PdfFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path), # todo get content len from other opt
|
||||||
DocxParser(chksum_calcs, int(directory.get_option("TextFileContentLength"))), # todo get content len from other opt
|
DocxParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path), # todo get content len from other opt
|
||||||
EbookParser(chksum_calcs, int(directory.get_option("TextFileContentLength")))], # todo get content len from other opt
|
EbookParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path)], # todo get content len from other opt
|
||||||
mime_guesser, self.indexer, directory.id)
|
mime_guesser, self.indexer, directory.id)
|
||||||
c.crawl(directory.path, counter)
|
c.crawl(directory.path, counter)
|
||||||
|
|
||||||
@ -162,7 +164,7 @@ class TaskManager:
|
|||||||
tn_generator = ThumbnailGenerator(int(directory.get_option("ThumbnailSize")),
|
tn_generator = ThumbnailGenerator(int(directory.get_option("ThumbnailSize")),
|
||||||
int(directory.get_option("ThumbnailQuality")),
|
int(directory.get_option("ThumbnailQuality")),
|
||||||
directory.get_option("ThumbnailColor"))
|
directory.get_option("ThumbnailColor"))
|
||||||
tn_generator.generate_all(docs, dest_path, counter)
|
tn_generator.generate_all(docs, dest_path, counter, directory)
|
||||||
|
|
||||||
done.value = 1
|
done.value = 1
|
||||||
|
|
||||||
|
@ -15,7 +15,6 @@ class Indexer:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
requests.head("http://localhost:9200")
|
requests.head("http://localhost:9200")
|
||||||
print("elasticsearch is already running")
|
|
||||||
|
|
||||||
except requests.exceptions.ConnectionError:
|
except requests.exceptions.ConnectionError:
|
||||||
import time
|
import time
|
||||||
|
52
parsing.py
52
parsing.py
@ -9,7 +9,7 @@ import html
|
|||||||
import warnings
|
import warnings
|
||||||
import docx2txt
|
import docx2txt
|
||||||
import xlrd
|
import xlrd
|
||||||
from pdfminer.pdfparser import PDFParser
|
from pdfminer.pdfparser import PDFParser, PDFSyntaxError
|
||||||
from pdfminer.pdfdocument import PDFDocument
|
from pdfminer.pdfdocument import PDFDocument
|
||||||
from pdfminer.pdfpage import PDFPage
|
from pdfminer.pdfpage import PDFPage
|
||||||
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
||||||
@ -124,8 +124,9 @@ class GenericFileParser(FileParser):
|
|||||||
mime_types = []
|
mime_types = []
|
||||||
is_default = True
|
is_default = True
|
||||||
|
|
||||||
def __init__(self, checksum_calculators: list):
|
def __init__(self, checksum_calculators: list, root_dir: str):
|
||||||
self.checksum_calculators = checksum_calculators
|
self.checksum_calculators = checksum_calculators
|
||||||
|
self.root_dir = root_dir
|
||||||
|
|
||||||
def parse(self, full_path: str) -> dict:
|
def parse(self, full_path: str) -> dict:
|
||||||
"""
|
"""
|
||||||
@ -141,7 +142,7 @@ class GenericFileParser(FileParser):
|
|||||||
name, extension = os.path.splitext(name)
|
name, extension = os.path.splitext(name)
|
||||||
|
|
||||||
info["size"] = file_stat.st_size
|
info["size"] = file_stat.st_size
|
||||||
info["path"] = path # todo save relative path
|
info["path"] = os.path.relpath(path, self.root_dir)
|
||||||
info["name"] = name
|
info["name"] = name
|
||||||
info["extension"] = extension[1:]
|
info["extension"] = extension[1:]
|
||||||
info["mtime"] = file_stat.st_mtime
|
info["mtime"] = file_stat.st_mtime
|
||||||
@ -156,8 +157,8 @@ class MediaFileParser(GenericFileParser):
|
|||||||
is_default = False
|
is_default = False
|
||||||
relevant_properties = ["bit_rate", "nb_streams", "duration", "format_name", "format_long_name"]
|
relevant_properties = ["bit_rate", "nb_streams", "duration", "format_name", "format_long_name"]
|
||||||
|
|
||||||
def __init__(self, checksum_calculators: list):
|
def __init__(self, checksum_calculators: list, root_dir):
|
||||||
super().__init__(checksum_calculators)
|
super().__init__(checksum_calculators, root_dir)
|
||||||
|
|
||||||
self.mime_types = [
|
self.mime_types = [
|
||||||
"video/3gpp", "video/mp4", "video/mpeg", "video/ogg", "video/quicktime",
|
"video/3gpp", "video/mp4", "video/mpeg", "video/ogg", "video/quicktime",
|
||||||
@ -207,8 +208,8 @@ class MediaFileParser(GenericFileParser):
|
|||||||
class PictureFileParser(GenericFileParser):
|
class PictureFileParser(GenericFileParser):
|
||||||
is_default = False
|
is_default = False
|
||||||
|
|
||||||
def __init__(self, checksum_calculators: list):
|
def __init__(self, checksum_calculators: list, root_dir):
|
||||||
super().__init__(checksum_calculators)
|
super().__init__(checksum_calculators, root_dir)
|
||||||
|
|
||||||
self.mime_types = [
|
self.mime_types = [
|
||||||
"image/bmp", "image/cgm", "image/cis-cod", "image/g3fax", "image/gif",
|
"image/bmp", "image/cgm", "image/cis-cod", "image/g3fax", "image/gif",
|
||||||
@ -246,8 +247,8 @@ class PictureFileParser(GenericFileParser):
|
|||||||
class TextFileParser(GenericFileParser):
|
class TextFileParser(GenericFileParser):
|
||||||
is_default = False
|
is_default = False
|
||||||
|
|
||||||
def __init__(self, checksum_calculators: list, content_length: int):
|
def __init__(self, checksum_calculators: list, content_length: int, root_dir):
|
||||||
super().__init__(checksum_calculators)
|
super().__init__(checksum_calculators, root_dir)
|
||||||
self.content_length = content_length
|
self.content_length = content_length
|
||||||
|
|
||||||
self.mime_types = [
|
self.mime_types = [
|
||||||
@ -271,7 +272,7 @@ class TextFileParser(GenericFileParser):
|
|||||||
"text/x-bibtex", "text/x-tcl", "text/x-c++", "text/x-shellscript", "text/x-msdos-batch",
|
"text/x-bibtex", "text/x-tcl", "text/x-c++", "text/x-shellscript", "text/x-msdos-batch",
|
||||||
"text/x-makefile", "text/rtf", "text/x-objective-c", "text/troff", "text/x-m4",
|
"text/x-makefile", "text/rtf", "text/x-objective-c", "text/troff", "text/x-m4",
|
||||||
"text/x-lisp", "text/x-php", "text/x-gawk", "text/x-awk", "text/x-ruby", "text/x-po",
|
"text/x-lisp", "text/x-php", "text/x-gawk", "text/x-awk", "text/x-ruby", "text/x-po",
|
||||||
"text/x-makefile", "application/javascript"
|
"text/x-makefile", "application/javascript", "application/rtf"
|
||||||
]
|
]
|
||||||
|
|
||||||
def parse(self, full_path: str):
|
def parse(self, full_path: str):
|
||||||
@ -298,8 +299,8 @@ class TextFileParser(GenericFileParser):
|
|||||||
class FontParser(GenericFileParser):
|
class FontParser(GenericFileParser):
|
||||||
is_default = False
|
is_default = False
|
||||||
|
|
||||||
def __init__(self, checksum_calculators: list):
|
def __init__(self, checksum_calculators: list, root_dir):
|
||||||
super().__init__(checksum_calculators)
|
super().__init__(checksum_calculators, root_dir)
|
||||||
|
|
||||||
self.mime_types = [
|
self.mime_types = [
|
||||||
"application/font-sfnt", "application/font-woff", "application/vdn.ms-fontobject",
|
"application/font-sfnt", "application/font-woff", "application/vdn.ms-fontobject",
|
||||||
@ -336,8 +337,8 @@ class FontParser(GenericFileParser):
|
|||||||
class PdfFileParser(GenericFileParser):
|
class PdfFileParser(GenericFileParser):
|
||||||
is_default = False
|
is_default = False
|
||||||
|
|
||||||
def __init__(self, checksum_calculators: list, content_length: int):
|
def __init__(self, checksum_calculators: list, content_length: int, root_dir):
|
||||||
super().__init__(checksum_calculators)
|
super().__init__(checksum_calculators, root_dir)
|
||||||
|
|
||||||
self.content_length = content_length
|
self.content_length = content_length
|
||||||
|
|
||||||
@ -351,11 +352,14 @@ class PdfFileParser(GenericFileParser):
|
|||||||
if self.content_length > 0:
|
if self.content_length > 0:
|
||||||
with open(full_path, "rb") as f:
|
with open(full_path, "rb") as f:
|
||||||
|
|
||||||
info["content"] = ""
|
try:
|
||||||
|
|
||||||
parser = PDFParser(f)
|
parser = PDFParser(f)
|
||||||
document = PDFDocument(parser)
|
document = PDFDocument(parser)
|
||||||
|
except PDFSyntaxError:
|
||||||
|
print("couldn't parse PDF " + full_path)
|
||||||
|
return info
|
||||||
|
|
||||||
|
info["content"] = ""
|
||||||
if len(document.info) > 0 and "Title" in document.info[0] and document.info[0]["Title"] != b"":
|
if len(document.info) > 0 and "Title" in document.info[0] and document.info[0]["Title"] != b"":
|
||||||
if isinstance(document.info[0]["Title"], bytes):
|
if isinstance(document.info[0]["Title"], bytes):
|
||||||
info["content"] += document.info[0]["Title"].decode("utf-8", "replace") + "\n"
|
info["content"] += document.info[0]["Title"].decode("utf-8", "replace") + "\n"
|
||||||
@ -399,8 +403,8 @@ class PdfFileParser(GenericFileParser):
|
|||||||
class EbookParser(GenericFileParser):
|
class EbookParser(GenericFileParser):
|
||||||
is_default = False
|
is_default = False
|
||||||
|
|
||||||
def __init__(self, checksum_calculators: list, content_length: int):
|
def __init__(self, checksum_calculators: list, content_length: int, root_dir):
|
||||||
super().__init__(checksum_calculators)
|
super().__init__(checksum_calculators, root_dir)
|
||||||
|
|
||||||
self.content_length = content_length
|
self.content_length = content_length
|
||||||
|
|
||||||
@ -435,8 +439,8 @@ class EbookParser(GenericFileParser):
|
|||||||
class DocxParser(GenericFileParser):
|
class DocxParser(GenericFileParser):
|
||||||
is_default = False
|
is_default = False
|
||||||
|
|
||||||
def __init__(self, checksum_calculators: list, content_length: int):
|
def __init__(self, checksum_calculators: list, content_length: int, root_dir):
|
||||||
super().__init__(checksum_calculators)
|
super().__init__(checksum_calculators, root_dir)
|
||||||
|
|
||||||
self.content_length = content_length
|
self.content_length = content_length
|
||||||
|
|
||||||
@ -447,12 +451,16 @@ class DocxParser(GenericFileParser):
|
|||||||
def parse(self, full_path: str):
|
def parse(self, full_path: str):
|
||||||
info = super().parse(full_path)
|
info = super().parse(full_path)
|
||||||
|
|
||||||
|
if self.content_length > 0:
|
||||||
|
try:
|
||||||
text = docx2txt.process(full_path)
|
text = docx2txt.process(full_path)
|
||||||
|
|
||||||
if len(text) < self.content_length:
|
if len(text) < self.content_length:
|
||||||
info["content"] = text
|
info["content"] = text
|
||||||
else:
|
else:
|
||||||
info["content"] = text[0:self.content_length]
|
info["content"] = text[0:self.content_length]
|
||||||
|
except:
|
||||||
|
print("Couldn't parse Ebook: " + full_path)
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
@ -460,8 +468,8 @@ class DocxParser(GenericFileParser):
|
|||||||
class SpreadSheetParser(GenericFileParser):
|
class SpreadSheetParser(GenericFileParser):
|
||||||
is_default = False
|
is_default = False
|
||||||
|
|
||||||
def __init__(self, checksum_calculators: list, content_length: int):
|
def __init__(self, checksum_calculators: list, content_length: int, root_dir):
|
||||||
super().__init__(checksum_calculators)
|
super().__init__(checksum_calculators, root_dir)
|
||||||
|
|
||||||
self.content_length = content_length
|
self.content_length = content_length
|
||||||
|
|
||||||
|
18
run.py
18
run.py
@ -128,7 +128,23 @@ def search_route():
|
|||||||
size_max = request.json["size_max"]
|
size_max = request.json["size_max"]
|
||||||
mime_types = request.json["mime_types"]
|
mime_types = request.json["mime_types"]
|
||||||
must_match = request.json["must_match"]
|
must_match = request.json["must_match"]
|
||||||
directories = request.json["directories"] # todo: make sure dir exists and is enabled
|
directories = request.json["directories"]
|
||||||
|
|
||||||
|
# Remove disabled & non-existing directories
|
||||||
|
for search_directory in directories:
|
||||||
|
directory_exists = False
|
||||||
|
|
||||||
|
for dir_id in storage.dirs():
|
||||||
|
if search_directory == dir_id:
|
||||||
|
directory_exists = True
|
||||||
|
|
||||||
|
if not storage.dirs()[dir_id].enabled:
|
||||||
|
directories.remove(search_directory)
|
||||||
|
break
|
||||||
|
|
||||||
|
if not directory_exists:
|
||||||
|
directories.remove(search_directory)
|
||||||
|
|
||||||
path = request.json["path"]
|
path = request.json["path"]
|
||||||
|
|
||||||
page = search.search(query, size_min, size_max, mime_types, must_match, directories, path)
|
page = search.search(query, size_min, size_max, mime_types, must_match, directories, path)
|
||||||
|
@ -8,9 +8,9 @@ class CrawlerTest(TestCase):
|
|||||||
|
|
||||||
def test_dir_walk(self):
|
def test_dir_walk(self):
|
||||||
|
|
||||||
c = Crawler([GenericFileParser([Sha1CheckSumCalculator()])])
|
c = Crawler([GenericFileParser([Sha1CheckSumCalculator()], "test_files/")])
|
||||||
|
|
||||||
c.crawl("test_folder")
|
c.crawl("./test_folder")
|
||||||
|
|
||||||
self.assertEqual(len(c.documents), 31)
|
self.assertEqual(len(c.documents), 31)
|
||||||
|
|
||||||
@ -19,3 +19,16 @@ class CrawlerTest(TestCase):
|
|||||||
c = Crawler([])
|
c = Crawler([])
|
||||||
|
|
||||||
self.assertEqual(c.countFiles("test_folder"), 31)
|
self.assertEqual(c.countFiles("test_folder"), 31)
|
||||||
|
|
||||||
|
def test_path(self):
|
||||||
|
|
||||||
|
c = Crawler([GenericFileParser([], "./test_folder")])
|
||||||
|
c.crawl("./test_folder")
|
||||||
|
|
||||||
|
file_count_in_sub2 = 0
|
||||||
|
|
||||||
|
for doc in c.documents:
|
||||||
|
if doc["path"] == "sub2":
|
||||||
|
file_count_in_sub2 += 1
|
||||||
|
|
||||||
|
self.assertEqual(file_count_in_sub2, 2)
|
||||||
|
@ -6,7 +6,7 @@ class DocxParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_content(self):
|
def test_parse_content(self):
|
||||||
|
|
||||||
parser = DocxParser([], 1000)
|
parser = DocxParser([], 1000, "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_files/docx1.docx")
|
info = parser.parse("test_files/docx1.docx")
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ class EbookParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_content(self):
|
def test_parse_content(self):
|
||||||
|
|
||||||
parser = EbookParser([], 1000)
|
parser = EbookParser([], 1000, "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_files/epub1.epub")
|
info = parser.parse("test_files/epub1.epub")
|
||||||
|
|
||||||
|
@ -16,34 +16,34 @@ class GenericFileParserTest(TestCase):
|
|||||||
test_file.close()
|
test_file.close()
|
||||||
os.utime("test_parse.txt", (1330123456, 1330654321))
|
os.utime("test_parse.txt", (1330123456, 1330654321))
|
||||||
|
|
||||||
self.parser = GenericFileParser([Md5CheckSumCalculator()])
|
self.parser = GenericFileParser([Md5CheckSumCalculator()], "./test_files/")
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
os.remove("test_parse.txt")
|
os.remove("test_parse.txt")
|
||||||
|
|
||||||
def test_parse_size(self):
|
def test_parse_size(self):
|
||||||
result = self.parser.parse("test_parse.txt")
|
result = self.parser.parse("./test_parse.txt")
|
||||||
|
|
||||||
self.assertEqual(result["size"], 8)
|
self.assertEqual(result["size"], 8)
|
||||||
|
|
||||||
def test_parse_name(self):
|
def test_parse_name(self):
|
||||||
result = self.parser.parse("test_parse.txt")
|
result = self.parser.parse("./test_parse.txt")
|
||||||
|
|
||||||
self.assertEqual(result["name"], "test_parse")
|
self.assertEqual(result["name"], "test_parse")
|
||||||
|
|
||||||
def test_parse_ext(self):
|
def test_parse_ext(self):
|
||||||
result = self.parser.parse("test_parse.txt")
|
result = self.parser.parse("./test_parse.txt")
|
||||||
|
|
||||||
self.assertEqual(result["extension"], "txt")
|
self.assertEqual(result["extension"], "txt")
|
||||||
|
|
||||||
def test_parse_md5(self):
|
def test_parse_md5(self):
|
||||||
result = self.parser.parse("test_parse.txt")
|
result = self.parser.parse("./test_parse.txt")
|
||||||
|
|
||||||
self.assertEqual(result["md5"], "25D55AD283AA400AF464C76D713C07AD")
|
self.assertEqual(result["md5"], "25D55AD283AA400AF464C76D713C07AD")
|
||||||
|
|
||||||
def test_mtime(self):
|
def test_mtime(self):
|
||||||
|
|
||||||
result = self.parser.parse("test_parse.txt")
|
result = self.parser.parse("./test_parse.txt")
|
||||||
|
|
||||||
self.assertEqual(result["mtime"], 1330654321)
|
self.assertEqual(result["mtime"], 1330654321)
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ class FontParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_name_trueType(self):
|
def test_parse_name_trueType(self):
|
||||||
|
|
||||||
parser = FontParser([])
|
parser = FontParser([], "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_files/truetype1.ttf")
|
info = parser.parse("test_files/truetype1.ttf")
|
||||||
|
|
||||||
@ -14,7 +14,7 @@ class FontParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_name_openType(self):
|
def test_parse_name_openType(self):
|
||||||
|
|
||||||
parser = FontParser([])
|
parser = FontParser([], "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_files/opentype1.otf")
|
info = parser.parse("test_files/opentype1.otf")
|
||||||
|
|
||||||
@ -22,7 +22,7 @@ class FontParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_name_woff(self):
|
def test_parse_name_woff(self):
|
||||||
|
|
||||||
parser = FontParser([])
|
parser = FontParser([], "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_files/woff.woff")
|
info = parser.parse("test_files/woff.woff")
|
||||||
|
|
||||||
@ -30,7 +30,7 @@ class FontParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_name_woff2(self):
|
def test_parse_name_woff2(self):
|
||||||
|
|
||||||
parser = FontParser([])
|
parser = FontParser([], "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_files/woff2.woff2")
|
info = parser.parse("test_files/woff2.woff2")
|
||||||
|
|
||||||
|
@ -6,37 +6,33 @@ class MediaFileParserTest(TestCase):
|
|||||||
|
|
||||||
def test_audio_wav(self):
|
def test_audio_wav(self):
|
||||||
|
|
||||||
parser = MediaFileParser([])
|
parser = MediaFileParser([], "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_files/cat1.wav")
|
info = parser.parse("./test_files/cat1.wav")
|
||||||
|
|
||||||
self.assertEqual(info["format_name"], "wav")
|
|
||||||
self.assertEqual(info["format_long_name"], "WAV / WAVE (Waveform Audio)")
|
self.assertEqual(info["format_long_name"], "WAV / WAVE (Waveform Audio)")
|
||||||
self.assertEqual(info["duration"], 20.173875)
|
self.assertEqual(info["duration"], 20.173875)
|
||||||
|
|
||||||
def test_video_mov(self):
|
def test_video_mov(self):
|
||||||
parser = MediaFileParser([])
|
parser = MediaFileParser([], "./test_files")
|
||||||
|
|
||||||
info = parser.parse("test_files/vid1.mp4")
|
info = parser.parse("./test_files/vid1.mp4")
|
||||||
|
|
||||||
self.assertEqual(info["format_name"], "mov,mp4,m4a,3gp,3g2,mj2")
|
|
||||||
self.assertEqual(info["format_long_name"], "QuickTime / MOV")
|
self.assertEqual(info["format_long_name"], "QuickTime / MOV")
|
||||||
self.assertEqual(info["duration"], 5.334)
|
self.assertEqual(info["duration"], 5.334)
|
||||||
|
|
||||||
def test_video_webm(self):
|
def test_video_webm(self):
|
||||||
parser = MediaFileParser([])
|
parser = MediaFileParser([], "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_files/vid2.webm")
|
info = parser.parse("test_files/vid2.webm")
|
||||||
|
|
||||||
self.assertEqual(info["format_name"], "matroska,webm")
|
|
||||||
self.assertEqual(info["format_long_name"], "Matroska / WebM")
|
self.assertEqual(info["format_long_name"], "Matroska / WebM")
|
||||||
self.assertEqual(info["duration"], 10.619)
|
self.assertEqual(info["duration"], 10.619)
|
||||||
|
|
||||||
def test_video_ogg(self):
|
def test_video_ogg(self):
|
||||||
parser = MediaFileParser([])
|
parser = MediaFileParser([], "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_files/vid3.ogv")
|
info = parser.parse("test_files/vid3.ogv")
|
||||||
|
|
||||||
self.assertEqual(info["format_name"], "ogg")
|
|
||||||
self.assertEqual(info["format_long_name"], "Ogg")
|
self.assertEqual(info["format_long_name"], "Ogg")
|
||||||
self.assertEqual(info["duration"], 10.618867)
|
self.assertEqual(info["duration"], 10.618867)
|
||||||
|
@ -6,7 +6,7 @@ class PdfParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_content(self):
|
def test_parse_content(self):
|
||||||
|
|
||||||
parser = PdfFileParser([], 12488)
|
parser = PdfFileParser([], 12488, "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_files/pdf1.pdf")
|
info = parser.parse("test_files/pdf1.pdf")
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ class PictureFileParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_jpg(self):
|
def test_parse_jpg(self):
|
||||||
|
|
||||||
parser = PictureFileParser([])
|
parser = PictureFileParser([], "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_folder/sample_1.jpg")
|
info = parser.parse("test_folder/sample_1.jpg")
|
||||||
|
|
||||||
@ -17,7 +17,7 @@ class PictureFileParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_png(self):
|
def test_parse_png(self):
|
||||||
|
|
||||||
parser = PictureFileParser([])
|
parser = PictureFileParser([], "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_folder/sample_5.png")
|
info = parser.parse("test_folder/sample_5.png")
|
||||||
|
|
||||||
@ -28,7 +28,7 @@ class PictureFileParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_gif(self):
|
def test_parse_gif(self):
|
||||||
|
|
||||||
parser = PictureFileParser([])
|
parser = PictureFileParser([], "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_folder/sample_6.gif")
|
info = parser.parse("test_folder/sample_6.gif")
|
||||||
|
|
||||||
@ -39,7 +39,7 @@ class PictureFileParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_bmp(self):
|
def test_parse_bmp(self):
|
||||||
|
|
||||||
parser = PictureFileParser([])
|
parser = PictureFileParser([], "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_folder/sample_7.bmp")
|
info = parser.parse("test_folder/sample_7.bmp")
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ class PdfParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_content_xls(self):
|
def test_parse_content_xls(self):
|
||||||
|
|
||||||
parser = SpreadSheetParser([], 1500)
|
parser = SpreadSheetParser([], 1500, "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_files/xls1.xls")
|
info = parser.parse("test_files/xls1.xls")
|
||||||
|
|
||||||
@ -14,7 +14,7 @@ class PdfParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_content_xlsx(self):
|
def test_parse_content_xlsx(self):
|
||||||
|
|
||||||
parser = SpreadSheetParser([], 1500)
|
parser = SpreadSheetParser([], 1500, "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_files/xlsx1.xlsx")
|
info = parser.parse("test_files/xlsx1.xlsx")
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ class TextFileParserTest(TestCase):
|
|||||||
|
|
||||||
def test_parse_csv(self):
|
def test_parse_csv(self):
|
||||||
|
|
||||||
parser = TextFileParser([], 1234)
|
parser = TextFileParser([], 1234, "test_files/")
|
||||||
|
|
||||||
info = parser.parse("test_files/text.csv")
|
info = parser.parse("test_files/text.csv")
|
||||||
|
|
||||||
|
@ -592,4 +592,4 @@ document.getElementById("pathBar").addEventListener("keyup", function () {
|
|||||||
searchQueued = true;
|
searchQueued = true;
|
||||||
});
|
});
|
||||||
|
|
||||||
window.setInterval(search, 75);
|
window.setInterval(search, 150);
|
@ -43,7 +43,7 @@
|
|||||||
{% for dir in directories %}
|
{% for dir in directories %}
|
||||||
<tr>
|
<tr>
|
||||||
<td>{{ directories[dir].name }}</td>
|
<td>{{ directories[dir].name }}</td>
|
||||||
<td><pre style="width: 80%">{{ directories[dir].path }}</pre></td>
|
<td style="word-break: break-all"><pre>{{ directories[dir].path }}</pre></td>
|
||||||
<td><i class="far {{ "fa-check-square" if directories[dir].enabled else "fa-square" }}"></i></td>
|
<td><i class="far {{ "fa-check-square" if directories[dir].enabled else "fa-square" }}"></i></td>
|
||||||
<td>2018-02-21</td>
|
<td>2018-02-21</td>
|
||||||
<td><a href="directory/{{ dir }}" class="btn btn-primary"><i class="fas fa-cog"></i> Manage</a> </td>
|
<td><a href="directory/{{ dir }}" class="btn btn-primary"><i class="fas fa-cog"></i> Manage</a> </td>
|
||||||
|
@ -140,7 +140,6 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="card">
|
<div class="card">
|
||||||
{# TODO: put github wiki link #}
|
|
||||||
<div class="card-header">Options <a href="#" style="float:right">Learn more <i class="fas fa-external-link-alt"></i></a></div>
|
<div class="card-header">Options <a href="#" style="float:right">Learn more <i class="fas fa-external-link-alt"></i></a></div>
|
||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
<table class="info-table table-striped table-hover">
|
<table class="info-table table-striped table-hover">
|
||||||
|
24
thumbnail.py
24
thumbnail.py
@ -1,6 +1,6 @@
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
import os
|
import os
|
||||||
from multiprocessing import Value
|
from multiprocessing import Value, Process
|
||||||
import ffmpeg
|
import ffmpeg
|
||||||
import cairosvg
|
import cairosvg
|
||||||
|
|
||||||
@ -20,12 +20,21 @@ class ThumbnailGenerator:
|
|||||||
if mime == "image/svg+xml":
|
if mime == "image/svg+xml":
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cairosvg.svg2png(url=path, write_to="tmp")
|
p = Process(target=cairosvg.svg2png, kwargs={"url": path, "write_to": "tmp"})
|
||||||
|
p.start()
|
||||||
|
p.join(1.5)
|
||||||
|
|
||||||
|
if p.is_alive():
|
||||||
|
p.terminate()
|
||||||
|
print("Timed out: " + path)
|
||||||
|
else:
|
||||||
self.generate_image("tmp", dest_path)
|
self.generate_image("tmp", dest_path)
|
||||||
os.remove("tmp")
|
|
||||||
except Exception:
|
except Exception:
|
||||||
print("Couldn't make thumbnail for " + path)
|
print("Couldn't make thumbnail for " + path)
|
||||||
|
|
||||||
|
if os.path.exists("tmp"):
|
||||||
|
os.remove("tmp")
|
||||||
|
|
||||||
elif mime.startswith("image"):
|
elif mime.startswith("image"):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -41,18 +50,20 @@ class ThumbnailGenerator:
|
|||||||
.run()
|
.run()
|
||||||
)
|
)
|
||||||
self.generate_image("tmp", dest_path)
|
self.generate_image("tmp", dest_path)
|
||||||
os.remove("tmp")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
print("Couldn't make thumbnail for " + path)
|
print("Couldn't make thumbnail for " + path)
|
||||||
|
|
||||||
def generate_all(self, docs, dest_path, counter: Value=None):
|
if os.path.exists("tmp"):
|
||||||
|
os.remove("tmp")
|
||||||
|
|
||||||
|
def generate_all(self, docs, dest_path, counter: Value=None, directory=None):
|
||||||
|
|
||||||
os.makedirs(dest_path, exist_ok=True)
|
os.makedirs(dest_path, exist_ok=True)
|
||||||
|
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
extension = "" if doc["_source"]["extension"] == "" else "." + doc["_source"]["extension"]
|
extension = "" if doc["_source"]["extension"] == "" else "." + doc["_source"]["extension"]
|
||||||
full_path = os.path.join(doc["_source"]["path"], doc["_source"]["name"] + extension)
|
full_path = os.path.join(directory.path, doc["_source"]["path"], doc["_source"]["name"] + extension)
|
||||||
|
|
||||||
if os.path.isfile(full_path) and "mime" in doc["_source"]:
|
if os.path.isfile(full_path) and "mime" in doc["_source"]:
|
||||||
self.generate(full_path, os.path.join(dest_path, doc["_id"]), doc["_source"]["mime"])
|
self.generate(full_path, os.path.join(dest_path, doc["_id"]), doc["_source"]["mime"])
|
||||||
@ -61,6 +72,7 @@ class ThumbnailGenerator:
|
|||||||
counter.value += 1
|
counter.value += 1
|
||||||
|
|
||||||
def generate_image(self, path, dest_path):
|
def generate_image(self, path, dest_path):
|
||||||
|
|
||||||
with open(path, "rb") as image_file:
|
with open(path, "rb") as image_file:
|
||||||
with Image.open(image_file) as image:
|
with Image.open(image_file) as image:
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user