Bug fixes

This commit is contained in:
simon987 2018-04-21 20:36:49 -04:00
parent 6b754b4bb4
commit b963b667b8
18 changed files with 128 additions and 83 deletions

View File

@ -29,11 +29,13 @@ class RunningTask:
class Crawler: class Crawler:
def __init__(self, enabled_parsers: list, mime_guesser: MimeGuesser=ContentMimeGuesser(), indexer=None, dir_id=0): def __init__(self, enabled_parsers: list, mime_guesser: MimeGuesser=ContentMimeGuesser(), indexer=None, dir_id=0,
root_dir="/"):
self.documents = [] self.documents = []
self.enabled_parsers = enabled_parsers self.enabled_parsers = enabled_parsers
self.indexer = indexer self.indexer = indexer
self.dir_id = dir_id self.dir_id = dir_id
self.root_dir = root_dir
for parser in self.enabled_parsers: for parser in self.enabled_parsers:
if parser.is_default: if parser.is_default:
@ -136,14 +138,14 @@ class TaskManager:
mime_guesser = ExtensionMimeGuesser() if directory.get_option("MimeGuesser") == "extension" \ mime_guesser = ExtensionMimeGuesser() if directory.get_option("MimeGuesser") == "extension" \
else ContentMimeGuesser() else ContentMimeGuesser()
c = Crawler([GenericFileParser(chksum_calcs), c = Crawler([GenericFileParser(chksum_calcs, directory.path),
MediaFileParser(chksum_calcs), MediaFileParser(chksum_calcs, directory.path),
TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength"))), TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path),
PictureFileParser(chksum_calcs), PictureFileParser(chksum_calcs, directory.path),
FontParser(chksum_calcs), FontParser(chksum_calcs, directory.path),
PdfFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength"))), # todo get content len from other opt PdfFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path), # todo get content len from other opt
DocxParser(chksum_calcs, int(directory.get_option("TextFileContentLength"))), # todo get content len from other opt DocxParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path), # todo get content len from other opt
EbookParser(chksum_calcs, int(directory.get_option("TextFileContentLength")))], # todo get content len from other opt EbookParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path)], # todo get content len from other opt
mime_guesser, self.indexer, directory.id) mime_guesser, self.indexer, directory.id)
c.crawl(directory.path, counter) c.crawl(directory.path, counter)
@ -162,7 +164,7 @@ class TaskManager:
tn_generator = ThumbnailGenerator(int(directory.get_option("ThumbnailSize")), tn_generator = ThumbnailGenerator(int(directory.get_option("ThumbnailSize")),
int(directory.get_option("ThumbnailQuality")), int(directory.get_option("ThumbnailQuality")),
directory.get_option("ThumbnailColor")) directory.get_option("ThumbnailColor"))
tn_generator.generate_all(docs, dest_path, counter) tn_generator.generate_all(docs, dest_path, counter, directory)
done.value = 1 done.value = 1

View File

@ -15,7 +15,6 @@ class Indexer:
try: try:
requests.head("http://localhost:9200") requests.head("http://localhost:9200")
print("elasticsearch is already running")
except requests.exceptions.ConnectionError: except requests.exceptions.ConnectionError:
import time import time

View File

@ -9,7 +9,7 @@ import html
import warnings import warnings
import docx2txt import docx2txt
import xlrd import xlrd
from pdfminer.pdfparser import PDFParser from pdfminer.pdfparser import PDFParser, PDFSyntaxError
from pdfminer.pdfdocument import PDFDocument from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage from pdfminer.pdfpage import PDFPage
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
@ -124,8 +124,9 @@ class GenericFileParser(FileParser):
mime_types = [] mime_types = []
is_default = True is_default = True
def __init__(self, checksum_calculators: list): def __init__(self, checksum_calculators: list, root_dir: str):
self.checksum_calculators = checksum_calculators self.checksum_calculators = checksum_calculators
self.root_dir = root_dir
def parse(self, full_path: str) -> dict: def parse(self, full_path: str) -> dict:
""" """
@ -141,7 +142,7 @@ class GenericFileParser(FileParser):
name, extension = os.path.splitext(name) name, extension = os.path.splitext(name)
info["size"] = file_stat.st_size info["size"] = file_stat.st_size
info["path"] = path # todo save relative path info["path"] = os.path.relpath(path, self.root_dir)
info["name"] = name info["name"] = name
info["extension"] = extension[1:] info["extension"] = extension[1:]
info["mtime"] = file_stat.st_mtime info["mtime"] = file_stat.st_mtime
@ -156,8 +157,8 @@ class MediaFileParser(GenericFileParser):
is_default = False is_default = False
relevant_properties = ["bit_rate", "nb_streams", "duration", "format_name", "format_long_name"] relevant_properties = ["bit_rate", "nb_streams", "duration", "format_name", "format_long_name"]
def __init__(self, checksum_calculators: list): def __init__(self, checksum_calculators: list, root_dir):
super().__init__(checksum_calculators) super().__init__(checksum_calculators, root_dir)
self.mime_types = [ self.mime_types = [
"video/3gpp", "video/mp4", "video/mpeg", "video/ogg", "video/quicktime", "video/3gpp", "video/mp4", "video/mpeg", "video/ogg", "video/quicktime",
@ -207,8 +208,8 @@ class MediaFileParser(GenericFileParser):
class PictureFileParser(GenericFileParser): class PictureFileParser(GenericFileParser):
is_default = False is_default = False
def __init__(self, checksum_calculators: list): def __init__(self, checksum_calculators: list, root_dir):
super().__init__(checksum_calculators) super().__init__(checksum_calculators, root_dir)
self.mime_types = [ self.mime_types = [
"image/bmp", "image/cgm", "image/cis-cod", "image/g3fax", "image/gif", "image/bmp", "image/cgm", "image/cis-cod", "image/g3fax", "image/gif",
@ -246,8 +247,8 @@ class PictureFileParser(GenericFileParser):
class TextFileParser(GenericFileParser): class TextFileParser(GenericFileParser):
is_default = False is_default = False
def __init__(self, checksum_calculators: list, content_length: int): def __init__(self, checksum_calculators: list, content_length: int, root_dir):
super().__init__(checksum_calculators) super().__init__(checksum_calculators, root_dir)
self.content_length = content_length self.content_length = content_length
self.mime_types = [ self.mime_types = [
@ -271,7 +272,7 @@ class TextFileParser(GenericFileParser):
"text/x-bibtex", "text/x-tcl", "text/x-c++", "text/x-shellscript", "text/x-msdos-batch", "text/x-bibtex", "text/x-tcl", "text/x-c++", "text/x-shellscript", "text/x-msdos-batch",
"text/x-makefile", "text/rtf", "text/x-objective-c", "text/troff", "text/x-m4", "text/x-makefile", "text/rtf", "text/x-objective-c", "text/troff", "text/x-m4",
"text/x-lisp", "text/x-php", "text/x-gawk", "text/x-awk", "text/x-ruby", "text/x-po", "text/x-lisp", "text/x-php", "text/x-gawk", "text/x-awk", "text/x-ruby", "text/x-po",
"text/x-makefile", "application/javascript" "text/x-makefile", "application/javascript", "application/rtf"
] ]
def parse(self, full_path: str): def parse(self, full_path: str):
@ -298,8 +299,8 @@ class TextFileParser(GenericFileParser):
class FontParser(GenericFileParser): class FontParser(GenericFileParser):
is_default = False is_default = False
def __init__(self, checksum_calculators: list): def __init__(self, checksum_calculators: list, root_dir):
super().__init__(checksum_calculators) super().__init__(checksum_calculators, root_dir)
self.mime_types = [ self.mime_types = [
"application/font-sfnt", "application/font-woff", "application/vdn.ms-fontobject", "application/font-sfnt", "application/font-woff", "application/vdn.ms-fontobject",
@ -336,8 +337,8 @@ class FontParser(GenericFileParser):
class PdfFileParser(GenericFileParser): class PdfFileParser(GenericFileParser):
is_default = False is_default = False
def __init__(self, checksum_calculators: list, content_length: int): def __init__(self, checksum_calculators: list, content_length: int, root_dir):
super().__init__(checksum_calculators) super().__init__(checksum_calculators, root_dir)
self.content_length = content_length self.content_length = content_length
@ -351,11 +352,14 @@ class PdfFileParser(GenericFileParser):
if self.content_length > 0: if self.content_length > 0:
with open(full_path, "rb") as f: with open(full_path, "rb") as f:
info["content"] = "" try:
parser = PDFParser(f) parser = PDFParser(f)
document = PDFDocument(parser) document = PDFDocument(parser)
except PDFSyntaxError:
print("couldn't parse PDF " + full_path)
return info
info["content"] = ""
if len(document.info) > 0 and "Title" in document.info[0] and document.info[0]["Title"] != b"": if len(document.info) > 0 and "Title" in document.info[0] and document.info[0]["Title"] != b"":
if isinstance(document.info[0]["Title"], bytes): if isinstance(document.info[0]["Title"], bytes):
info["content"] += document.info[0]["Title"].decode("utf-8", "replace") + "\n" info["content"] += document.info[0]["Title"].decode("utf-8", "replace") + "\n"
@ -399,8 +403,8 @@ class PdfFileParser(GenericFileParser):
class EbookParser(GenericFileParser): class EbookParser(GenericFileParser):
is_default = False is_default = False
def __init__(self, checksum_calculators: list, content_length: int): def __init__(self, checksum_calculators: list, content_length: int, root_dir):
super().__init__(checksum_calculators) super().__init__(checksum_calculators, root_dir)
self.content_length = content_length self.content_length = content_length
@ -435,8 +439,8 @@ class EbookParser(GenericFileParser):
class DocxParser(GenericFileParser): class DocxParser(GenericFileParser):
is_default = False is_default = False
def __init__(self, checksum_calculators: list, content_length: int): def __init__(self, checksum_calculators: list, content_length: int, root_dir):
super().__init__(checksum_calculators) super().__init__(checksum_calculators, root_dir)
self.content_length = content_length self.content_length = content_length
@ -447,12 +451,16 @@ class DocxParser(GenericFileParser):
def parse(self, full_path: str):
    """Collect the generic file info and, when enabled, the document text.

    Starts from the dict returned by the parent parser and adds a
    ``content`` entry holding at most ``self.content_length`` characters
    of the text extracted by ``docx2txt``. Extraction is best-effort: on
    failure a message is printed and the dict is returned without a
    ``content`` entry.

    :param full_path: path of the .docx file to parse
    :return: the info dict for the file
    """
    info = super().parse(full_path)

    # A non-positive content length disables text extraction entirely.
    if self.content_length > 0:
        try:
            text = docx2txt.process(full_path)
            # Slicing already copes with text shorter than the limit, so no
            # explicit length comparison is needed.
            info["content"] = text[:self.content_length]
        except Exception:
            # Catch Exception instead of using a bare except so that
            # KeyboardInterrupt / SystemExit can still stop the crawl.
            print("Couldn't parse docx: " + full_path)

    return info
@ -460,8 +468,8 @@ class DocxParser(GenericFileParser):
class SpreadSheetParser(GenericFileParser): class SpreadSheetParser(GenericFileParser):
is_default = False is_default = False
def __init__(self, checksum_calculators: list, content_length: int): def __init__(self, checksum_calculators: list, content_length: int, root_dir):
super().__init__(checksum_calculators) super().__init__(checksum_calculators, root_dir)
self.content_length = content_length self.content_length = content_length

18
run.py
View File

@ -128,7 +128,23 @@ def search_route():
size_max = request.json["size_max"] size_max = request.json["size_max"]
mime_types = request.json["mime_types"] mime_types = request.json["mime_types"]
must_match = request.json["must_match"] must_match = request.json["must_match"]
# Keep only the requested directories that exist and are enabled.
# The result is built as a new list: removing items from a list while
# iterating over it skips the element that follows each removal, which
# would let a disabled or unknown directory slip through the filter.
known_dirs = storage.dirs()  # fetched once instead of on every iteration
directories = [
    search_directory for search_directory in request.json["directories"]
    if any(search_directory == dir_id and known_dirs[dir_id].enabled
           for dir_id in known_dirs)
]
path = request.json["path"] path = request.json["path"]
page = search.search(query, size_min, size_max, mime_types, must_match, directories, path) page = search.search(query, size_min, size_max, mime_types, must_match, directories, path)

View File

@ -8,9 +8,9 @@ class CrawlerTest(TestCase):
def test_dir_walk(self): def test_dir_walk(self):
c = Crawler([GenericFileParser([Sha1CheckSumCalculator()])]) c = Crawler([GenericFileParser([Sha1CheckSumCalculator()], "test_files/")])
c.crawl("test_folder") c.crawl("./test_folder")
self.assertEqual(len(c.documents), 31) self.assertEqual(len(c.documents), 31)
@ -19,3 +19,16 @@ class CrawlerTest(TestCase):
c = Crawler([]) c = Crawler([])
self.assertEqual(c.countFiles("test_folder"), 31) self.assertEqual(c.countFiles("test_folder"), 31)
def test_path(self):
    """Exactly two crawled documents must report "sub2" as their path
    when the parser root is the crawled folder."""
    crawler = Crawler([GenericFileParser([], "./test_folder")])
    crawler.crawl("./test_folder")

    docs_in_sub2 = [doc for doc in crawler.documents if doc["path"] == "sub2"]

    self.assertEqual(len(docs_in_sub2), 2)

View File

@ -6,7 +6,7 @@ class DocxParserTest(TestCase):
def test_parse_content(self): def test_parse_content(self):
parser = DocxParser([], 1000) parser = DocxParser([], 1000, "test_files/")
info = parser.parse("test_files/docx1.docx") info = parser.parse("test_files/docx1.docx")

View File

@ -6,7 +6,7 @@ class EbookParserTest(TestCase):
def test_parse_content(self): def test_parse_content(self):
parser = EbookParser([], 1000) parser = EbookParser([], 1000, "test_files/")
info = parser.parse("test_files/epub1.epub") info = parser.parse("test_files/epub1.epub")

View File

@ -16,34 +16,34 @@ class GenericFileParserTest(TestCase):
test_file.close() test_file.close()
os.utime("test_parse.txt", (1330123456, 1330654321)) os.utime("test_parse.txt", (1330123456, 1330654321))
self.parser = GenericFileParser([Md5CheckSumCalculator()]) self.parser = GenericFileParser([Md5CheckSumCalculator()], "./test_files/")
def tearDown(self): def tearDown(self):
os.remove("test_parse.txt") os.remove("test_parse.txt")
def test_parse_size(self): def test_parse_size(self):
result = self.parser.parse("test_parse.txt") result = self.parser.parse("./test_parse.txt")
self.assertEqual(result["size"], 8) self.assertEqual(result["size"], 8)
def test_parse_name(self): def test_parse_name(self):
result = self.parser.parse("test_parse.txt") result = self.parser.parse("./test_parse.txt")
self.assertEqual(result["name"], "test_parse") self.assertEqual(result["name"], "test_parse")
def test_parse_ext(self): def test_parse_ext(self):
result = self.parser.parse("test_parse.txt") result = self.parser.parse("./test_parse.txt")
self.assertEqual(result["extension"], "txt") self.assertEqual(result["extension"], "txt")
def test_parse_md5(self): def test_parse_md5(self):
result = self.parser.parse("test_parse.txt") result = self.parser.parse("./test_parse.txt")
self.assertEqual(result["md5"], "25D55AD283AA400AF464C76D713C07AD") self.assertEqual(result["md5"], "25D55AD283AA400AF464C76D713C07AD")
def test_mtime(self): def test_mtime(self):
result = self.parser.parse("test_parse.txt") result = self.parser.parse("./test_parse.txt")
self.assertEqual(result["mtime"], 1330654321) self.assertEqual(result["mtime"], 1330654321)

View File

@ -6,7 +6,7 @@ class FontParserTest(TestCase):
def test_parse_name_trueType(self): def test_parse_name_trueType(self):
parser = FontParser([]) parser = FontParser([], "test_files/")
info = parser.parse("test_files/truetype1.ttf") info = parser.parse("test_files/truetype1.ttf")
@ -14,7 +14,7 @@ class FontParserTest(TestCase):
def test_parse_name_openType(self): def test_parse_name_openType(self):
parser = FontParser([]) parser = FontParser([], "test_files/")
info = parser.parse("test_files/opentype1.otf") info = parser.parse("test_files/opentype1.otf")
@ -22,7 +22,7 @@ class FontParserTest(TestCase):
def test_parse_name_woff(self): def test_parse_name_woff(self):
parser = FontParser([]) parser = FontParser([], "test_files/")
info = parser.parse("test_files/woff.woff") info = parser.parse("test_files/woff.woff")
@ -30,7 +30,7 @@ class FontParserTest(TestCase):
def test_parse_name_woff2(self): def test_parse_name_woff2(self):
parser = FontParser([]) parser = FontParser([], "test_files/")
info = parser.parse("test_files/woff2.woff2") info = parser.parse("test_files/woff2.woff2")

View File

@ -6,37 +6,33 @@ class MediaFileParserTest(TestCase):
def test_audio_wav(self): def test_audio_wav(self):
parser = MediaFileParser([]) parser = MediaFileParser([], "test_files/")
info = parser.parse("test_files/cat1.wav") info = parser.parse("./test_files/cat1.wav")
self.assertEqual(info["format_name"], "wav")
self.assertEqual(info["format_long_name"], "WAV / WAVE (Waveform Audio)") self.assertEqual(info["format_long_name"], "WAV / WAVE (Waveform Audio)")
self.assertEqual(info["duration"], 20.173875) self.assertEqual(info["duration"], 20.173875)
def test_video_mov(self): def test_video_mov(self):
parser = MediaFileParser([]) parser = MediaFileParser([], "./test_files")
info = parser.parse("test_files/vid1.mp4") info = parser.parse("./test_files/vid1.mp4")
self.assertEqual(info["format_name"], "mov,mp4,m4a,3gp,3g2,mj2")
self.assertEqual(info["format_long_name"], "QuickTime / MOV") self.assertEqual(info["format_long_name"], "QuickTime / MOV")
self.assertEqual(info["duration"], 5.334) self.assertEqual(info["duration"], 5.334)
def test_video_webm(self): def test_video_webm(self):
parser = MediaFileParser([]) parser = MediaFileParser([], "test_files/")
info = parser.parse("test_files/vid2.webm") info = parser.parse("test_files/vid2.webm")
self.assertEqual(info["format_name"], "matroska,webm")
self.assertEqual(info["format_long_name"], "Matroska / WebM") self.assertEqual(info["format_long_name"], "Matroska / WebM")
self.assertEqual(info["duration"], 10.619) self.assertEqual(info["duration"], 10.619)
def test_video_ogg(self): def test_video_ogg(self):
parser = MediaFileParser([]) parser = MediaFileParser([], "test_files/")
info = parser.parse("test_files/vid3.ogv") info = parser.parse("test_files/vid3.ogv")
self.assertEqual(info["format_name"], "ogg")
self.assertEqual(info["format_long_name"], "Ogg") self.assertEqual(info["format_long_name"], "Ogg")
self.assertEqual(info["duration"], 10.618867) self.assertEqual(info["duration"], 10.618867)

View File

@ -6,7 +6,7 @@ class PdfParserTest(TestCase):
def test_parse_content(self): def test_parse_content(self):
parser = PdfFileParser([], 12488) parser = PdfFileParser([], 12488, "test_files/")
info = parser.parse("test_files/pdf1.pdf") info = parser.parse("test_files/pdf1.pdf")

View File

@ -6,7 +6,7 @@ class PictureFileParserTest(TestCase):
def test_parse_jpg(self): def test_parse_jpg(self):
parser = PictureFileParser([]) parser = PictureFileParser([], "test_files/")
info = parser.parse("test_folder/sample_1.jpg") info = parser.parse("test_folder/sample_1.jpg")
@ -17,7 +17,7 @@ class PictureFileParserTest(TestCase):
def test_parse_png(self): def test_parse_png(self):
parser = PictureFileParser([]) parser = PictureFileParser([], "test_files/")
info = parser.parse("test_folder/sample_5.png") info = parser.parse("test_folder/sample_5.png")
@ -28,7 +28,7 @@ class PictureFileParserTest(TestCase):
def test_parse_gif(self): def test_parse_gif(self):
parser = PictureFileParser([]) parser = PictureFileParser([], "test_files/")
info = parser.parse("test_folder/sample_6.gif") info = parser.parse("test_folder/sample_6.gif")
@ -39,7 +39,7 @@ class PictureFileParserTest(TestCase):
def test_parse_bmp(self): def test_parse_bmp(self):
parser = PictureFileParser([]) parser = PictureFileParser([], "test_files/")
info = parser.parse("test_folder/sample_7.bmp") info = parser.parse("test_folder/sample_7.bmp")

View File

@ -6,7 +6,7 @@ class PdfParserTest(TestCase):
def test_parse_content_xls(self): def test_parse_content_xls(self):
parser = SpreadSheetParser([], 1500) parser = SpreadSheetParser([], 1500, "test_files/")
info = parser.parse("test_files/xls1.xls") info = parser.parse("test_files/xls1.xls")
@ -14,7 +14,7 @@ class PdfParserTest(TestCase):
def test_parse_content_xlsx(self): def test_parse_content_xlsx(self):
parser = SpreadSheetParser([], 1500) parser = SpreadSheetParser([], 1500, "test_files/")
info = parser.parse("test_files/xlsx1.xlsx") info = parser.parse("test_files/xlsx1.xlsx")

View File

@ -6,7 +6,7 @@ class TextFileParserTest(TestCase):
def test_parse_csv(self): def test_parse_csv(self):
parser = TextFileParser([], 1234) parser = TextFileParser([], 1234, "test_files/")
info = parser.parse("test_files/text.csv") info = parser.parse("test_files/text.csv")

View File

@ -592,4 +592,4 @@ document.getElementById("pathBar").addEventListener("keyup", function () {
searchQueued = true; searchQueued = true;
}); });
window.setInterval(search, 75); window.setInterval(search, 150);

View File

@ -43,7 +43,7 @@
{% for dir in directories %} {% for dir in directories %}
<tr> <tr>
<td>{{ directories[dir].name }}</td> <td>{{ directories[dir].name }}</td>
<td><pre style="width: 80%">{{ directories[dir].path }}</pre></td> <td style="word-break: break-all"><pre>{{ directories[dir].path }}</pre></td>
<td><i class="far {{ "fa-check-square" if directories[dir].enabled else "fa-square" }}"></i></td> <td><i class="far {{ "fa-check-square" if directories[dir].enabled else "fa-square" }}"></i></td>
<td>2018-02-21</td> <td>2018-02-21</td>
<td><a href="directory/{{ dir }}" class="btn btn-primary"><i class="fas fa-cog"></i> Manage</a> </td> <td><a href="directory/{{ dir }}" class="btn btn-primary"><i class="fas fa-cog"></i> Manage</a> </td>

View File

@ -140,7 +140,6 @@
</div> </div>
<div class="card"> <div class="card">
{# TODO: put github wiki link #}
<div class="card-header">Options <a href="#" style="float:right">Learn more <i class="fas fa-external-link-alt"></i></a></div> <div class="card-header">Options <a href="#" style="float:right">Learn more <i class="fas fa-external-link-alt"></i></a></div>
<div class="card-body"> <div class="card-body">
<table class="info-table table-striped table-hover"> <table class="info-table table-striped table-hover">

View File

@ -1,6 +1,6 @@
from PIL import Image from PIL import Image
import os import os
from multiprocessing import Value from multiprocessing import Value, Process
import ffmpeg import ffmpeg
import cairosvg import cairosvg
@ -20,12 +20,21 @@ class ThumbnailGenerator:
if mime == "image/svg+xml": if mime == "image/svg+xml":
try: try:
cairosvg.svg2png(url=path, write_to="tmp") p = Process(target=cairosvg.svg2png, kwargs={"url": path, "write_to": "tmp"})
p.start()
p.join(1.5)
if p.is_alive():
p.terminate()
print("Timed out: " + path)
else:
self.generate_image("tmp", dest_path) self.generate_image("tmp", dest_path)
os.remove("tmp")
except Exception: except Exception:
print("Couldn't make thumbnail for " + path) print("Couldn't make thumbnail for " + path)
if os.path.exists("tmp"):
os.remove("tmp")
elif mime.startswith("image"): elif mime.startswith("image"):
try: try:
@ -41,18 +50,20 @@ class ThumbnailGenerator:
.run() .run()
) )
self.generate_image("tmp", dest_path) self.generate_image("tmp", dest_path)
os.remove("tmp")
except Exception as e: except Exception as e:
print(e) print(e)
print("Couldn't make thumbnail for " + path) print("Couldn't make thumbnail for " + path)
def generate_all(self, docs, dest_path, counter: Value=None): if os.path.exists("tmp"):
os.remove("tmp")
def generate_all(self, docs, dest_path, counter: Value=None, directory=None):
os.makedirs(dest_path, exist_ok=True) os.makedirs(dest_path, exist_ok=True)
for doc in docs: for doc in docs:
extension = "" if doc["_source"]["extension"] == "" else "." + doc["_source"]["extension"] extension = "" if doc["_source"]["extension"] == "" else "." + doc["_source"]["extension"]
full_path = os.path.join(doc["_source"]["path"], doc["_source"]["name"] + extension) full_path = os.path.join(directory.path, doc["_source"]["path"], doc["_source"]["name"] + extension)
if os.path.isfile(full_path) and "mime" in doc["_source"]: if os.path.isfile(full_path) and "mime" in doc["_source"]:
self.generate(full_path, os.path.join(dest_path, doc["_id"]), doc["_source"]["mime"]) self.generate(full_path, os.path.join(dest_path, doc["_id"]), doc["_source"]["mime"])
@ -61,6 +72,7 @@ class ThumbnailGenerator:
counter.value += 1 counter.value += 1
def generate_image(self, path, dest_path): def generate_image(self, path, dest_path):
with open(path, "rb") as image_file: with open(path, "rb") as image_file:
with Image.open(image_file) as image: with Image.open(image_file) as image: