Added thumbnail generator

This commit is contained in:
simon987 2018-03-20 19:23:38 -04:00
parent 047d2653bc
commit 90c1de3362
10 changed files with 90 additions and 13 deletions

View File

@ -6,6 +6,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser
from indexer import Indexer
from search import Search
from thumbnail import ThumbnailGenerator
class RunningTask:
@ -80,10 +81,10 @@ class TaskManager:
if task.type == Task.INDEX:
c = Crawler([])
path = self.storage.dirs()[task.dir_id].path
self.current_task.total_files.value = c.countFiles(path)
directory = self.storage.dirs()[task.dir_id]
self.current_task.total_files.value = c.countFiles(directory.path)
self.current_process = Process(target=self.execute_crawl, args=(path, self.current_task.parsed_files,
self.current_process = Process(target=self.execute_crawl, args=(directory.path, self.current_task.parsed_files,
self.current_task.done,
self.current_task.task.dir_id))
self.current_process.start()
@ -99,17 +100,25 @@ class TaskManager:
c = Crawler([GenericFileParser([Md5CheckSumCalculator()], ExtensionMimeGuesser())])
c.crawl(path, counter)
# todo: create indexer inside the crawler and index every X files
Indexer("changeme").index(c.documents, directory)
done.value = 1
def execute_thumbnails(self, dir_id: int, total_files: Value, counter: Value, done: Value):
    """
    Fetch the indexed documents of a directory and publish how many there are.

    :param dir_id: id of the directory whose documents are fetched
    :param total_files: shared value that receives the number of documents found
    :param counter: shared progress counter; this method does not update it yet
    :param done: shared flag, set to 1 when this method has finished
    """
    # Run the scan exactly once, through the snake_case method name. The
    # former camelCase call issued the same query and its result was
    # overwritten immediately.
    docs = list(Search("changeme").get_all_documents(dir_id))
    print(docs)  # todo remove

    total_files.value = len(docs)

    # NOTE(review): the generator is created but not used to produce any
    # thumbnail in this method yet.
    tn_generator = ThumbnailGenerator(300)  # todo get from config

    done.value = 1
# Cancel the task in progress: drop the reference to the tracked task, then
# terminate the process stored in self.current_process.
# NOTE(review): assumes a process has already been started; if
# self.current_process can be None at this point the terminate() call would
# fail - confirm against the callers.
def cancel_task(self):
self.current_task = None
self.current_process.terminate()

View File

@ -53,9 +53,7 @@ class Indexer:
def index(self, docs: list, directory: int):
    """
    Send all of ``docs`` to Elasticsearch in a single bulk request.

    :param docs: documents to index
    :param directory: value forwarded to ``create_bulk_index_string`` along with the documents
    """
    doc_count = len(docs)
    print("Indexing " + str(doc_count) + " docs")

    bulk_body = Indexer.create_bulk_index_string(docs, directory)

    print("bulk-start")
    # refresh="true" is passed through to the bulk call unchanged
    self.es.bulk(body=bulk_body, index=self.index_name, doc_type="file", refresh="true")
    print("bulk-done")
def clear(self):

View File

@ -15,7 +15,7 @@ class Search:
except:
print("elasticsearch is not running")
def getAllDocuments(self, dir_id: int):
def get_all_documents(self, dir_id: int):
return helpers.scan(client=self.es,
query={"_source": {"includes": ["path", "name"]},

View File

@ -12,10 +12,10 @@ class CrawlerTest(TestCase):
c.crawl("test_folder")
self.assertEqual(len(c.documents), 28)
self.assertEqual(len(c.documents), 31)
def test_file_count(self):
    """countFiles must report every file found under the fixture folder."""
    c = Crawler([])

    # Only one expected count can hold. 31 is the count after the three image
    # fixtures (sample_5.png, sample_6.gif, sample_7.bmp) were added to the
    # test folder; the former expectation was 28.
    self.assertEqual(c.countFiles("test_folder"), 31)

View File

@ -2,6 +2,7 @@ from unittest import TestCase
from thumbnail import ThumbnailGenerator
from PIL import Image
import os
import shutil
class ThumbnailGeneratorTest(TestCase):
@ -22,3 +23,35 @@ class ThumbnailGeneratorTest(TestCase):
if os.path.isfile("test_thumb1.jpg"):
os.remove("test_thumb1.jpg")
def test_generate_all(self):
    """generate_all must write a non-empty thumbnail for each image and none for the CSV."""
    # Start from a clean output folder. ignore_errors keeps the very first
    # run, where the folder does not exist yet, from raising.
    shutil.rmtree("test_thumbnails", ignore_errors=True)

    generator = ThumbnailGenerator(300)

    docs = [{'_source': {'path': 'test_folder', 'name': 'books.csv'}, '_id': 'books.csv-ID'},
            {'_source': {'path': 'test_folder', 'name': 'sample_3.jpg'}, '_id': 'sample_3.jpg-ID'},
            {'_source': {'path': 'test_folder', 'name': 'sample_5.png'}, '_id': 'sample_5.png-ID'},
            {'_source': {'path': 'test_folder', 'name': 'sample_6.gif'}, '_id': 'sample_6.gif-ID'},
            {'_source': {'path': 'test_folder', 'name': 'sample_7.bmp'}, '_id': 'sample_7.bmp-ID'},
            {'_source': {'path': 'test_folder', 'name': 'sample_2.jpeg'}, '_id': 'sample_2.jpeg-ID'}]

    generator.generate_all(docs, "test_thumbnails")

    # The CSV is not an image, so no usable thumbnail may exist for it.
    self.assertFalse(os.path.isfile("test_thumbnails/books.csv-ID") and
                     os.path.getsize("test_thumbnails/books.csv-ID") > 0)

    # Every image document must have produced a non-empty file named after its id.
    self.assertTrue(os.path.isfile("test_thumbnails/sample_3.jpg-ID") and
                    os.path.getsize("test_thumbnails/sample_3.jpg-ID") > 0)
    self.assertTrue(os.path.isfile("test_thumbnails/sample_2.jpeg-ID") and
                    os.path.getsize("test_thumbnails/sample_2.jpeg-ID") > 0)
    self.assertTrue(os.path.isfile("test_thumbnails/sample_5.png-ID") and
                    os.path.getsize("test_thumbnails/sample_5.png-ID") > 0)
    self.assertTrue(os.path.isfile("test_thumbnails/sample_6.gif-ID") and
                    os.path.getsize("test_thumbnails/sample_6.gif-ID") > 0)
    self.assertTrue(os.path.isfile("test_thumbnails/sample_7.bmp-ID") and
                    os.path.getsize("test_thumbnails/sample_7.bmp-ID") > 0)

BIN
spec/test_folder/sample_5.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

BIN
spec/test_folder/sample_6.gif vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

BIN
spec/test_folder/sample_7.bmp vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

View File

@ -54,6 +54,14 @@ class Directory:
def __str__(self):
    """Return the path followed by the enabled flag and the options, separated by ' | '."""
    enabled_part = " | enabled: " + str(self.enabled)
    options_part = " | opts: " + str(self.options)
    return self.path + enabled_part + options_part
def get_option(self, key):
    """
    Look up an option of this directory by key.

    :param key: key of the wanted option
    :return: the value of the first option whose key equals ``key``, or None when there is none
    """
    matching_values = (opt.value for opt in self.options if opt.key == key)
    return next(matching_values, None)
class Task:

View File

@ -1,14 +1,43 @@
from PIL import Image
import os
from parsing import ContentMimeGuesser
class ThumbnailGenerator:
    """Create JPEG thumbnails, bounded by a square size, for image files."""

    def __init__(self, size):
        """
        :param size: maximum width and height, in pixels, of the generated thumbnails
        """
        self.size = (size, size)
        # NOTE(review): this stores the ContentMimeGuesser class itself rather
        # than an instance, and nothing in this class reads it yet - confirm intent.
        self.mime_guesser = ContentMimeGuesser

    def generate(self, path, dest_path):
        """
        Write a JPEG thumbnail of the image at ``path`` to ``dest_path``.

        Transparent areas are flattened onto a magenta background. When the
        file cannot be read as an image, the error is printed and nothing is raised.

        :param path: path of the source file
        :param dest_path: path of the thumbnail to write; no file extension is needed
            because the output format is always given explicitly
        """
        try:
            # All image work happens inside this guarded block, so a file that
            # is not an image is reported instead of crashing the caller.
            with open(path, "rb") as image_file:
                with Image.open(image_file) as image:
                    image.thumbnail(self.size, Image.BICUBIC)

                    canvas = Image.new("RGB", image.size, (255, 0, 255))
                    try:
                        if image.mode in ('RGBA', 'LA') or (image.mode == 'P' and 'transparency' in image.info):
                            # Convert to RGBA first: 'LA' and 'P' images have fewer
                            # than four bands, so taking band 3 of the unconverted
                            # image would raise IndexError.
                            rgba = image.convert("RGBA")
                            canvas.paste(rgba, mask=rgba.split()[3])  # 3 is the alpha channel
                        else:
                            canvas.paste(image)

                        canvas.save(dest_path, "JPEG", quality=50, optimize=True)
                    finally:
                        # Release the canvas even when pasting or saving fails.
                        canvas.close()

        except OSError as e:
            print(e)
            print("Not an image " + path)

    def generate_all(self, docs, dest_path):
        """
        Generate a thumbnail for every document that points to an existing file.

        :param docs: iterable of dicts carrying ``_source.path``, ``_source.name`` and ``_id``
        :param dest_path: folder receiving the thumbnails, created when missing;
            each thumbnail is named after the ``_id`` of its document
        """
        os.makedirs(dest_path, exist_ok=True)

        for doc in docs:
            full_path = os.path.join(doc["_source"]["path"], doc["_source"]["name"])

            if os.path.isfile(full_path):
                self.generate(full_path, os.path.join(dest_path, doc["_id"]))