Added thumbnail generator

This commit is contained in:
simon987 2018-03-20 19:23:38 -04:00
parent 047d2653bc
commit 90c1de3362
10 changed files with 90 additions and 13 deletions

View File

@ -6,6 +6,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser
from indexer import Indexer from indexer import Indexer
from search import Search from search import Search
from thumbnail import ThumbnailGenerator
class RunningTask: class RunningTask:
@ -80,10 +81,10 @@ class TaskManager:
if task.type == Task.INDEX: if task.type == Task.INDEX:
c = Crawler([]) c = Crawler([])
path = self.storage.dirs()[task.dir_id].path directory = self.storage.dirs()[task.dir_id]
self.current_task.total_files.value = c.countFiles(path) self.current_task.total_files.value = c.countFiles(directory.path)
self.current_process = Process(target=self.execute_crawl, args=(path, self.current_task.parsed_files, self.current_process = Process(target=self.execute_crawl, args=(directory.path, self.current_task.parsed_files,
self.current_task.done, self.current_task.done,
self.current_task.task.dir_id)) self.current_task.task.dir_id))
self.current_process.start() self.current_process.start()
@ -99,17 +100,25 @@ class TaskManager:
c = Crawler([GenericFileParser([Md5CheckSumCalculator()], ExtensionMimeGuesser())]) c = Crawler([GenericFileParser([Md5CheckSumCalculator()], ExtensionMimeGuesser())])
c.crawl(path, counter) c.crawl(path, counter)
# todo: create indexer inside the crawler and index every X files
Indexer("changeme").index(c.documents, directory) Indexer("changeme").index(c.documents, directory)
done.value = 1 done.value = 1
def execute_thumbnails(self, dir_id: int, total_files: Value, counter: Value, done: Value):
    """Load the indexed documents of a directory for the thumbnail task.

    Every document returned by ``Search("changeme").get_all_documents`` for
    ``dir_id`` is materialised into a list, and its length is published
    through ``total_files``. A ``ThumbnailGenerator`` is constructed but not
    used yet, and ``counter`` is never updated. ``done`` is set to 1 on the
    way out.
    """
    search = Search("changeme")
    documents = list(search.get_all_documents(dir_id))
    print(documents)  # todo remove

    total_files.value = len(documents)

    thumbnailer = ThumbnailGenerator(300)  # todo get from config

    done.value = 1
def cancel_task(self): def cancel_task(self):
self.current_task = None self.current_task = None
self.current_process.terminate() self.current_process.terminate()

View File

@ -53,9 +53,7 @@ class Indexer:
def index(self, docs: list, directory: int):
    """Submit ``docs`` in one bulk request through ``self.es``.

    The request body is produced by ``Indexer.create_bulk_index_string``
    from ``docs`` and ``directory``. The call targets ``self.index_name``
    with document type "file" and passes ``refresh="true"``.
    """
    doc_count = len(docs)
    print("Indexing " + str(doc_count) + " docs")

    bulk_body = Indexer.create_bulk_index_string(docs, directory)
    self.es.bulk(
        body=bulk_body,
        index=self.index_name,
        doc_type="file",
        refresh="true",
    )
def clear(self): def clear(self):

View File

@ -15,7 +15,7 @@ class Search:
except: except:
print("elasticsearch is not running") print("elasticsearch is not running")
def getAllDocuments(self, dir_id: int): def get_all_documents(self, dir_id: int):
return helpers.scan(client=self.es, return helpers.scan(client=self.es,
query={"_source": {"includes": ["path", "name"]}, query={"_source": {"includes": ["path", "name"]},

View File

@ -12,10 +12,10 @@ class CrawlerTest(TestCase):
c.crawl("test_folder") c.crawl("test_folder")
self.assertEqual(len(c.documents), 28) self.assertEqual(len(c.documents), 31)
def test_file_count(self): def test_file_count(self):
c = Crawler([]) c = Crawler([])
self.assertEqual(c.countFiles("test_folder"), 28) self.assertEqual(c.countFiles("test_folder"), 31)

View File

@ -2,6 +2,7 @@ from unittest import TestCase
from thumbnail import ThumbnailGenerator from thumbnail import ThumbnailGenerator
from PIL import Image from PIL import Image
import os import os
import shutil
class ThumbnailGeneratorTest(TestCase): class ThumbnailGeneratorTest(TestCase):
@ -22,3 +23,35 @@ class ThumbnailGeneratorTest(TestCase):
if os.path.isfile("test_thumb1.jpg"): if os.path.isfile("test_thumb1.jpg"):
os.remove("test_thumb1.jpg") os.remove("test_thumb1.jpg")
def test_generate_all(self):
    """generate_all writes a thumbnail for every image document and none for a CSV."""
    output_dir = "test_thumbnails"

    # Start from a clean slate. On a fresh checkout the directory does not
    # exist yet, and an unconditional rmtree would raise FileNotFoundError.
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)

    generator = ThumbnailGenerator(300)

    non_image_name = 'books.csv'
    image_names = ['sample_3.jpg', 'sample_5.png', 'sample_6.gif', 'sample_7.bmp', 'sample_2.jpeg']

    # Same document shape the original test fed in: a _source with path
    # and name, plus an _id that becomes the thumbnail's file name.
    docs = [{'_source': {'path': 'test_folder', 'name': name}, '_id': name + '-ID'}
            for name in [non_image_name] + image_names]

    generator.generate_all(docs, output_dir)

    csv_thumb = os.path.join(output_dir, non_image_name + '-ID')
    self.assertFalse(os.path.isfile(csv_thumb) and os.path.getsize(csv_thumb) > 0)

    for name in image_names:
        thumb = os.path.join(output_dir, name + '-ID')
        self.assertTrue(os.path.isfile(thumb) and os.path.getsize(thumb) > 0, thumb)

BIN
spec/test_folder/sample_5.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

BIN
spec/test_folder/sample_6.gif vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

BIN
spec/test_folder/sample_7.bmp vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

View File

@ -54,6 +54,14 @@ class Directory:
def __str__(self):
    """Return the path followed by the enabled flag and the options."""
    pieces = (
        self.path,
        " | enabled: ",
        str(self.enabled),
        " | opts: ",
        str(self.options),
    )
    return "".join(pieces)
def get_option(self, key):
    """Return the value of the first option whose key equals ``key``.

    Options are scanned in the order they appear in ``self.options``.
    None is returned when no option matches.
    """
    matching_values = (entry.value for entry in self.options if entry.key == key)
    return next(matching_values, None)
class Task: class Task:

View File

@ -1,14 +1,43 @@
from PIL import Image from PIL import Image
import os
from parsing import ContentMimeGuesser
class ThumbnailGenerator:
    """Writes JPEG thumbnails that fit inside a square bounding box."""

    def __init__(self, size):
        """Store the bounding box as a ``(size, size)`` tuple."""
        self.size = (size, size)
        # NOTE(review): this stores the ContentMimeGuesser class itself, not
        # an instance. Nothing in this class reads it yet; confirm intent.
        self.mime_guesser = ContentMimeGuesser

    def generate(self, path, dest_path):
        """Write a JPEG thumbnail of the image at ``path`` to ``dest_path``.

        The image is shrunk in place to fit ``self.size`` and pasted onto
        a magenta RGB canvas, which is saved as JPEG at quality 50. An
        OSError raised while opening, reading or saving is printed and
        swallowed, so callers can feed non-image files.
        """
        try:
            with open(path, "rb") as image_file:
                with Image.open(image_file) as image:
                    image.thumbnail(self.size, Image.BICUBIC)
                    canvas = Image.new("RGB", image.size, (255, 0, 255))
                    try:
                        has_alpha = image.mode in ('RGBA', 'LA') or \
                            (image.mode == 'P' and 'transparency' in image.info)
                        if has_alpha:
                            # 'LA' has two bands and 'P' only one, so indexing
                            # band 3 of the raw image raises IndexError for
                            # them. Normalise to RGBA first so the alpha band
                            # is always at index 3.
                            rgba = image.convert("RGBA")
                            canvas.paste(rgba, mask=rgba.split()[3])  # 3 is the alpha channel
                        else:
                            canvas.paste(image)

                        canvas.save(dest_path, "JPEG", quality=50, optimize=True)
                    finally:
                        # Release the canvas even when paste or save fails.
                        canvas.close()
        except OSError as e:
            print(e)
            print("Not an image " + path)

    def generate_all(self, docs, dest_path):
        """Generate a thumbnail for every document that points at a file.

        ``dest_path`` is created if missing. Each doc supplies
        ``doc["_source"]["path"]`` and ``doc["_source"]["name"]``, joined
        into the source path, and ``doc["_id"]``, used as the thumbnail's
        file name inside ``dest_path``. Docs whose source path is not an
        existing file are skipped.
        """
        os.makedirs(dest_path, exist_ok=True)

        for doc in docs:
            source = doc["_source"]
            full_path = os.path.join(source["path"], source["name"])
            if os.path.isfile(full_path):
                self.generate(full_path, os.path.join(dest_path, doc["_id"]))