mirror of
https://github.com/simon987/Simple-Incremental-Search-Tool.git
synced 2025-04-16 08:46:49 +00:00
Added thumbnail generator
This commit is contained in:
parent
047d2653bc
commit
90c1de3362
17
crawler.py
17
crawler.py
@ -6,6 +6,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
|
||||
from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser
|
||||
from indexer import Indexer
|
||||
from search import Search
|
||||
from thumbnail import ThumbnailGenerator
|
||||
|
||||
|
||||
class RunningTask:
|
||||
@ -80,10 +81,10 @@ class TaskManager:
|
||||
|
||||
if task.type == Task.INDEX:
|
||||
c = Crawler([])
|
||||
path = self.storage.dirs()[task.dir_id].path
|
||||
self.current_task.total_files.value = c.countFiles(path)
|
||||
directory = self.storage.dirs()[task.dir_id]
|
||||
self.current_task.total_files.value = c.countFiles(directory.path)
|
||||
|
||||
self.current_process = Process(target=self.execute_crawl, args=(path, self.current_task.parsed_files,
|
||||
self.current_process = Process(target=self.execute_crawl, args=(directory.path, self.current_task.parsed_files,
|
||||
self.current_task.done,
|
||||
self.current_task.task.dir_id))
|
||||
self.current_process.start()
|
||||
@ -99,17 +100,25 @@ class TaskManager:
|
||||
c = Crawler([GenericFileParser([Md5CheckSumCalculator()], ExtensionMimeGuesser())])
|
||||
c.crawl(path, counter)
|
||||
|
||||
# todo: create indexer inside the crawler and index every X files
|
||||
Indexer("changeme").index(c.documents, directory)
|
||||
done.value = 1
|
||||
|
||||
def execute_thumbnails(self, dir_id: int, total_files: Value, counter: Value, done: Value):
|
||||
|
||||
docs = list(Search("changeme").getAllDocuments(dir_id))
|
||||
docs = list(Search("changeme").get_all_documents(dir_id))
|
||||
|
||||
print(docs) #todo remove
|
||||
|
||||
total_files.value = len(docs)
|
||||
|
||||
tn_generator = ThumbnailGenerator(300) # todo get from config
|
||||
|
||||
|
||||
|
||||
done.value = 1
|
||||
|
||||
|
||||
def cancel_task(self):
|
||||
self.current_task = None
|
||||
self.current_process.terminate()
|
||||
|
@ -53,9 +53,7 @@ class Indexer:
|
||||
def index(self, docs: list, directory: int):
|
||||
print("Indexing " + str(len(docs)) + " docs")
|
||||
index_string = Indexer.create_bulk_index_string(docs, directory)
|
||||
print("bulk-start")
|
||||
self.es.bulk(body=index_string, index=self.index_name, doc_type="file", refresh="true")
|
||||
print("bulk-done")
|
||||
|
||||
def clear(self):
|
||||
|
||||
|
@ -15,7 +15,7 @@ class Search:
|
||||
except:
|
||||
print("elasticsearch is not running")
|
||||
|
||||
def getAllDocuments(self, dir_id: int):
|
||||
def get_all_documents(self, dir_id: int):
|
||||
|
||||
return helpers.scan(client=self.es,
|
||||
query={"_source": {"includes": ["path", "name"]},
|
||||
|
@ -12,10 +12,10 @@ class CrawlerTest(TestCase):
|
||||
|
||||
c.crawl("test_folder")
|
||||
|
||||
self.assertEqual(len(c.documents), 28)
|
||||
self.assertEqual(len(c.documents), 31)
|
||||
|
||||
def test_file_count(self):
|
||||
|
||||
c = Crawler([])
|
||||
|
||||
self.assertEqual(c.countFiles("test_folder"), 28)
|
||||
self.assertEqual(c.countFiles("test_folder"), 31)
|
||||
|
@ -2,6 +2,7 @@ from unittest import TestCase
|
||||
from thumbnail import ThumbnailGenerator
|
||||
from PIL import Image
|
||||
import os
|
||||
import shutil
|
||||
|
||||
|
||||
class ThumbnailGeneratorTest(TestCase):
|
||||
@ -22,3 +23,35 @@ class ThumbnailGeneratorTest(TestCase):
|
||||
if os.path.isfile("test_thumb1.jpg"):
|
||||
os.remove("test_thumb1.jpg")
|
||||
|
||||
def test_generate_all(self):
|
||||
shutil.rmtree("test_thumbnails")
|
||||
|
||||
generator = ThumbnailGenerator(300)
|
||||
|
||||
docs = [{'_source': {'path': 'test_folder', 'name': 'books.csv'}, '_id': 'books.csv-ID'},
|
||||
{'_source': {'path': 'test_folder', 'name': 'sample_3.jpg'}, '_id': 'sample_3.jpg-ID'},
|
||||
{'_source': {'path': 'test_folder', 'name': 'sample_5.png'}, '_id': 'sample_5.png-ID'},
|
||||
{'_source': {'path': 'test_folder', 'name': 'sample_6.gif'}, '_id': 'sample_6.gif-ID'},
|
||||
{'_source': {'path': 'test_folder', 'name': 'sample_7.bmp'}, '_id': 'sample_7.bmp-ID'},
|
||||
{'_source': {'path': 'test_folder', 'name': 'sample_2.jpeg'}, '_id': 'sample_2.jpeg-ID'}]
|
||||
|
||||
generator.generate_all(docs, "test_thumbnails")
|
||||
|
||||
self.assertFalse(os.path.isfile("test_thumbnails/books.csv-ID") and
|
||||
os.path.getsize("test_thumbnails/books.csv-ID") > 0)
|
||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_3.jpg-ID") and
|
||||
os.path.getsize("test_thumbnails/sample_3.jpg-ID") > 0)
|
||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_2.jpeg-ID") and
|
||||
os.path.getsize("test_thumbnails/sample_2.jpeg-ID") > 0)
|
||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_5.png-ID") and
|
||||
os.path.getsize("test_thumbnails/sample_5.png-ID") > 0)
|
||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_6.gif-ID") and
|
||||
os.path.getsize("test_thumbnails/sample_6.gif-ID") > 0)
|
||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_7.bmp-ID") and
|
||||
os.path.getsize("test_thumbnails/sample_7.bmp-ID") > 0)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
BIN
spec/test_folder/sample_5.png
vendored
Normal file
BIN
spec/test_folder/sample_5.png
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 14 KiB |
BIN
spec/test_folder/sample_6.gif
vendored
Normal file
BIN
spec/test_folder/sample_6.gif
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 25 KiB |
BIN
spec/test_folder/sample_7.bmp
vendored
Normal file
BIN
spec/test_folder/sample_7.bmp
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 88 KiB |
@ -54,6 +54,14 @@ class Directory:
|
||||
|
||||
def __str__(self):
|
||||
return self.path + " | enabled: " + str(self.enabled) + " | opts: " + str(self.options)
|
||||
|
||||
def get_option(self, key):
|
||||
|
||||
for option in self.options:
|
||||
if option.key == key:
|
||||
return option.value
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class Task:
|
||||
|
37
thumbnail.py
37
thumbnail.py
@ -1,14 +1,43 @@
|
||||
from PIL import Image
|
||||
import os
|
||||
from parsing import ContentMimeGuesser
|
||||
|
||||
|
||||
class ThumbnailGenerator:
|
||||
|
||||
def __init__(self, size):
|
||||
self.size = (size, size)
|
||||
self.mime_guesser = ContentMimeGuesser
|
||||
|
||||
def generate(self, path, dest_path):
|
||||
|
||||
image = Image.open(path)
|
||||
image.thumbnail(self.size, Image.BICUBIC)
|
||||
image.save(dest_path)
|
||||
image.close()
|
||||
try:
|
||||
with open(path, "rb") as image_file:
|
||||
with Image.open(image_file) as image:
|
||||
|
||||
image.thumbnail(self.size, Image.BICUBIC)
|
||||
|
||||
canvas = Image.new("RGB", image.size, (255, 0, 255))
|
||||
|
||||
if image.mode in ('RGBA', 'LA') or (image.mode == 'P' and 'transparency' in image.info):
|
||||
canvas.paste(image, mask=image.split()[3]) # 3 is the alpha channel
|
||||
else:
|
||||
canvas.paste(image)
|
||||
|
||||
canvas.save(dest_path, "JPEG", quality=50, optimize=True)
|
||||
canvas.close()
|
||||
|
||||
except OSError as e:
|
||||
print(e)
|
||||
print("Not an image " + path)
|
||||
|
||||
def generate_all(self, docs, dest_path):
|
||||
|
||||
os.makedirs(dest_path, exist_ok=True)
|
||||
|
||||
for doc in docs:
|
||||
|
||||
full_path = os.path.join(doc["_source"]["path"], doc["_source"]["name"])
|
||||
|
||||
if os.path.isfile(full_path):
|
||||
self.generate(full_path, os.path.join(dest_path, doc["_id"]))
|
Loading…
x
Reference in New Issue
Block a user