Progress bar + thumbnail generator

This commit is contained in:
simon 2018-03-20 09:01:43 -04:00
parent 9d75fc4d59
commit 047d2653bc
11 changed files with 151 additions and 25 deletions

View File

@ -5,18 +5,19 @@ from multiprocessing import Process, Value
from apscheduler.schedulers.background import BackgroundScheduler from apscheduler.schedulers.background import BackgroundScheduler
from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser
from indexer import Indexer from indexer import Indexer
from search import Search
class RunningTask: class RunningTask:
def __init__(self, task: Task): def __init__(self, task: Task):
self.total_files = 0 self.total_files = Value("i", 0)
self.parsed_files = Value("i", 0) self.parsed_files = Value("i", 0)
self.task = task self.task = task
self.done = Value("i", 0) self.done = Value("i", 0)
def to_json(self): def to_json(self):
return json.dumps({"parsed": self.parsed_files.value, "total": self.total_files, "id": self.task.id}) return json.dumps({"parsed": self.parsed_files.value, "total": self.total_files.value, "id": self.task.id})
class Crawler: class Crawler:
@ -77,24 +78,36 @@ class TaskManager:
def start_task(self, task: Task): def start_task(self, task: Task):
self.current_task = RunningTask(task) self.current_task = RunningTask(task)
if task.type == Task.INDEX:
c = Crawler([]) c = Crawler([])
path = self.storage.dirs()[task.dir_id].path path = self.storage.dirs()[task.dir_id].path
self.current_task.total_files = c.countFiles(path) self.current_task.total_files.value = c.countFiles(path)
print("Started task - " + str(self.current_task.total_files) + " files") self.current_process = Process(target=self.execute_crawl, args=(path, self.current_task.parsed_files,
print(path) self.current_task.done,
self.current_task.task.dir_id))
self.current_process = Process(target=self.execute_crawl, args=(path, self.current_task.parsed_files, self.current_task.done))
# self.current_process.daemon = True
self.current_process.start() self.current_process.start()
def execute_crawl(self, path: str, counter: Value, done: Value): elif task.type == Task.GEN_THUMBNAIL:
self.current_process = Process(target=self.execute_thumbnails, args=(self.current_task.task.dir_id,
self.current_task.total_files,
self.current_task.parsed_files,
self.current_task.done))
self.current_process.start()
def execute_crawl(self, path: str, counter: Value, done: Value, directory: int):
c = Crawler([GenericFileParser([Md5CheckSumCalculator()], ExtensionMimeGuesser())]) c = Crawler([GenericFileParser([Md5CheckSumCalculator()], ExtensionMimeGuesser())])
c.crawl(path, counter) c.crawl(path, counter)
Indexer("changeme").index(c.documents) Indexer("changeme").index(c.documents, directory)
done.value = 1
def execute_thumbnails(self, dir_id: int, total_files: Value, counter: Value, done: Value):
docs = list(Search("changeme").getAllDocuments(dir_id))
total_files.value = len(docs)
print("Done")
done.value = 1 done.value = 1
def cancel_task(self): def cancel_task(self):

View File

@ -30,7 +30,7 @@ class Indexer:
subprocess.Popen(["elasticsearch/bin/elasticsearch"]) subprocess.Popen(["elasticsearch/bin/elasticsearch"])
@staticmethod @staticmethod
def create_bulk_index_string(docs: list): def create_bulk_index_string(docs: list, directory: int):
""" """
Creates a insert string for sending to elasticsearch Creates a insert string for sending to elasticsearch
""" """
@ -42,6 +42,7 @@ class Indexer:
action_string = '{"index":{}}\n' action_string = '{"index":{}}\n'
for doc in docs: for doc in docs:
doc["directory"] = directory
result += action_string result += action_string
result += json.dumps(doc) + "\n" result += json.dumps(doc) + "\n"
@ -49,11 +50,11 @@ class Indexer:
return result return result
def index(self, docs: list): def index(self, docs: list, directory: int):
print("Indexing " + str(len(docs)) + " docs") print("Indexing " + str(len(docs)) + " docs")
index_string = Indexer.create_bulk_index_string(docs) index_string = Indexer.create_bulk_index_string(docs, directory)
print("bulk-start") print("bulk-start")
self.es.bulk(body=index_string, index=self.index_name, doc_type="file") self.es.bulk(body=index_string, index=self.index_name, doc_type="file", refresh="true")
print("bulk-done") print("bulk-done")
def clear(self): def clear(self):
@ -73,7 +74,8 @@ class Indexer:
self.es.indices.put_mapping(body='{"properties": {' self.es.indices.put_mapping(body='{"properties": {'
'"name": {"type": "text", "analyzer": "path_analyser", "copy_to": "suggest-path"},' '"name": {"type": "text", "analyzer": "path_analyser", "copy_to": "suggest-path"},'
'"suggest-path": {"type": "completion", "analyzer": "keyword"},' '"suggest-path": {"type": "completion", "analyzer": "keyword"},'
'"mime": {"type": "keyword"}' '"mime": {"type": "keyword"},'
'"directory": {"type": "keyword"}'
'}}', doc_type="file", index=self.index_name) '}}', doc_type="file", index=self.index_name)
self.es.indices.open(index=self.index_name) self.es.indices.open(index=self.index_name)

24
search.py Normal file
View File

@ -0,0 +1,24 @@
import elasticsearch
from elasticsearch import helpers
import requests
class Search:
    """
    Read access to the documents stored in an elasticsearch index
    """

    def __init__(self, index: str):
        """
        Creates the elasticsearch client and reports whether a node answers on localhost:9200

        :param index: name of the elasticsearch index that will be queried
        """
        self.index_name = index
        self.es = elasticsearch.Elasticsearch()

        try:
            requests.head("http://localhost:9200")
        except requests.exceptions.RequestException:
            # Only failures of the HTTP probe itself mean "not running". A bare except
            # would also swallow KeyboardInterrupt/SystemExit and hide programming errors.
            print("elasticsearch is not running")
        else:
            print("elasticsearch is already running")

    def getAllDocuments(self, dir_id: int):
        """
        Fetches every document whose "directory" field equals dir_id

        Only the "path" and "name" fields are requested in the _source of each hit.

        :param dir_id: id of the directory the documents were indexed under
        :return: the iterable of hits produced by elasticsearch.helpers.scan
        """
        query = {
            "_source": {"includes": ["path", "name"]},
            "query": {"term": {"directory": dir_id}},
        }

        return helpers.scan(client=self.es, query=query, index=self.index_name)

View File

@ -8,9 +8,14 @@ class IndexerTest(TestCase):
docs = [{"name": "doc1"}, {"name": "doc2"}] docs = [{"name": "doc1"}, {"name": "doc2"}]
result = Indexer.create_bulk_index_string(docs, "indexName") result = Indexer.create_bulk_index_string(docs, 1)
self.assertTrue(result == '{"index":{}}\n'
'{"directory": 1, "name": "doc1"}\n'
'{"index":{}}\n'
'{"directory": 1, "name": "doc2"}\n'
or result == '{"index":{}}\n'
'{"name": "doc1", "directory": 1}\n'
'{"index":{}}\n'
'{"name": "doc2", "directory": 1}\n')
self.assertEqual(result, '{"index":{"_index":"indexName","_type":"file"}}\n'
'{"name": "doc1"}\n'
'{"index":{"_index":"indexName","_type":"file"}}\n'
'{"name": "doc2"}\n')

View File

@ -0,0 +1,24 @@
from unittest import TestCase
from thumbnail import ThumbnailGenerator
from PIL import Image
import os
class ThumbnailGeneratorTest(TestCase):
    """
    Tests for ThumbnailGenerator
    """

    def test_generate(self):
        """
        A 420x315 image limited to 300 must come out as 300x225
        """
        thumb_path = "test_thumb1.jpg"
        generator = ThumbnailGenerator(300)

        try:
            # Original image is 420x315
            generator.generate("test_folder/sample_1.jpg", thumb_path)

            # Close the handle even if reading the size fails
            with Image.open(thumb_path) as img:
                width, height = img.size

            self.assertEqual(300, width)
            self.assertEqual(225, height)
        finally:
            # Clean up even when an assertion fails, so a failed run does not
            # leave the generated thumbnail behind in the working directory
            if os.path.isfile(thumb_path):
                os.remove(thumb_path)

View File

@ -58,6 +58,9 @@ class Directory:
class Task: class Task:
INDEX = 1
GEN_THUMBNAIL = 2
def __init__(self, task_type: int, dir_id: int, completed: bool = False, completed_time: time.time = None, def __init__(self, task_type: int, dir_id: int, completed: bool = False, completed_time: time.time = None,
task_id: int = None): task_id: int = None):
self.id = task_id self.id = task_id

View File

@ -126,7 +126,6 @@
<td><a id="opt-{{ option.id }}-btn" class="btn btn-danger" href="/directory/{{ directory.id }}/del_opt/{{ option.id }}" >Remove</a></td> <td><a id="opt-{{ option.id }}-btn" class="btn btn-danger" href="/directory/{{ directory.id }}/del_opt/{{ option.id }}" >Remove</a></td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>

View File

@ -119,7 +119,6 @@
</div> </div>
</div> </div>
{% endfor %} {% endfor %}
</div> </div>

15
test_generate_big_dir.py Normal file
View File

@ -0,0 +1,15 @@
import os
if __name__ == "__main__":
    # Builds big_dir/<0..99>/file-<0..9999>: 100 sub-directories of 10000 empty files each.
    root = "big_dir"
    if not os.path.isdir(root):
        os.mkdir(root)

    for dir_index in range(100):
        sub_dir = root + "/" + str(dir_index)
        if not os.path.isdir(sub_dir):
            os.mkdir(sub_dir)

        for file_index in range(10000):
            file_path = sub_dir + "/file-" + str(file_index)
            # Append mode creates the file when missing and leaves an existing one untouched
            with open(file_path, 'a'):
                pass

14
thumbnail.py Normal file
View File

@ -0,0 +1,14 @@
from PIL import Image
class ThumbnailGenerator:
    """
    Creates scaled-down copies of image files
    """

    def __init__(self, size):
        """
        :param size: upper bound applied to both the width and the height of a thumbnail
        """
        self.size = (size, size)

    def generate(self, path, dest_path):
        """
        Writes a thumbnail of the image at path to dest_path

        :param path: location of the source image
        :param dest_path: location the thumbnail is saved to
        """
        # The context manager closes the source image even when thumbnail() or
        # save() raises, which the explicit close() at the end did not guarantee
        with Image.open(path) as image:
            image.thumbnail(self.size, Image.BICUBIC)
            image.save(dest_path)

28
tmp_specs Normal file
View File

@ -0,0 +1,28 @@
Ajouter un utilisateur
mettre admin
Enlever admin
ne marche pas si t'es le seul admin
y'existe conn.executescript
Utiliser des functions queries pour afficher genre le total size of query, etc
Utiliser opendirectories-bot pour afficher des info
Plugins
MP3 tags
todo: other music
Font files
images
video tags
use es filter to filter out folders that the user has no permission to search
option to toggle auto complete
option to set password loop count
option to choose checksum thingy
option to choose mime guesser
option to toggle search history/stats
thumbnails are stored in a folder for each folder: easy to delete