mirror of https://github.com/simon987/Simple-Incremental-Search-Tool.git (synced 2025-04-19 18:16:45 +00:00)

Progress bar + thumbnail generator

This commit is contained in:
parent 9d75fc4d59
commit 047d2653bc

crawler.py (35 lines changed)
@@ -5,18 +5,19 @@ from multiprocessing import Process, Value
 from apscheduler.schedulers.background import BackgroundScheduler
 from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser
 from indexer import Indexer
+from search import Search
 
 
 class RunningTask:
 
     def __init__(self, task: Task):
-        self.total_files = 0
+        self.total_files = Value("i", 0)
         self.parsed_files = Value("i", 0)
         self.task = task
         self.done = Value("i", 0)
 
     def to_json(self):
-        return json.dumps({"parsed": self.parsed_files.value, "total": self.total_files, "id": self.task.id})
+        return json.dumps({"parsed": self.parsed_files.value, "total": self.total_files.value, "id": self.task.id})
 
 
 class Crawler:
@@ -77,24 +78,36 @@ class TaskManager:
     def start_task(self, task: Task):
         self.current_task = RunningTask(task)
 
-        c = Crawler([])
-        path = self.storage.dirs()[task.dir_id].path
-        self.current_task.total_files = c.countFiles(path)
+        if task.type == Task.INDEX:
+            c = Crawler([])
+            path = self.storage.dirs()[task.dir_id].path
+            self.current_task.total_files.value = c.countFiles(path)
 
-        print("Started task - " + str(self.current_task.total_files) + " files")
-        print(path)
-        self.current_process = Process(target=self.execute_crawl, args=(path, self.current_task.parsed_files, self.current_task.done))
-        # self.current_process.daemon = True
-        self.current_process.start()
+            self.current_process = Process(target=self.execute_crawl, args=(path, self.current_task.parsed_files,
+                                                                            self.current_task.done,
+                                                                            self.current_task.task.dir_id))
+            self.current_process.start()
+
+        elif task.type == Task.GEN_THUMBNAIL:
+            self.current_process = Process(target=self.execute_thumbnails, args=(self.current_task.task.dir_id,
+                                                                                 self.current_task.total_files,
+                                                                                 self.current_task.parsed_files,
+                                                                                 self.current_task.done))
+            self.current_process.start()
 
-    def execute_crawl(self, path: str, counter: Value, done: Value):
+    def execute_crawl(self, path: str, counter: Value, done: Value, directory: int):
         c = Crawler([GenericFileParser([Md5CheckSumCalculator()], ExtensionMimeGuesser())])
         c.crawl(path, counter)
 
-        Indexer("changeme").index(c.documents)
-        print("Done")
+        Indexer("changeme").index(c.documents, directory)
+        done.value = 1
+
+    def execute_thumbnails(self, dir_id: int, total_files: Value, counter: Value, done: Value):
+
+        docs = list(Search("changeme").getAllDocuments(dir_id))
+
+        total_files.value = len(docs)
+
         done.value = 1
 
     def cancel_task(self):
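
Switching total_files from a plain int to a shared multiprocessing.Value is what makes the progress bar work: both counters now live in shared memory, so counts written in the worker process (and the total set before or during the task) are visible to the parent. A minimal polling sketch under assumed names; the poll_progress helper and the interval are illustrative and not part of this commit, only RunningTask.to_json() and the done/parsed counters are:

    import json
    import time


    def poll_progress(task_manager, interval=0.5):
        """Illustrative only: print RunningTask progress until the worker flags completion."""
        running = task_manager.current_task
        while running is not None and running.done.value == 0:
            # to_json() reads the shared Value counters updated by the worker process.
            print(json.loads(running.to_json()))  # e.g. {"parsed": 120, "total": 4521, "id": 3}
            time.sleep(interval)
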
indexer.py (12 lines changed)

@@ -30,7 +30,7 @@ class Indexer:
             subprocess.Popen(["elasticsearch/bin/elasticsearch"])
 
     @staticmethod
-    def create_bulk_index_string(docs: list):
+    def create_bulk_index_string(docs: list, directory: int):
         """
         Creates a insert string for sending to elasticsearch
         """
@@ -42,6 +42,7 @@ class Indexer:
         action_string = '{"index":{}}\n'
 
         for doc in docs:
+            doc["directory"] = directory
             result += action_string
             result += json.dumps(doc) + "\n"
 
@@ -49,11 +50,11 @@ class Indexer:
 
         return result
 
-    def index(self, docs: list):
+    def index(self, docs: list, directory: int):
         print("Indexing " + str(len(docs)) + " docs")
-        index_string = Indexer.create_bulk_index_string(docs)
+        index_string = Indexer.create_bulk_index_string(docs, directory)
         print("bulk-start")
-        self.es.bulk(body=index_string, index=self.index_name, doc_type="file")
+        self.es.bulk(body=index_string, index=self.index_name, doc_type="file", refresh="true")
         print("bulk-done")
 
     def clear(self):
@@ -73,7 +74,8 @@ class Indexer:
         self.es.indices.put_mapping(body='{"properties": {'
                                          '"name": {"type": "text", "analyzer": "path_analyser", "copy_to": "suggest-path"},'
                                          '"suggest-path": {"type": "completion", "analyzer": "keyword"},'
-                                         '"mime": {"type": "keyword"}'
+                                         '"mime": {"type": "keyword"},'
+                                         '"directory": {"type": "keyword"}'
                                          '}}', doc_type="file", index=self.index_name)
 
         self.es.indices.open(index=self.index_name)
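
With the extra directory argument, every document gets a "directory" field before it is serialised into the bulk body, and the new keyword mapping makes that field filterable. A small sketch of what create_bulk_index_string now produces, using the same values as the updated Indexer spec further down (dict key order may vary between runs, which is why the spec accepts either order):

    from indexer import Indexer

    docs = [{"name": "doc1"}, {"name": "doc2"}]
    body = Indexer.create_bulk_index_string(docs, 1)

    # body is a newline-delimited bulk payload, one action line per document:
    # {"index":{}}
    # {"directory": 1, "name": "doc1"}
    # {"index":{}}
    # {"directory": 1, "name": "doc2"}

The added refresh="true" on es.bulk makes the freshly indexed documents searchable right away, which matters when another task queries the index immediately afterwards.
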
search.py (new file, 24 lines)

@@ -0,0 +1,24 @@
+import elasticsearch
+from elasticsearch import helpers
+import requests
+
+
+class Search:
+
+    def __init__(self, index: str):
+        self.index_name = index
+        self.es = elasticsearch.Elasticsearch()
+
+        try:
+            requests.head("http://localhost:9200")
+            print("elasticsearch is already running")
+        except:
+            print("elasticsearch is not running")
+
+    def getAllDocuments(self, dir_id: int):
+
+        return helpers.scan(client=self.es,
+                            query={"_source": {"includes": ["path", "name"]},
+                                   "query": {"term": {"directory": dir_id}}},
+                            index=self.index_name)
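
getAllDocuments wraps elasticsearch.helpers.scan, so it returns a generator of raw hits restricted to one directory rather than a list; execute_thumbnails above materialises it with list() to get a total count for the progress bar. A usage sketch, with the "changeme" index name taken from the crawler and dir_id as an example value:

    from search import Search

    search = Search("changeme")

    for hit in search.getAllDocuments(dir_id=1):
        doc = hit["_source"]           # only "path" and "name" are included in _source
        print(doc["path"], doc["name"])
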
@@ -8,9 +8,14 @@ class IndexerTest(TestCase):
 
         docs = [{"name": "doc1"}, {"name": "doc2"}]
 
-        result = Indexer.create_bulk_index_string(docs, "indexName")
+        result = Indexer.create_bulk_index_string(docs, 1)
 
-        self.assertEqual(result, '{"index":{"_index":"indexName","_type":"file"}}\n'
-                                 '{"name": "doc1"}\n'
-                                 '{"index":{"_index":"indexName","_type":"file"}}\n'
-                                 '{"name": "doc2"}\n')
+        self.assertTrue(result == '{"index":{}}\n'
+                                  '{"directory": 1, "name": "doc1"}\n'
+                                  '{"index":{}}\n'
+                                  '{"directory": 1, "name": "doc2"}\n'
+                        or result == '{"index":{}}\n'
+                                     '{"name": "doc1", "directory": 1}\n'
+                                     '{"index":{}}\n'
+                                     '{"name": "doc2", "directory": 1}\n')
spec/ThumbnailGenerator_spec.py (new file, 24 lines)

@@ -0,0 +1,24 @@
+from unittest import TestCase
+from thumbnail import ThumbnailGenerator
+from PIL import Image
+import os
+
+
+class ThumbnailGeneratorTest(TestCase):
+
+    def test_generate(self):
+
+        generator = ThumbnailGenerator(300)
+        # Original image is 420x315
+        generator.generate("test_folder/sample_1.jpg", "test_thumb1.jpg")
+
+        img = Image.open("test_thumb1.jpg")
+        width, height = img.size
+        img.close()
+
+        self.assertEqual(300, width)
+        self.assertEqual(225, height)
+
+        if os.path.isfile("test_thumb1.jpg"):
+            os.remove("test_thumb1.jpg")
@@ -58,6 +58,9 @@ class Directory:
 
 class Task:
 
+    INDEX = 1
+    GEN_THUMBNAIL = 2
+
     def __init__(self, task_type: int, dir_id: int, completed: bool = False, completed_time: time.time = None,
                  task_id: int = None):
         self.id = task_id
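
The two new constants are what TaskManager.start_task dispatches on: Task.INDEX runs the crawl-and-index path, Task.GEN_THUMBNAIL runs execute_thumbnails. A hedged sketch of building a thumbnail task; the module name in the import is an assumption, since the file name is not shown in this view:

    from storage import Task  # assumed module name; the diff above does not show the file header

    # Thumbnail-generation task for directory 3; the remaining constructor
    # arguments (completed, completed_time, task_id) keep their defaults.
    task = Task(Task.GEN_THUMBNAIL, 3)
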
@@ -126,7 +126,6 @@
 <td><a id="opt-{{ option.id }}-btn" class="btn btn-danger" href="/directory/{{ directory.id }}/del_opt/{{ option.id }}" >Remove</a></td>
 </tr>
 
-
 {% endfor %}
 
 </tbody>
@@ -119,7 +119,6 @@
 </div>
 
 </div>
-
 {% endfor %}
 
 </div>
test_generate_big_dir.py (new file, 15 lines)

@@ -0,0 +1,15 @@
+import os
+
+
+if __name__ == "__main__":
+
+    if not os.path.isdir("big_dir"):
+        os.mkdir("big_dir")
+
+    for i in range(100):
+
+        if not os.path.isdir("big_dir/" + str(i)):
+            os.mkdir("big_dir/" + str(i))
+
+        for j in range(10000):
+            open("big_dir/" + str(i) + "/file-" + str(j), 'a').close()
thumbnail.py (new file, 14 lines)

@@ -0,0 +1,14 @@
+from PIL import Image
+
+
+class ThumbnailGenerator:
+
+    def __init__(self, size):
+        self.size = (size, size)
+
+    def generate(self, path, dest_path):
+
+        image = Image.open(path)
+        image.thumbnail(self.size, Image.BICUBIC)
+        image.save(dest_path)
+        image.close()
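
PIL's Image.thumbnail resizes in place while preserving aspect ratio, only shrinking the image to fit inside the (size, size) bounding box; that is why the spec above expects a 420x315 source to come out as 300x225. Usage mirrors the spec (the file paths here are placeholders):

    from thumbnail import ThumbnailGenerator

    generator = ThumbnailGenerator(300)                    # 300x300 bounding box
    generator.generate("sample.jpg", "sample_thumb.jpg")   # e.g. 420x315 source -> 300x225 thumbnail
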
tmp_specs (new file, 28 lines)

@@ -0,0 +1,28 @@
+Add a user
+make admin
+Remove admin
+doesn't work if you're the only admin
+
+conn.executescript exists
+
+Use function queries to display things like the total size of a query, etc.
+Use opendirectories-bot to display info
+
+
+
+Plugins
+MP3 tags
+todo: other music
+Font files
+images
+video tags
+
+use es filter to filter out folders that the user has no permission to search
+
+option to toggle auto complete
+option to set password loop count
+option to chose checksum thingy
+option to chose mime guesser
+option to toggle search history/stats
+
+thumbnails are stored in a folder for each folder: easy to delete