Basic indexing + thumbnail is pretty much done

This commit is contained in:
simon987 2018-03-24 10:00:54 -04:00
parent a8b5e0b76e
commit d5189453e0
17 changed files with 198 additions and 287 deletions

View File

@ -1,5 +1,9 @@
default_options = {
"ThumbnailQuality": "85",
"ThumbnailSize": "275",
"ThumbnailColor": "#FF00FF"
}
"ThumbnailColor": "FF00FF",
"TextFileContentLenght": "16384",
"MimeGuesser": "extension", # extension, content
"CheckSumCalculators": "", # md5, sha1, sha256
}

View File

@ -3,10 +3,13 @@ from storage import Task, LocalStorage
import json
from multiprocessing import Process, Value
from apscheduler.schedulers.background import BackgroundScheduler
from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser, MediaFileParser, TextFileParser
from parsing import GenericFileParser, Md5CheckSumCalculator, ExtensionMimeGuesser, MediaFileParser, TextFileParser, \
PictureFileParser, Sha1CheckSumCalculator, Sha256CheckSumCalculator, ContentMimeGuesser, MimeGuesser
from indexer import Indexer
from search import Search
from thumbnail import ThumbnailGenerator
from storage import Directory
import shutil
class RunningTask:
@ -23,7 +26,7 @@ class RunningTask:
class Crawler:
def __init__(self, enabled_parsers: list):
def __init__(self, enabled_parsers: list, mime_guesser: MimeGuesser=ContentMimeGuesser()):
self.documents = []
self.enabled_parsers = enabled_parsers
@ -37,16 +40,16 @@ class Crawler:
for ext in parser.mime_types:
self.ext_map[ext] = parser
def crawl(self, root_dir: str, counter: Value=None):
self.mime_guesser = mime_guesser
mime_guesser = ExtensionMimeGuesser() #todo config
def crawl(self, root_dir: str, counter: Value=None):
for root, dirs, files in os.walk(root_dir):
for filename in files:
full_path = os.path.join(root, filename)
mime = mime_guesser.guess_mime(full_path)
mime = self.mime_guesser.guess_mime(full_path)
parser = self.ext_map.get(mime, self.default_parser)
@ -55,6 +58,7 @@ class Crawler:
counter.value += 1
doc = parser.parse(full_path)
doc["mime"] = mime
self.documents.append(doc)
except FileNotFoundError:
@ -83,39 +87,62 @@ class TaskManager:
def start_task(self, task: Task):
self.current_task = RunningTask(task)
directory = self.storage.dirs()[task.dir_id]
if task.type == Task.INDEX:
c = Crawler([])
directory = self.storage.dirs()[task.dir_id]
self.current_task.total_files.value = c.countFiles(directory.path)
self.current_process = Process(target=self.execute_crawl, args=(directory.path, self.current_task.parsed_files,
self.current_task.done,
self.current_task.task.dir_id))
self.current_process.start()
self.current_process = Process(target=self.execute_crawl, args=(directory, self.current_task.parsed_files,
self.current_task.done))
elif task.type == Task.GEN_THUMBNAIL:
self.current_process = Process(target=self.execute_thumbnails, args=(self.current_task.task.dir_id,
self.current_process = Process(target=self.execute_thumbnails, args=(directory,
self.current_task.total_files,
self.current_task.parsed_files,
self.current_task.done))
self.current_process.start()
self.current_process.start()
def execute_crawl(self, path: str, counter: Value, done: Value, directory: int):
c = Crawler([GenericFileParser([]), MediaFileParser([]), TextFileParser([], 1024)])
c.crawl(path, counter)
def execute_crawl(self, directory: Directory, counter: Value, done: Value):
    """Crawl *directory* and index every document that was found.

    The parsers and the mime guesser are configured from the directory's
    options ("CheckSumCalculators", "MimeGuesser", "TextFileContentLenght").

    :param directory: directory to crawl; its ``path``, ``id`` and options are read
    :param counter: shared value handed to ``Crawler.crawl`` to report progress
    :param done: shared flag, set to 1 once crawling and indexing are finished
    """
    # Option value -> checksum calculator class. Names that are not listed
    # here (including the empty default) are silently ignored.
    available_calcs = {
        "md5": Md5CheckSumCalculator,
        "sha1": Sha1CheckSumCalculator,
        "sha256": Sha256CheckSumCalculator,
    }

    chksum_calcs = []
    # "CheckSumCalculators" is a comma-separated list, e.g. "md5, sha1"
    for arg in directory.get_option("CheckSumCalculators").split(","):
        calc_class = available_calcs.get(arg.strip())
        if calc_class is not None:
            chksum_calcs.append(calc_class())

    # Any value other than "extension" selects content-based guessing
    if directory.get_option("MimeGuesser") == "extension":
        mime_guesser = ExtensionMimeGuesser()
    else:
        mime_guesser = ContentMimeGuesser()

    c = Crawler([GenericFileParser(chksum_calcs),
                 MediaFileParser(chksum_calcs),
                 TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLenght"))),
                 PictureFileParser(chksum_calcs)],
                mime_guesser)
    c.crawl(directory.path, counter)

    # todo: create indexer inside the crawler and index every X files
    # Index exactly once, keyed by the directory id (not the Directory object).
    Indexer("changeme").index(c.documents, directory.id)
    done.value = 1
def execute_thumbnails(self, dir_id: int, total_files: Value, counter: Value, done: Value):
def execute_thumbnails(self, directory: Directory, total_files: Value, counter: Value, done: Value):
    """Regenerate the thumbnail cache of *directory* from scratch.

    :param directory: directory whose indexed documents get a thumbnail; its
        ``id`` and its "ThumbnailSize" / "ThumbnailQuality" / "ThumbnailColor"
        options are read
    :param total_files: shared value, set to the number of documents to process
    :param counter: shared value handed to ``generate_all`` to report progress
    :param done: shared flag, set to 1 once generation is finished
    """
    dest_path = os.path.join("thumbnails", str(directory.id))

    # Drop the previous cache. On the first run for a directory there is
    # nothing to delete yet, which must not abort the task.
    try:
        shutil.rmtree(dest_path)
    except FileNotFoundError:
        pass

    docs = list(Search("changeme").get_all_documents(directory.id))
    total_files.value = len(docs)

    tn_generator = ThumbnailGenerator(int(directory.get_option("ThumbnailSize")),
                                      int(directory.get_option("ThumbnailQuality")),
                                      directory.get_option("ThumbnailColor"))
    tn_generator.generate_all(docs, dest_path, counter)
    done.value = 1

View File

@ -7,7 +7,6 @@ import json
import chardet
from PIL import Image
class MimeGuesser:
def guess_mime(self, full_path):
@ -130,7 +129,7 @@ class GenericFileParser(FileParser):
path, name = os.path.split(full_path)
info["size"] = file_stat.st_size
info["path"] = path
info["path"] = path # todo save relative path
info["name"] = name
info["mtime"] = file_stat.st_mtime

View File

@ -7,4 +7,6 @@ requests
apscheduler
ffmpeg-python
fonttools
chardet
chardet
exifread
humanfriendly

47
run.py
View File

@ -3,12 +3,29 @@ from storage import Directory, Option, Task
from storage import LocalStorage, DuplicateDirectoryException
from crawler import RunningTask, TaskManager
import json
import os
import humanfriendly
from search import Search
app = Flask(__name__)
app.secret_key = "A very secret key"
storage = LocalStorage("local_storage.db")
tm = TaskManager(storage)
search = Search("changeme")
def get_dir_size(path):
    """Return the cumulative size in bytes of all files below *path*.

    The tree is walked recursively. A *path* that does not exist yields 0
    because ``os.walk`` produces nothing for it. Entries whose size cannot
    be read (dangling symlink, file removed while walking, permission
    problem) are skipped instead of aborting the whole computation.

    :param path: root directory to measure
    :return: total size in bytes
    """
    size = 0

    for root, dirs, files in os.walk(path):
        for filename in files:
            full_path = os.path.join(root, filename)
            try:
                size += os.path.getsize(full_path)
            except OSError:
                # Unreadable entry: leave it out of the total
                continue

    return size
@app.route("/")
@ -45,8 +62,15 @@ def directory_add():
def directory_manage(dir_id):
directory = storage.dirs()[dir_id]
tn_size = get_dir_size("thumbnails/" + str(dir_id))
tn_size_formatted = humanfriendly.format_size(tn_size)
return render_template("directory_manage.html", directory=directory)
index_size = search.get_index_size()
index_size_formatted = humanfriendly.format_size(index_size)
return render_template("directory_manage.html", directory=directory, tn_size=tn_size,
tn_size_formatted=tn_size_formatted, index_size=index_size,
index_size_formatted=index_size_formatted, doc_count=search.get_doc_count())
@app.route("/directory/<int:dir_id>/update")
@ -94,11 +118,24 @@ def directory_del(dir_id):
return redirect("/directory")
for t in storage.tasks():
a_task = t
break
@app.route("/directory/<int:dir_id>/reset")
def directory_reset(dir_id):
    """Reset the options of directory *dir_id* to the default settings.

    Every stored option of the directory is deleted, the defaults are
    re-created on the directory object and saved again, then the user is
    sent back to the directory's management page.
    """
    directory = storage.dirs()[dir_id]

    # Remove the options currently stored for this directory
    for opt in directory.options:
        storage.del_option(opt.id)

    # NOTE(review): assumes set_default_options() replaces directory.options
    # with fresh default options - confirm in storage.Directory
    directory.set_default_options()

    for opt in directory.options:
        opt.dir_id = dir_id
        storage.save_option(opt)

    # Force the next storage.dirs() call to reload from the database
    storage.dir_cache_outdated = True

    flash("<strong>Reset directory options to default settings</strong>", "success")
    # Absolute path: a relative target would be resolved against
    # /directory/<id>/reset and land on /directory/<id>/directory/<id>.
    return redirect("/directory/" + str(dir_id))
# tm = None
@app.route("/task")
def task():

View File

@ -1,6 +1,7 @@
import elasticsearch
from elasticsearch import helpers
import requests
import json
class Search:
@ -22,3 +23,27 @@ class Search:
"query": {"term": {"directory": dir_id}}},
index=self.index_name)
def get_index_size(self):
    """Return the size in bytes of the index, or 0 when it cannot be read.

    The value is ``primaries.store.size_in_bytes`` as reported by the
    ``/<index>/_stats`` endpoint of the Elasticsearch node on localhost:9200.
    This is best-effort: an unreachable server, a non-200 answer or an
    unexpected payload all yield 0 so that callers always get an int.
    """
    try:
        info = requests.get("http://localhost:9200/" + self.index_name + "/_stats")
        if info.status_code != 200:
            # e.g. the index does not exist yet
            return 0

        parsed_info = json.loads(info.text)
        return int(parsed_info["indices"][self.index_name]["primaries"]["store"]["size_in_bytes"])
    except (requests.RequestException, KeyError, TypeError, ValueError):
        # Connection failure, malformed JSON or missing stats entry
        return 0
def get_doc_count(self):
    """Return the number of documents in the index, or 0 when it cannot be read.

    The value is ``primaries.docs.count`` as reported by the
    ``/<index>/_stats`` endpoint of the Elasticsearch node on localhost:9200.
    ``indexing.index_total`` is not used because it counts indexing
    operations (re-indexing a document increments it again) rather than the
    documents currently stored.
    This is best-effort: an unreachable server, a non-200 answer or an
    unexpected payload all yield 0 so that callers always get an int.
    """
    try:
        info = requests.get("http://localhost:9200/" + self.index_name + "/_stats")
        if info.status_code != 200:
            # e.g. the index does not exist yet
            return 0

        parsed_info = json.loads(info.text)
        return int(parsed_info["indices"][self.index_name]["primaries"]["docs"]["count"])
    except (requests.RequestException, KeyError, TypeError, ValueError):
        # Connection failure, malformed JSON or missing stats entry
        return 0

7
static/css/bootstrap.min.css vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -1,72 +0,0 @@
.keen-dashboard {
background: #f2f2f2;
font-family: 'Gotham Rounded SSm A', 'Gotham Rounded SSm B', 'Helvetica Neue', Helvetica, Arial, sans-serif;
}
.keen-dataviz {
background: #fff;
border: 1px solid #e7e7e7;
border-radius: 2px;
box-sizing: border-box;
}
.keen-dataviz-title {
border-bottom: 1px solid #e7e7e7;
border-radius: 2px 2px 0 0;
font-size: 13px;
padding: 2px 10px 0;
text-transform: uppercase;
}
.keen-dataviz-stage {
padding: 10px;
}
.keen-dataviz-notes {
background: #fbfbfb;
border-radius: 0 0 2px 2px;
border-top: 1px solid #e7e7e7;
font-family: 'Helvetica Neue', Helvetica, sans-serif;
font-size: 11px;
padding: 0 10px;
}
.keen-dataviz .keen-dataviz-metric {
border-radius: 2px;
}
.keen-dataviz .keen-spinner-indicator {
border-top-color: rgba(0, 187, 222, .4);
}
.keen-dashboard .chart-wrapper {
background: #fff;
border: 1px solid #e2e2e2;
border-radius: 3px;
margin-bottom: 10px;
}
.keen-dashboard .chart-wrapper .chart-title {
border-bottom: 1px solid #d7d7d7;
color: #666;
font-size: 14px;
font-weight: 200;
padding: 7px 10px 4px;
}
.keen-dashboard .chart-wrapper .chart-stage {
overflow: hidden;
padding: 5px 10px;
position: relative;
}
.keen-dashboard .chart-wrapper .chart-notes {
background: #fbfbfb;
border-top: 1px solid #e2e2e2;
color: #808080;
font-size: 12px;
padding: 8px 10px 5px;
}
.keen-dashboard .chart-wrapper .keen-dataviz,
.keen-dashboard .chart-wrapper .keen-dataviz-title,
.keen-dashboard .chart-stage .chart-title {
border: medium none;
}

View File

@ -1,155 +0,0 @@
a, a:focus, a:hover, a:active {
color: #00afd7;
}
h1, h2, h3 {
font-family: "Gotham Rounded", "Helvetica Neue", Helvetica, Arial, sans-serif;
margin: 12px 0;
}
h1 {
font-size: 32px;
font-weight: 100;
letter-spacing: .02em;
line-height: 48px;
margin: 12px 0;
}
h2 {
color: #2a333c;
font-weight: 200;
font-size: 21px;
}
h3 {
color: rgb(84, 102, 120);
font-size: 21px;
font-weight: 500;
letter-spacing: -0.28px;
line-height: 29.39px;
}
.btn {
background: transparent;
border: 1px solid white;
}
.keen-logo {
height: 38px;
margin: 0 15px 0 0;
width: 150px;
}
.navbar-toggle {
background-color: rgba(255,255,255,.25);
}
.navbar-toggle .icon-bar {
background: #fff;
}
.navbar-nav {
margin: 5px 0 0;
}
.navbar-nav > li > a {
font-size: 15px;
font-weight: 200;
letter-spacing: 0.03em;
padding-top: 19px;
text-shadow: 0 0 2px rgba(0,0,0,.1);
}
.navbar-nav > li > a:focus,
.navbar-nav > li > a:hover {
background: transparent none;
}
.navbar-nav > li > a.navbar-btn {
background-color: rgba(255,255,255,.25);
border: medium none;
padding: 10px 15px;
}
.navbar-nav > li > a.navbar-btn:focus,
.navbar-nav > li > a.navbar-btn:hover {
background-color: rgba(255,255,255,.35);
}
.navbar-collapse {
box-shadow: none;
}
.masthead {
background-color: #00afd7;
background-image: url("../img/bg-bars.png");
background-position: 0 -290px;
background-repeat: repeat-x;
color: #fff;
margin: 0 0 24px;
padding: 20px 0;
}
.masthead h1 {
margin: 0;
}
.masthead small,
.masthead a,
.masthead a:focus,
.masthead a:hover,
.masthead a:active {
color: #fff;
}
.masthead p {
color: #b3e7f3;
font-weight: 100;
letter-spacing: .05em;
}
.hero {
background-position: 50% 100%;
min-height: 450px;
text-align: center;
}
.hero h1 {
font-size: 48px;
margin: 120px 0 0;
}
.hero .lead {
margin-bottom: 32px;
}
.hero a.hero-btn {
border: 2px solid #fff;
display: block;
font-family: "Gotham Rounded", "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 24px;
font-weight: 200;
margin: 0 auto 12px;
max-width: 320px;
padding: 12px 0 6px;
}
.hero a.hero-btn:focus,
.hero a.hero-btn:hover {
border-color: transparent;
background-color: #fff;
color: #00afd7;
}
.sample-item {
margin-bottom: 24px;
}
.signup {
float: left;
display: inline-block;
vertical-align: middle;
margin-top: -6px;
margin-right: 10px;
}
.love {
border-top: 1px solid #d7d7d7;
color: #546678;
margin: 24px 0 0;
padding: 15px 0;
text-align: center;
}
.love p {
margin-bottom: 0;
}

7
static/js/bootstrap.min.js vendored Normal file

File diff suppressed because one or more lines are too long

5
static/js/jquery.min.js vendored Normal file

File diff suppressed because one or more lines are too long

5
static/js/popper.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -18,7 +18,7 @@
<div class="form-group">
<input type="text" class="form-control" placeholder="Absolute path" name="path">
</div>
<button type="submit" class="btn btn-success">Add Directory</button>
<button type="submit" class="btn btn-success"><i class="fas fa-plus"></i> Add Directory</button>
</form>
</div>
@ -46,7 +46,7 @@
<td><pre style="width: 80%">{{ directories[dir].path }}</pre></td>
<td><i class="far {{ "fa-check-square" if directories[dir].enabled else "fa-square" }}"></i></td>
<td>2018-02-21</td>
<td><a href="directory/{{ dir }}" class="btn btn-primary">Manage</a> </td>
<td><a href="directory/{{ dir }}" class="btn btn-primary"><i class="fas fa-cog"></i> Manage</a> </td>
</tr>
{% endfor %}
</tbody>

View File

@ -75,7 +75,7 @@
</tr>
<tr onclick="modifyPath()">
<th style="width: 20%">Path</th>
<th style="width: 20%">Path</th>
<td>
<pre id="path" title="Click to update">{{ directory.path }}</pre>
</td>
@ -84,8 +84,7 @@
<tr>
<th style="width: 20%">Enabled</th>
<td>
<form action="/directory/{{ directory.id }}/update" style="display: inline;">
<form action="/directory/{{ directory.id }}/update" style="display: inline;margin-left: 6px;">
<input type="hidden" name="enabled" value="{{ "0" if directory.enabled else "1" }}">
<button class="btn btn-sm {{ "btn-danger" if directory.enabled else "btn-success" }}">
<i class="far {{ "fa-check-square" if directory.enabled else "fa-square" }}"></i>
@ -94,6 +93,21 @@
</form>
</td>
</tr>
<tr>
<th>Thumbnail cache size</th>
<td><pre>{{ tn_size_formatted }} ({{ tn_size }} bytes)</pre></td>
</tr>
<tr>
<th>Index size</th>
<td><pre>{{ index_size_formatted }} ({{ index_size }} bytes)</pre></td>
</tr>
<tr>
<th>Document count</th>
<td><pre>{{ doc_count }}</pre></td>
</tr>
</table>
</div>
@ -130,27 +144,35 @@
<div class="card-body">
<div class="d-flex">
<div class="dropdown" style="margin-right: 1em;">
<button class="btn dropdown-toggle btn-primary" data-toggle="dropdown">Create a task</button>
<div class="dropdown-menu">
<a class="dropdown-item" href="#">Indexing task</a>
<a class="dropdown-item" href="#">Thumbnail generation task</a>
</div>
</div>
<div class="dropdown">
<form action="/task/add" class="p-2">
<input type="hidden" value="1" name="type">
<input type="hidden" value="{{ directory.id }}" name="directory">
<button class="btn btn-primary" href="/task/" value="Generate thumbnails">
<i class="fas fa-book"></i> Generate index
</button>
</form>
<form action="/task/add" class="p-2">
<input type="hidden" value="2" name="type">
<input type="hidden" value="{{ directory.id }}" name="directory">
<button class="btn btn-primary" href="/task/" value="Generate thumbnails">
<i class="far fa-images"></i> Generate thumbnails
</button>
</form>
<div class="dropdown p-2">
<button class="btn dropdown-toggle btn-danger" data-toggle="dropdown">Action</button>
<div class="dropdown-menu">
<a class="dropdown-item" href="/directory/{{ directory.id }}/del">Delete directory</a>
<a class="dropdown-item" href="#">Reset to default settings</a>
<a class="dropdown-item" href="/directory/{{ directory.id }}/reset">Reset to default settings</a>
</div>
</div>
</div>
</div>
</div>
</div>

View File

@ -6,11 +6,11 @@
<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no' />
<!-- Demo Dependencies -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.3/jquery.min.js" type="text/javascript"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js" integrity="sha384-ApNbgh9B+Y1QKtv3Rn7W3mgPxhU9K/ScQsAP7hUibX39j7fakFPskvXusvfa0b4Q" crossorigin="anonymous"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js" integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl" crossorigin="anonymous"></script>
<script src="/static/js/jquery.min.js" type="text/javascript"></script>
<script src="/static/js/popper.min.js" type="text/javascript"></script>
<script src="/static/js/bootstrap.min.js" type="text/javascript"></script>
<script src="/static/js/Chart.min.js" type="text/javascript"></script>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
<link rel="stylesheet" href="/static/css/bootstrap.min.css">
<link href="/static/css/fontawesome-all.min.css" rel="stylesheet" type="text/css">

View File

@ -59,7 +59,7 @@
{% endfor %}
</select>
<input type="submit" class="form-control btn btn-success" value="Add">
<button class="form-control btn btn-success"><i class="fas fa-plus"></i> Add</button>
</form>
</div>
</div>
@ -103,10 +103,10 @@
<div class="card">
<div class="card-header">Ongoing tasks</div>
<div class="card-body">
{% for task_id in tasks | sort()%}
{% for task_id in tasks | sort() %}
<div class="task-wrapper container-fluid">
<span class="task-name">{{ directories[tasks[task_id].dir_id].name }} - </span>
<span class="task-info">{{ tasks[task_id].type }}</span>
<a class="task-name" href="/directory/{{ tasks[task_id].dir_id }}">{{ directories[tasks[task_id].dir_id].name }}</a>
<span class="task-info"> - {{ tasks[task_id].type }}</span>
<div class="d-flex p-2">
<div class="container-fluid p-2">

View File

@ -7,9 +7,11 @@ import ffmpeg
class ThumbnailGenerator:
def __init__(self, size):
def __init__(self, size, quality=85, color="FF00FF"):
    """Configure the thumbnail generator.

    :param size: edge length in pixels; thumbnails are bounded by (size, size)
    :param quality: JPEG quality used when saving a thumbnail
    :param color: background color as a hexadecimal string, "RRGGBB" or
        "#RRGGBB"; it fills the canvas the image is pasted on
    :raises ValueError: if *color* is not valid hexadecimal
    """
    self.size = (size, size)
    self.mime_guesser = ContentMimeGuesser()
    self.quality = quality
    # bytes.fromhex() rejects a leading "#", but option values saved with
    # the previous default ("#FF00FF") still carry one, so strip it first.
    self.color = tuple(bytes.fromhex(color.lstrip("#")))
def generate(self, path, dest_path):
@ -32,11 +34,8 @@ class ThumbnailGenerator:
self.generate_image("tmp", dest_path)
os.remove("tmp")
except Exception as e:
print(e)
print("Couldn't make thumbnail for " + path)
# print(dest_path + " - " + str(os.path.getsize(dest_path))) # debug todo remove
def generate_all(self, docs, dest_path, counter: Value=None):
os.makedirs(dest_path, exist_ok=True)
@ -56,8 +55,7 @@ class ThumbnailGenerator:
with Image.open(image_file) as image:
image.thumbnail(self.size, Image.BICUBIC)
canvas = Image.new("RGB", image.size, (255, 0, 255)) # todo get from config
canvas = Image.new("RGB", image.size, self.color)
if image.mode in ('RGBA', 'LA') or (image.mode == 'P' and 'transparency' in image.info):
@ -68,6 +66,6 @@ class ThumbnailGenerator:
else:
canvas.paste(image)
canvas.save(dest_path, "JPEG", quality=85, optimize=True) # todo get qual from config
canvas.save(dest_path, "JPEG", quality=self.quality, optimize=True)
canvas.close()