Initial commit

This commit is contained in:
simon 2018-01-22 20:26:19 -05:00
commit caad7cd458
4 changed files with 280 additions and 0 deletions

64
crawler.py Normal file
View File

@ -0,0 +1,64 @@
import os
import hashlib
import mimetypes
from PIL import Image
import simplejson
# Directory tree that is passed to crawl() when this script runs.
rootDir = "/home/simon/Documents"
# https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
def md5sum(filename, block_size=65536):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is opened in binary mode and fed to the hash in chunks of
    *block_size* bytes rather than being read in one go.
    """
    # Named ``digest`` (not ``hash``) so the builtin of that name is not shadowed.
    digest = hashlib.md5()
    with open(filename, "rb") as f:
        # iter() with a sentinel keeps calling f.read() until it returns b"" (EOF).
        for block in iter(lambda: f.read(block_size), b""):
            digest.update(block)
    return digest.hexdigest()
def crawl(root_dir):
    """Walk *root_dir* and write metadata for every file to ``crawler.json``.

    Each file yields a dict with the keys ``md5``, ``path`` (the directory
    that contains the file), ``name``, ``size`` and ``mtime`` (truncated to
    an int). When a MIME type can be guessed from the file name it is stored
    under ``mime``; for types starting with ``image`` the pixel ``width`` and
    ``height`` are added as well. If Pillow raises ``OSError`` or
    ``ValueError`` while opening such a file, the ``mime`` key is removed
    again, so the entry is no longer marked as an image.

    The collected list is serialised with simplejson and written to
    ``crawler.json`` in the current working directory, replacing any
    existing file. Every visited directory is printed as progress output.
    """
    docs = []
    for root, _subdirs, files in os.walk(root_dir):
        print(root)
        for filename in files:
            full_path = os.path.join(root, filename)
            doc = {
                "md5": md5sum(full_path),
                "path": root,
                "name": filename,
                "size": os.path.getsize(full_path),
                "mtime": int(os.path.getmtime(full_path)),
            }

            mime_type = mimetypes.guess_type(full_path)[0]
            if mime_type is not None:
                doc["mime"] = mime_type
                if mime_type.startswith("image"):
                    try:
                        # The context manager releases the file handle that
                        # Image.open() keeps; a long crawl would otherwise
                        # pile up open descriptors.
                        with Image.open(full_path) as image:
                            width, height = image.size
                        doc["width"] = width
                        doc["height"] = height
                    except (OSError, ValueError):
                        # Not a readable image after all: drop the MIME type
                        # so the entry is not treated as one.
                        doc.pop("mime", None)

            docs.append(doc)

    # ``with`` closes the output file even if serialisation fails.
    with open("crawler.json", "w") as out:
        out.write(simplejson.dumps(docs))
# Executed at import time: crawl rootDir and write crawler.json.
crawl(rootDir)

16
templates/index.html Normal file
View File

@ -0,0 +1,16 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<!--
    Search form. It is submitted with GET, so the fields below arrive as the
    "query", "page" and "per_page" query-string parameters. The action ends
    in a slash to match the "/search/" route exactly and avoid a redirect.
-->
<form action="/search/">
    <input name="query">
    <!-- Result page to show; the first page is 0. -->
    <input type="number" name="page" value="0" min="0">
    <!-- Number of results per page. -->
    <input type="number" name="per_page" value="50" min="1">
    <input type="submit" value="Search">
</form>
</body>
</html>

71
templates/search.html Normal file
View File

@ -0,0 +1,71 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
    {# Expects "docs": a list of objects exposing doc_id, name and type. #}
    <style>
        /* Enlarge a preview while the pointer is over it. */
        .fit:hover {
            -webkit-transform:scale(2.5);
            -moz-transform:scale(2.5);
            -ms-transform:scale(2.5);
            -o-transform:scale(2.5);
            transform:scale(2.5);
        }

        /* Make the media element fill its container. */
        .fit {
            width: 100%;
            height: 100%;
        }

        /* Fixed-size frame; overflow is hidden so the hover zoom is clipped. */
        .image-container{
            width: 305px;
            height: 300px;
            background-color: #ccc;
            overflow: hidden;
        }

        /* Frame plus caption, laid out side by side. */
        .doc-container {
            height: 330px;
            display: inline-block;
        }

        .doc-caption {
            display: inline-block;
        }
    </style>
</head>
<body>
<div class="photos">
    {% for doc in docs %}
        {% if doc.type == "audio" %}
            {# The <source> element is commented out, so the player has nothing to play yet. #}
            <div class="image-container">
                <audio controls class="fit">
                    <!--<source src="files/{{doc.doc_id}}">-->
                </audio>
            </div>
        {% else %}
            {# Everything else is shown as a thumbnail that links to the original file. #}
            <a href="/files/{{doc.doc_id}}">
                <div class="doc-container">
                    <div class="image-container">
                        <img class="fit" src="/thumbs/{{doc.doc_id}}" alt="{{doc.name}}">
                    </div>
                    <span class="doc-caption" style="font-size: 8pt">{{doc.name}}</span>
                </div>
            </a>
        {% endif %}
    {% endfor %}
</div>
</body>
</html>

129
webserver.py Normal file
View File

@ -0,0 +1,129 @@
from flask import Flask, render_template, send_file, request
import pysolr
import mimetypes
import requests
import json
from PIL import Image
import os
# Base URL of the Solr core; handler names such as "get" are appended to it.
SOLR_URL = "http://localhost:8983/solr/test/"
# pysolr client used by the search view (timeout=10, presumably seconds).
solr = pysolr.Solr(SOLR_URL, timeout=10)
# Flask application on which the route decorators below register the views.
app = Flask(__name__)
class Document:
    """Plain record describing one indexed file.

    Stores the five constructor arguments unchanged as the attributes
    ``doc_id``, ``name``, ``path``, ``size`` and ``md5``.
    """

    def __init__(self, doc_id, name, path, size, md5):
        self.doc_id, self.name = doc_id, name
        self.path, self.size, self.md5 = path, size, md5
class ImageDocument(Document):
    """Document whose ``type`` attribute is ``"image"``."""

    def __init__(self, doc_id, name, path, size, md5):
        super().__init__(doc_id=doc_id, name=name, path=path, size=size, md5=md5)
        # Tag read by the search template to pick the markup for this hit.
        self.type = "image"
class AudioClipDocument(Document):
    """Document whose ``type`` attribute is ``"audio"``."""

    def __init__(self, doc_id, name, path, size, md5):
        super().__init__(doc_id=doc_id, name=name, path=path, size=size, md5=md5)
        # Tag read by the search template to pick the markup for this hit.
        self.type = "audio"
def get_document(id):
    """Fetch a single document from Solr's ``get`` handler by its id.

    Returns the value stored under the ``"doc"`` key of the JSON response.
    The route handlers that call this treat a ``None`` result as "no such
    document".

    Raises whatever ``requests`` raises on connection problems or timeouts,
    and ``KeyError`` if the response has no ``"doc"`` key.
    """
    # Passing the id through ``params`` URL-encodes it; gluing it onto the
    # URL by hand let characters such as "&" or "#" change the query.
    # The timeout mirrors the one configured on the pysolr client so a hung
    # Solr cannot block the request forever.
    response = requests.get(SOLR_URL + "get", params={"id": id}, timeout=10)
    return response.json()["doc"]
def make_thumb(doc):
    """Make sure a thumbnail exists for *doc* and return its relative path.

    *doc* is a Solr document (a mapping) from which ``id``, ``path[0]``,
    ``name[0]``, ``width[0]`` and, if present, ``height[0]`` are read.

    Nothing is done when ``thumbnails/<id>`` already exists. Otherwise:

    * an image larger than 1024 pixels in either dimension is shrunk to fit
      inside 1024x1024 and saved as PNG when its mode is ``RGBA``, or as
      JPEG otherwise (converting to ``RGB`` first when necessary);
    * a smaller image is not copied; a symlink to the original file is
      created instead.

    Returns ``"thumbnails/" + doc["id"]``.
    """
    size = (1024, 1024)
    thumb_dir = "thumbnails/"
    thumb_path = thumb_dir + doc["id"]
    if os.path.exists(thumb_path):
        return thumb_path

    # A fresh checkout has no thumbnail directory yet; without this both
    # image.save() and os.symlink() fail.
    os.makedirs(thumb_dir, exist_ok=True)

    file_path = doc["path"][0] + "/" + doc["name"][0]
    width = doc["width"][0]
    # Tolerate documents indexed without a height instead of raising KeyError.
    height = doc.get("height", [0])[0]

    # Check both dimensions: a tall, narrow image needs shrinking too.
    if width > size[0] or height > size[1]:
        with Image.open(file_path) as image:
            # LANCZOS is the filter formerly exposed as ANTIALIAS, a name
            # that Pillow 10 removed.
            image.thumbnail(size, Image.LANCZOS)
            if image.mode == "RGBA":
                # JPEG has no alpha channel, so keep transparency with PNG.
                image.save(thumb_path, "PNG")
            elif image.mode == "RGB":
                image.save(thumb_path, "JPEG")
            else:
                image.convert("RGB").save(thumb_path, "JPEG")
    else:
        print("Skipping thumbnail")
        os.symlink(file_path, thumb_path)

    return thumb_path
@app.route("/search/")
def search():
    """Run a Solr query and render the hits with ``search.html``.

    Query-string parameters:

    * ``query`` - the Solr query; a missing or blank value falls back to
      ``*:*`` (match everything).
    * ``page`` - zero-based page number, default 0.
    * ``per_page`` - number of rows per page, default 50.

    ``page`` and ``per_page`` fall back to their defaults when absent or not
    integers, and negative values are clamped to 0, so a malformed URL no
    longer ends in a server error.

    Only hits whose first ``mime`` value starts with ``image`` or ``audio``
    are passed to the template; all other hits are dropped.
    """
    query = request.args.get("query") or "*:*"
    # ``type=int`` makes Werkzeug return the default when conversion fails.
    page = max(request.args.get("page", 0, type=int), 0)
    per_page = max(request.args.get("per_page", 50, type=int), 0)

    results = solr.search(query, None, rows=per_page, start=per_page * page)

    # MIME prefix -> document class, checked in this order.
    document_types = (("image", ImageDocument), ("audio", AudioClipDocument))

    docs = []
    for r in results:
        mime_type = r["mime"][0] if "mime" in r else ""
        for prefix, document_class in document_types:
            if mime_type.startswith(prefix):
                docs.append(
                    document_class(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"])
                )
                break

    return render_template("search.html", docs=docs)
@app.route("/")
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    landing_page = render_template("index.html")
    return landing_page
@app.route("/files/<id>/")
def files(id):
    """Send the original file that belongs to the Solr document *id*.

    The file location is ``path[0] + "/" + name[0]`` of the document and the
    MIME type is guessed from that path. Responds with ``"File not found"``
    and status 404 when Solr has no such document or the file is no longer
    on disk.
    """
    doc = get_document(id)
    if doc is None:
        # Report the miss with a real 404 rather than a 200 page.
        return "File not found", 404

    file_path = doc["path"][0] + "/" + doc["name"][0]
    try:
        return send_file(file_path, mimetype=mimetypes.guess_type(file_path)[0])
    except FileNotFoundError:
        # The index can be stale: the file was moved or deleted after crawling.
        return "File not found", 404
@app.route("/thumbs/<doc_id>/")
def thumbs(doc_id):
    """Send the thumbnail of the Solr document *doc_id*, creating it if needed.

    Responds with ``"File not found"`` and status 404 when Solr has no such
    document.
    """
    doc = get_document(doc_id)
    if doc is None:
        # Report the miss with a real 404 rather than a 200 page.
        return "File not found", 404

    thumb_path = make_thumb(doc)

    # Thumbnails are stored without a file extension, so guessing from the
    # name yields nothing. Ask Pillow which format the file really has.
    try:
        with Image.open(thumb_path) as thumb:
            mime_type = Image.MIME.get(thumb.format)
    except OSError:
        mime_type = None
    if mime_type is None:
        # Fall back to the name-based guess in case the id does carry an extension.
        mime_type = mimetypes.guess_type(thumb_path)[0]

    return send_file(thumb_path, mimetype=mime_type)
# Start the Flask server on all network interfaces, port 8080. There is no
# __main__ guard, so this also runs whenever the module is imported.
app.run("0.0.0.0", 8080)