mirror of
https://github.com/simon987/Simple-Incremental-Search-Tool.git
synced 2025-04-10 05:56:44 +00:00
Initial commit
This commit is contained in:
commit
caad7cd458
64
crawler.py
Normal file
64
crawler.py
Normal file
@ -0,0 +1,64 @@
|
||||
import os
|
||||
import hashlib
|
||||
import mimetypes
|
||||
from PIL import Image
|
||||
import simplejson
|
||||
|
||||
# Directory tree to index; passed to crawl() by the call at the bottom of this script.
rootDir = "/home/simon/Documents"
|
||||
|
||||
|
||||
# https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
|
||||
def md5sum(filename, block_size=65536):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is opened in binary mode and read ``block_size`` bytes at a
    time, so the whole file is never held in memory at once.

    Args:
        filename: Path of the file to hash.
        block_size: Number of bytes requested per read.

    Returns:
        The digest as a string of hexadecimal characters.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Named ``digest`` rather than ``hash`` so the builtin is not shadowed.
    digest = hashlib.md5()
    with open(filename, "rb") as f:
        # Two-argument iter() keeps calling read() until it returns b"" (EOF).
        for block in iter(lambda: f.read(block_size), b""):
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def crawl(root_dir):
    """Walk *root_dir* and write one metadata record per file to ``crawler.json``.

    Each record is a dict with the keys ``md5``, ``path`` (the containing
    directory), ``name``, ``size`` and ``mtime`` (whole seconds). When a MIME
    type can be guessed from the file name it is stored under ``mime``; for
    ``image*`` types the pixel dimensions are stored under ``width`` and
    ``height``. If Pillow cannot read such an image, ``mime`` is removed from
    the record again and no dimensions are stored.

    The records are serialised with ``simplejson`` and written to
    ``crawler.json`` in the current working directory.

    Args:
        root_dir: Directory whose tree is walked with ``os.walk``.
    """
    docs = []

    for root, _subdirs, files in os.walk(root_dir):
        # Progress output: one line per visited directory.
        print(root)

        for filename in files:
            full_path = os.path.join(root, filename)

            try:
                doc = {
                    "md5": md5sum(full_path),
                    "path": root,
                    "name": filename,
                    "size": os.path.getsize(full_path),
                    "mtime": int(os.path.getmtime(full_path)),
                }
            except OSError as err:
                # A broken symlink or unreadable file should not abort the
                # whole crawl; report it and move on to the next file.
                print("Skipping unreadable file:", full_path, err)
                continue

            mime_type = mimetypes.guess_type(full_path)[0]
            if mime_type is not None:
                doc["mime"] = mime_type

                if mime_type.startswith("image"):
                    try:
                        # The context manager closes the underlying file
                        # handle as soon as the size has been read.
                        with Image.open(full_path) as image:
                            doc["width"], doc["height"] = image.size
                    except (OSError, ValueError):
                        # Not readable as an image: drop the MIME type so the
                        # record is not advertised as an image.
                        doc.pop("mime", None)

            docs.append(doc)

    # ``with`` guarantees the output file is closed even if serialisation fails.
    with open("crawler.json", "w") as out:
        out.write(simplejson.dumps(docs))
|
||||
|
||||
|
||||
# Start the crawl. This runs at module level (there is no __main__ guard), so importing this file also triggers it.
crawl(rootDir)
|
16
templates/index.html
Normal file
16
templates/index.html
Normal file
@ -0,0 +1,16 @@
|
||||
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
{# Search form. It is submitted with GET to /search/ (trailing slash included,
   matching the registered route, so no redirect is needed). #}
<form action="/search/">
    <input name="query" aria-label="Search query">
    {# Zero-based page index and page size; negative values are rejected client-side. #}
    <input type="number" name="page" value="0" min="0" aria-label="Page">
    <input type="number" name="per_page" value="50" min="1" aria-label="Results per page">
    <input type="submit" value="Search">
</form>

</body>
</html>
|
71
templates/search.html
Normal file
71
templates/search.html
Normal file
@ -0,0 +1,71 @@
|
||||
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>

    {# Styles live in <head>; a <style> element is not valid inside <body>. #}
    <style>
        /* Enlarge the media element while hovered. */
        .fit:hover {
            -webkit-transform:scale(2.5);
            -moz-transform:scale(2.5);
            -ms-transform:scale(2.5);
            -o-transform:scale(2.5);
            transform:scale(2.5);
        }

        .fit {
            width: 100%;
            height: 100%;
        }

        /* Fixed-size frame; overflow is clipped so the hover zoom stays inside it. */
        .image-container{
            width: 305px;
            height: 300px;
            background-color: #ccc;
            overflow: hidden;
        }

        .doc-container {
            height: 330px;
            display: inline-block;
        }

        .doc-caption {
            display: inline-block;
            font-size: 8pt;
        }
    </style>
</head>
<body>

<div class="photos">

    {# One tile per result; `docs` is supplied by the search view. #}
    {% for doc in docs %}
        {% if doc.type == "audio" %}
            <div class="image-container">
                <audio controls class="fit">
                    {# The source is left disabled exactly as in the original template. #}
                    <!--<source src="files/{{doc.doc_id}}">-->
                </audio>
            </div>
        {% else %}
            {# The thumbnail links to the full file. #}
            <a href="/files/{{doc.doc_id}}">
                <div class="doc-container">
                    <div class="image-container">
                        <img class="fit" src="/thumbs/{{doc.doc_id}}" alt="{{doc.name}}">
                    </div>
                    <span class="doc-caption">{{doc.name}}</span>
                </div>
            </a>
        {% endif %}
    {% endfor %}

</div>

</body>
</html>
|
||||
|
129
webserver.py
Normal file
129
webserver.py
Normal file
@ -0,0 +1,129 @@
|
||||
from flask import Flask, render_template, send_file, request
|
||||
import pysolr
|
||||
import mimetypes
|
||||
import requests
|
||||
import json
|
||||
from PIL import Image
|
||||
import os
|
||||
|
||||
# Base URL of the Solr core used by this server; it ends with a slash because request paths are appended to it directly.
SOLR_URL = "http://localhost:8983/solr/test/"
|
||||
|
||||
# Shared pysolr client for SOLR_URL, created with timeout=10; used by the search view.
solr = pysolr.Solr(SOLR_URL, timeout=10)
|
||||
|
||||
# Flask application object; the @app.route decorators below register the view functions on it.
app = Flask(__name__)
|
||||
|
||||
|
||||
class Document:
    """Plain record for one search hit: an identifier plus file metadata."""

    def __init__(self, doc_id, name, path, size, md5):
        # Store every constructor argument under an attribute of the same name.
        (
            self.doc_id,
            self.name,
            self.path,
            self.size,
            self.md5,
        ) = (doc_id, name, path, size, md5)
|
||||
|
||||
|
||||
class ImageDocument(Document):
    """Document whose ``type`` attribute is the string ``"image"``."""

    def __init__(self, doc_id, name, path, size, md5):
        super().__init__(doc_id=doc_id, name=name, path=path, size=size, md5=md5)
        # The search template reads ``type`` to choose how to render the hit.
        self.type = "image"
|
||||
|
||||
|
||||
class AudioClipDocument(Document):
    """Document whose ``type`` attribute is the string ``"audio"``."""

    def __init__(self, doc_id, name, path, size, md5):
        super().__init__(doc_id=doc_id, name=name, path=path, size=size, md5=md5)
        # The search template reads ``type`` to choose how to render the hit.
        self.type = "audio"
|
||||
|
||||
|
||||
def get_document(id):
    """Fetch a single document from Solr by its id.

    Sends a GET request to the ``get`` handler under ``SOLR_URL`` and returns
    the value stored under the ``"doc"`` key of the JSON response.

    Args:
        id: Identifier of the document to fetch. The name shadows the builtin
            but is kept so existing keyword callers continue to work.

    Returns:
        The value of the response's ``"doc"`` entry, or ``None`` when the
        response has no such entry. Callers in this file treat ``None`` as
        "not found".

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    # Passing the id through ``params`` lets requests URL-encode it. The value
    # originates from the request path, so concatenating it unescaped would
    # allow extra query parameters to be smuggled into the Solr request.
    # The timeout mirrors the one configured on the pysolr client.
    response = requests.get(SOLR_URL + "get", params={"id": id}, timeout=10)

    # .get() instead of ["doc"]: a response without a "doc" entry (for example
    # an error payload) then flows into the callers' not-found handling.
    return json.loads(response.text).get("doc")
|
||||
|
||||
|
||||
def make_thumb(doc):
    """Make sure a thumbnail exists for *doc* and return its path.

    The thumbnail lives at ``thumbnails/<doc id>`` relative to the current
    working directory. If that path already exists nothing else is done.
    Otherwise:

    * images larger than 1024 pixels in either dimension are scaled down to
      fit within 1024x1024 and saved as PNG (mode ``RGBA``) or JPEG (all other
      modes, converted to ``RGB`` when necessary);
    * smaller images are not re-encoded; a symlink to the original file is
      created instead.

    Args:
        doc: Mapping with an ``"id"`` string and list-valued ``"path"``,
            ``"name"``, ``"width"`` and (optionally) ``"height"`` entries, of
            which the first element is used.

    Returns:
        The thumbnail path, ``"thumbnails/" + doc["id"]``.
    """
    size = (1024, 1024)
    thumb_dir = "thumbnails"
    thumb_path = thumb_dir + "/" + doc["id"]

    # Already generated (or linked) on an earlier request.
    if os.path.exists(thumb_path):
        return thumb_path

    # The original assumed this directory had been created by hand.
    os.makedirs(thumb_dir, exist_ok=True)

    file_path = doc["path"][0] + "/" + doc["name"][0]
    width = doc["width"][0]
    # Tolerate records without a height; they are then judged on width alone.
    height = doc.get("height", [0])[0]

    # Compare both dimensions so tall, narrow images are downscaled as well.
    if width > size[0] or height > size[1]:
        # The context manager releases the source file handle afterwards.
        with Image.open(file_path) as image:
            # LANCZOS is the filter formerly exposed as ANTIALIAS, an alias
            # that was removed in Pillow 10.
            image.thumbnail(size, Image.LANCZOS)

            if image.mode == "RGBA":
                # Keep the alpha channel, which JPEG cannot store.
                image.save(thumb_path, "PNG")
            elif image.mode == "RGB":
                image.save(thumb_path, "JPEG")
            else:
                # Palette, greyscale, CMYK, ...: normalise before JPEG encoding.
                image.convert("RGB").save(thumb_path, "JPEG")
    else:
        print("Skipping thumbnail")
        # Absolute target: a relative one would be resolved against the
        # thumbnails directory rather than the working directory.
        os.symlink(os.path.abspath(file_path), thumb_path)

    return thumb_path
|
||||
|
||||
|
||||
@app.route("/search/")
def search():
    """Run a Solr query and render the matching image and audio documents.

    Query-string parameters:
        query: Search string passed to Solr. When missing or empty, an empty
            result page is rendered without contacting Solr.
        page: Zero-based page index (default 0).
        per_page: Number of results per page (default 50, the value the
            index form submits).

    Missing or non-numeric ``page`` / ``per_page`` values fall back to their
    defaults and negative values are clamped to 0. Results whose MIME type
    starts with neither ``image`` nor ``audio`` are left out of the page.
    """
    query = request.args.get("query", "")
    # ``type=int`` makes Werkzeug return the default when the value is absent
    # or cannot be converted, instead of raising inside the view.
    page = max(request.args.get("page", 0, type=int), 0)
    per_page = max(request.args.get("per_page", 50, type=int), 0)

    if not query:
        return render_template("search.html", docs=[])

    results = solr.search(query, None, rows=per_page, start=per_page * page)

    # MIME-type prefix -> class used to represent the hit.
    doc_classes = (("image", ImageDocument), ("audio", AudioClipDocument))

    docs = []
    for r in results:
        # "mime" is optional and, like "name" and "path", stored as a list.
        mime_type = r["mime"][0] if "mime" in r else ""

        for prefix, doc_class in doc_classes:
            if mime_type.startswith(prefix):
                docs.append(doc_class(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))
                break

    return render_template("search.html", docs=docs)
|
||||
|
||||
|
||||
@app.route("/")
def index():
    """Render and return the ``index.html`` template for the site root."""
    page = render_template("index.html")
    return page
|
||||
|
||||
|
||||
@app.route("/files/<id>/")
def files(id):
    """Send the original file that belongs to the document *id*.

    Args:
        id: Document identifier taken from the URL. The parameter name must
            match the ``<id>`` placeholder in the route, so it is kept even
            though it shadows the builtin.

    Returns:
        The file contents, with a MIME type guessed from the file name, or
        the text ``"File not found"`` with status 404 when the document is
        unknown or its file is no longer on disk.
    """
    doc = get_document(id)
    if doc is None:
        # Report the miss with a real 404 rather than a 200 response.
        return "File not found", 404

    file_path = doc["path"][0] + "/" + doc["name"][0]
    if not os.path.isfile(file_path):
        # Indexed earlier but since moved or deleted.
        return "File not found", 404

    return send_file(file_path, mimetype=mimetypes.guess_type(file_path)[0])
|
||||
|
||||
|
||||
@app.route("/thumbs/<doc_id>/")
def thumbs(doc_id):
    """Send the thumbnail of the document *doc_id*, creating it if necessary.

    Args:
        doc_id: Document identifier taken from the URL.

    Returns:
        The thumbnail file, or the text ``"File not found"`` with status 404
        when the document is unknown.
    """
    doc = get_document(doc_id)
    if doc is None:
        # Report the miss with a real 404 rather than a 200 response.
        return "File not found", 404

    # Serve exactly the path make_thumb() reports instead of rebuilding it.
    thumb_path = make_thumb(doc)

    # NOTE(review): thumbnail paths appear to carry no file extension, in
    # which case guess_type() yields None here — confirm this is acceptable.
    return send_file(thumb_path, mimetype=mimetypes.guess_type(thumb_path)[0])
|
||||
|
||||
|
||||
# Start Flask's built-in server on all interfaces, port 8080. This runs at module level (there is no __main__ guard), so importing this file also starts the server.
app.run("0.0.0.0", 8080)
|
Loading…
x
Reference in New Issue
Block a user