From caad7cd4584082097f4bf4d436a33404deedcf6b Mon Sep 17 00:00:00 2001
From: simon <Anise5-humid5-Allow-5Lulu-Honk>
Date: Mon, 22 Jan 2018 20:26:19 -0500
Subject: [PATCH] Initial commit

---
 crawler.py            |  64 +++++++++++++++++++++
 templates/index.html  |  16 ++++++
 templates/search.html |  71 +++++++++++++++++++++++
 webserver.py          | 129 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 280 insertions(+)
 create mode 100644 crawler.py
 create mode 100644 templates/index.html
 create mode 100644 templates/search.html
 create mode 100644 webserver.py
diff --git a/crawler.py b/crawler.py
new file mode 100644
index 0000000..479ea45
--- /dev/null
+++ b/crawler.py
@@ -0,0 +1,64 @@
+import os
+import hashlib
+import mimetypes
+from PIL import Image
+import simplejson
+
+rootDir = "/home/simon/Documents"  # root of the directory tree handed to crawl() below
+
+
+def md5sum(filename, block_size=65536):
+    """Return the hex MD5 digest of a file read in block_size chunks (https://stackoverflow.com/q/3431825)."""
+    digest = hashlib.md5()
+    with open(filename, "rb") as stream:
+        for chunk in iter(lambda: stream.read(block_size), b""):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def crawl(root_dir):
+    """Walk root_dir and dump metadata for every file into crawler.json.
+
+    Each file becomes a dict with its md5, containing directory ("path"),
+    name, size in bytes and integer mtime. A MIME type guessed from the
+    file name is stored as "mime"; images also get "width" and "height".
+    An image that PIL cannot open loses its "mime" entry again.
+    """
+    docs = []
+
+    for root, _subdirs, files in os.walk(root_dir):
+        print(root)
+
+        for filename in files:
+            full_path = os.path.join(root, filename)
+
+            doc = {
+                "md5": md5sum(full_path),
+                "path": root,
+                "name": filename,
+                "size": os.path.getsize(full_path),
+                "mtime": int(os.path.getmtime(full_path)),
+            }
+
+            mime_type = mimetypes.guess_type(full_path)[0]
+
+            if mime_type is not None:
+                doc["mime"] = mime_type
+
+                if mime_type.startswith("image"):
+                    try:
+                        # The context manager closes the image's file handle.
+                        with Image.open(full_path) as image:
+                            doc["width"], doc["height"] = image.size
+                    except (OSError, ValueError):
+                        # Not a readable image after all: drop the MIME type.
+                        doc.pop("mime", None)
+
+            docs.append(doc)
+
+    # "with" closes the output file even if serialisation raises.
+    with open("crawler.json", "w") as out_file:
+        out_file.write(simplejson.dumps(docs))
+
+
+crawl(rootDir)  # no __main__ guard: the crawl starts whenever this module is run or imported
\ No newline at end of file
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..56fa48f
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,16 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <title>Title</title>
+</head>
+<body>
+    <!-- GET form: the /search/ route reads query, page and per_page from the query string. -->
+    <form action="/search/">
+        <input name="query" required>
+        <input type="number" name="page" value="0" min="0">
+        <input type="number" name="per_page" value="50" min="1">
+        <input type="submit" value="Search">
+    </form>
+</body>
+</html>
\ No newline at end of file
diff --git a/templates/search.html b/templates/search.html
new file mode 100644
index 0000000..dc28ba9
--- /dev/null
+++ b/templates/search.html
@@ -0,0 +1,71 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <title>Title</title>
+<style>
+    .fit:hover {
+        -webkit-transform:scale(2.5);
+        -moz-transform:scale(2.5);
+        -ms-transform:scale(2.5);
+        -o-transform:scale(2.5);
+        transform:scale(2.5);
+
+    }
+
+    .fit {
+        width: 100%;
+        height: 100%;
+    }
+
+    .image-container{
+        width: 305px;
+        height: 300px;
+        background-color: #ccc;
+        overflow: hidden;
+
+    }
+
+    .doc-container {
+        height: 330px;
+        display: inline-block;
+    }
+
+    .doc-caption {
+        display: inline-block;
+    }
+</style>
+</head>
+<body>
+
+{# One tile per entry of docs: audio entries get a player box, all others a linked thumbnail. #}
+<div class="photos">
+
+    {% for doc in docs %}
+    {% if doc.type == "audio" %}
+    <div class="image-container">
+        <audio controls class="fit">
+            <!--<source src="files/{{doc.doc_id}}">-->
+        </audio>
+    </div>
+    {% else %}
+    <a href="/files/{{doc.doc_id}}/">
+        <div class="doc-container">
+            <div class="image-container">
+                <img class="fit" src="/thumbs/{{doc.doc_id}}/" alt="{{doc.name}}">
+            </div>
+            <span class="doc-caption" style="font-size: 8pt">{{doc.name}}</span>
+
+        </div>
+
+    </a>
+    {% endif %}
+    {% endfor %}
+
+</div>
+
+</body>
+
+
+</html>
+
diff --git a/webserver.py b/webserver.py
new file mode 100644
index 0000000..df8f15b
--- /dev/null
+++ b/webserver.py
@@ -0,0 +1,129 @@
+from flask import Flask, render_template, send_file, request
+import pysolr
+import mimetypes
+import requests
+import json
+from PIL import Image
+import os
+
+SOLR_URL = "http://localhost:8983/solr/test/"
+# pysolr client used by search(); get_document() queries SOLR_URL directly through requests.
+solr = pysolr.Solr(SOLR_URL, timeout=10)
+
+app = Flask(__name__)
+
+
+class Document:
+    """Base record for one indexed file as it is handed to the search template."""
+    def __init__(self, doc_id, name, path, size, md5):
+        # doc_id is what search.html puts into the /files/ and /thumbs/ URLs.
+        self.doc_id = doc_id
+        self.name, self.path = name, path
+        self.size, self.md5 = size, md5
+
+
+class ImageDocument(Document):  # built by search() for hits whose MIME type starts with "image"
+    def __init__(self, doc_id, name, path, size, md5):
+        super().__init__(doc_id, name, path, size, md5)
+        self.type = "image"  # search.html picks the markup for a hit from this attribute
+
+
+class AudioClipDocument(Document):  # built by search() for hits whose MIME type starts with "audio"
+    def __init__(self, doc_id, name, path, size, md5):
+        super().__init__(doc_id, name, path, size, md5)
+        self.type = "audio"  # search.html renders an <audio> box when it sees this value
+
+
+def get_document(id):
+    """Return the "doc" entry of Solr's get-handler response for id (callers treat None as not found)."""
+    # params= makes requests URL-encode the id; the timeout matches the pysolr client's.
+    response = requests.get(SOLR_URL + "get", params={"id": id}, timeout=10)
+    return response.json()["doc"]
+
+
+def make_thumb(doc):
+    """Create the cached thumbnail for a Solr image document if missing and return its path.
+
+    Images larger than 1024px in either dimension are shrunk to fit 1024x1024;
+    smaller ones are symlinked to the original file instead of being re-encoded.
+    """
+    size = (1024, 1024)
+    thumb_path = "thumbnails/" + doc["id"]
+    if not os.path.exists(thumb_path):
+        # Nothing in this file creates the cache directory, so make sure it exists.
+        os.makedirs("thumbnails", exist_ok=True)
+        file_path = doc["path"][0] + "/" + doc["name"][0]
+
+        # Test both dimensions: a tall, narrow image needs shrinking too.
+        if doc["width"][0] > size[0] or doc["height"][0] > size[1]:
+            with Image.open(file_path) as image:
+                # LANCZOS is the filter ANTIALIAS aliased before Pillow 10 removed that name.
+                image.thumbnail(size, Image.LANCZOS)
+                # RGBA keeps its alpha channel as PNG; every other mode ends up as an RGB JPEG.
+                thumb = image if image.mode in ("RGB", "RGBA") else image.convert("RGB")
+                thumb.save(thumb_path, "PNG" if thumb.mode == "RGBA" else "JPEG")
+        else:
+            print("Skipping thumbnail")
+            os.symlink(file_path, thumb_path)
+
+    return thumb_path
+
+
+@app.route("/search/")
+def search():
+    """Run "query" (empty: Solr's match-all "*:*") and render the image and audio hits.
+
+    Missing or non-numeric "page"/"per_page" fall back to 0/50, the defaults of
+    the form in index.html, instead of raising; negative values are clamped to 0.
+    """
+    query = request.args.get("query") or "*:*"
+    # MultiDict.get(..., type=int) returns the default when the conversion fails.
+    page = max(request.args.get("page", 0, type=int), 0)
+    per_page = max(request.args.get("per_page", 50, type=int), 0)
+
+    results = solr.search(query, None, rows=per_page, start=per_page * page)
+
+    docs = []
+    for r in results:
+        mime_type = r["mime"][0] if "mime" in r else ""
+        # Hits that are neither image nor audio are left out of the page.
+        if mime_type.startswith("image"):
+            docs.append(ImageDocument(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))
+        elif mime_type.startswith("audio"):
+            docs.append(AudioClipDocument(r["id"], r["name"][0], r["path"][0], r["size"], r["md5"]))
+
+    return render_template("search.html", docs=docs)
+
+
+@app.route("/")
+def index():  # landing page: renders the index.html template
+    return render_template("index.html")
+
+
+@app.route("/files/<id>/")
+def files(id):
+    """Send the original file behind Solr document id, or a 404 when get_document() returns None."""
+    doc = get_document(id)
+    if doc is None:
+        # Report the miss with a real 404 status instead of a 200 page.
+        return "File not found", 404
+
+    file_path = doc["path"][0] + "/" + doc["name"][0]
+    return send_file(file_path, mimetype=mimetypes.guess_type(file_path)[0])
+
+
+@app.route("/thumbs/<doc_id>/")
+def thumbs(doc_id):
+    """Send the thumbnail of Solr document doc_id, creating it on first use; 404 if unknown."""
+    doc = get_document(doc_id)
+    if doc is None:
+        return "File not found", 404
+    thumb_path = make_thumb(doc)
+    # Thumbnail paths are built from the Solr id, so guess_type() usually finds no
+    # extension to go by; ask PIL which format the file really is.
+    with Image.open(thumb_path) as image:
+        mimetype = Image.MIME.get(image.format)
+    return send_file(thumb_path, mimetype=mimetype)
+
+
+app.run("0.0.0.0", 8080)  # host 0.0.0.0 / port 8080; no __main__ guard, so importing this module starts the server
\ No newline at end of file