From 98aa258c6a0191e662938a3b8036ed657a18e9a1 Mon Sep 17 00:00:00 2001 From: simon987 Date: Sat, 24 Mar 2018 19:26:54 -0400 Subject: [PATCH] Basic searching --- crawler.py | 5 +- run.py | 57 ++++++- search.py | 18 ++ templates/document.html | 55 ++++++ templates/layout.html | 2 +- templates/search.html | 364 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 495 insertions(+), 6 deletions(-) create mode 100644 templates/document.html create mode 100644 templates/search.html diff --git a/crawler.py b/crawler.py index 06bdb67..5879e16 100644 --- a/crawler.py +++ b/crawler.py @@ -132,8 +132,9 @@ class TaskManager: def execute_thumbnails(self, directory: Directory, total_files: Value, counter: Value, done: Value): - dest_path = os.path.join("thumbnails", str(directory.id)) - shutil.rmtree(dest_path) + dest_path = os.path.join("static/thumbnails", str(directory.id)) + if os.path.exists(dest_path): + shutil.rmtree(dest_path) docs = list(Search("changeme").get_all_documents(directory.id)) diff --git a/run.py b/run.py index 5a752e0..96f714e 100644 --- a/run.py +++ b/run.py @@ -1,4 +1,4 @@ -from flask import Flask, render_template, request, redirect, flash, session +from flask import Flask, render_template, request, redirect, flash, session, abort, send_file from storage import Directory, Option, Task from storage import LocalStorage, DuplicateDirectoryException from crawler import RunningTask, TaskManager @@ -28,9 +28,60 @@ def get_dir_size(path): return size +@app.route("/document/") +def document(doc_id): + + doc = search.get_doc(doc_id)["_source"] + directory = storage.dirs()[doc["directory"]] + + del doc["directory"] + + return render_template("document.html", doc=doc, directory=directory, doc_id=doc_id) + + +@app.route("/file/") +def file(doc_id): + + doc = search.get_doc(doc_id)["_source"] + directory = storage.dirs()[doc["directory"]] + + full_path = os.path.join(directory.path, doc["path"], doc["name"]) + + return send_file(full_path) + + +@app.route("/thumb//") +def thumb(dir_id, doc_id): + + if dir_id in storage.dirs(): + + return app.send_static_file(os.path.join("thumbnails/", str(dir_id), doc_id)) + + else: + abort(404) + + @app.route("/") -def tmp_route(): - return "huh" +def search_page(): + return render_template("search.html") + + +@app.route("/search") +def search_route(): + + page = search.search() + + return json.dumps(page) + + +@app.route("/scroll") +def scroll_route(): + + scroll_id = request.args.get("scroll_id") + + page = search.scroll(scroll_id) + + return json.dumps(page) @app.route("/directory") diff --git a/search.py b/search.py index b443579..e05dddb 100644 --- a/search.py +++ b/search.py @@ -16,6 +16,8 @@ class Search: except: print("elasticsearch is not running") + self.search_iterator = None + def get_all_documents(self, dir_id: int): return helpers.scan(client=self.es, @@ -47,3 +49,19 @@ class Search: return int(parsed_info["indices"][self.index_name]["primaries"]["indexing"]["index_total"]) except: return 0 + + def search(self): + page = self.es.search(body={"query": {"term": {"directory": 1}}, "size": 30}, + index=self.index_name, scroll="3m") + + return page + + def scroll(self, scroll_id): + + page = self.es.scroll(scroll_id=scroll_id, scroll="3m") + + return page + + def get_doc(self, doc_id): + + return self.es.get(index=self.index_name, id=doc_id, doc_type="file") diff --git a/templates/document.html b/templates/document.html new file mode 100644 index 0000000..548260f --- /dev/null +++ b/templates/document.html @@ -0,0 +1,55 @@ +{% extends "layout.html" %} + +{% block body %} + + + +
+ +
+ +
{{ doc.name }}
+
+

Document properties

+ + + + + {% for key in doc %} + + + + + {% endfor %} + +
{{ key }}
{{ doc[key] }}
+ +
+ +

Raw json

+ + +
+ +

Directory

+ + + + + + + + + + + + +
Path
{{ directory.path }}
Name
{{ directory.name }}
+ +
+
+ +
+ + +{% endblock body %} \ No newline at end of file diff --git a/templates/layout.html b/templates/layout.html index 1d3c71a..b8162f7 100644 --- a/templates/layout.html +++ b/templates/layout.html @@ -41,7 +41,7 @@