diff --git a/README.md b/README.md index 1f38c2f..e1d11b7 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # Simple incremental search tool -Work in progress! Shouldn't be used in production environnments. + +Portable search tool for local files using Elasticsearch. ### Features * Incremental search (Search as you type) @@ -22,30 +23,14 @@ Work in progress! Shouldn't be used in production environnments. Java and python3 are required. Once the web server is running, you can connect to the search interface by typing `localhost:8080` in your browser. -## Setup on Windows +## Setup on Windows/Mac/linux (Python 3.5+) + +* Download and install [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) + ```bash git clone https://github.com/simon987/Simple-Incremental-Search-Tool -cd Projet-Web-2018 -``` -[Download latest elasticsearch version](https://www.elastic.co/downloads/elasticsearch) and extract to `Simple-Incremental-Search-Tool\elasticsearch` - -```bash -sudo pip3 install -r requirements.txt - -python3 run.py -``` - -## Setup on Mac/linux -```bash -git clone https://github.com/simon987/Simple-Incremental-Search-Tool -cd Projet-Web-2018 -wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-6.2.4.zip -unzip elasticsearch-6.2.4.zip -rm elasticsearch-6.2.4.zip -mv elasticsearch-6.2.4 elasticsearch sudo pip3 install -r requirements.txt - python3 run.py ``` diff --git a/crawler.py b/crawler.py index d182043..187cf50 100644 --- a/crawler.py +++ b/crawler.py @@ -144,9 +144,9 @@ class TaskManager: TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path), PictureFileParser(chksum_calcs, directory.path), FontParser(chksum_calcs, directory.path), - PdfFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path), # todo get content len from other opt - DocxParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path), # todo get content len from 
other opt - EbookParser(chksum_calcs, int(directory.get_option("TextFileContentLength")), directory.path)], # todo get content len from other opt + PdfFileParser(chksum_calcs, int(directory.get_option("PdfFileContentLength")), directory.path), + DocxParser(chksum_calcs, int(directory.get_option("SpreadsheetContentLength")), directory.path), + EbookParser(chksum_calcs, int(directory.get_option("EbookContentLength")), directory.path)], mime_guesser, self.indexer, directory.id) c.crawl(directory.path, counter) diff --git a/indexer.py b/indexer.py index ff5c0f2..19613a2 100644 --- a/indexer.py +++ b/indexer.py @@ -1,10 +1,9 @@ import json + import elasticsearch -from threading import Thread -import subprocess import requests + import config -import platform class Indexer: @@ -14,30 +13,12 @@ class Indexer: self.index_name = index self.es = elasticsearch.Elasticsearch() - try: - requests.head("http://localhost:9200") - - except requests.exceptions.ConnectionError: - import time - t = Thread(target=Indexer.run_elasticsearch) - t.daemon = True - t.start() - - time.sleep(25) - - if self.es.indices.exists(self.index_name): - print("Index is already setup") - else: - print("First time setup...") - self.init() - - @staticmethod - def run_elasticsearch(): - - if platform.system() == "Windows": - subprocess.Popen(["elasticsearch\\bin\\elasticsearch.bat"]) + requests.head("http://localhost:9200") + if self.es.indices.exists(self.index_name): + print("Index is already setup") else: - subprocess.Popen(["elasticsearch/bin/elasticsearch"]) + print("First time setup...") + self.init() @staticmethod def create_bulk_index_string(docs: list, directory: int): diff --git a/parsing.py b/parsing.py index 6563b41..2602a25 100644 --- a/parsing.py +++ b/parsing.py @@ -272,7 +272,7 @@ class TextFileParser(GenericFileParser): "text/x-bibtex", "text/x-tcl", "text/x-c++", "text/x-shellscript", "text/x-msdos-batch", "text/x-makefile", "text/rtf", "text/x-objective-c", "text/troff", "text/x-m4", 
"text/x-lisp", "text/x-php", "text/x-gawk", "text/x-awk", "text/x-ruby", "text/x-po", - "text/x-makefile", "application/javascript", "application/rtf" + "text/x-makefile", "application/javascript", "application/rtf", "application/json", ] def parse(self, full_path: str): diff --git a/run.py b/run.py index 8382efa..ed13926 100644 --- a/run.py +++ b/run.py @@ -1,17 +1,18 @@ -from flask import Flask, render_template, request, redirect, flash, session, abort, send_file -from storage import Directory, Option, Task, User -from storage import LocalStorage, DuplicateDirectoryException, DuplicateUserException -from crawler import RunningTask, TaskManager import json import os import shutil -import bcrypt -import config -import humanfriendly -from search import Search -from PIL import Image from io import BytesIO +import bcrypt +import humanfriendly +from PIL import Image +from flask import Flask, render_template, request, redirect, flash, session, abort, send_file + +import config +from crawler import TaskManager +from search import Search +from storage import Directory, Option, Task, User +from storage import LocalStorage, DuplicateDirectoryException, DuplicateUserException app = Flask(__name__) app.secret_key = "A very secret key" @@ -22,12 +23,10 @@ search = Search("changeme") def get_dir_size(path): - size = 0 for root, dirs, files in os.walk(path): for filename in files: - full_path = os.path.join(root, filename) size += os.path.getsize(full_path) @@ -36,18 +35,18 @@ def get_dir_size(path): @app.route("/user/") def user_manage(user): - if "admin" in session and session["admin"]: return render_template("user_manage.html", directories=storage.dirs(), user=storage.users()[user]) else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") + @app.route("/logout") def logout(): session.pop("username") session.pop("admin") - flash("Déconnection réussie", "success") + 
flash("Successfully logged out", "success") return redirect("/") @@ -60,16 +59,15 @@ def login(): session["username"] = username session["admin"] = storage.users()[username].admin - flash("Connexion réussie", "success") + flash("Successfully logged in", "success") else: - flash("Nom d'utilisateur ou mot de passe invalide", "danger") + flash("Invalid username or password", "danger") return redirect("/") @app.route("/user") def user_page(): - admin_account_present = False for user in storage.users(): @@ -80,13 +78,12 @@ def user_page(): if not admin_account_present or ("admin" in session and session["admin"]): return render_template("user.html", users=storage.users(), admin_account_present=admin_account_present) else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @app.route("/user//set_access") def user_set_access(username): - if "admin" in session and session["admin"]: dir_id = request.args["dir_id"] user = storage.users()[username] @@ -102,31 +99,29 @@ def user_set_access(username): flash("Permissions mises à jour", "success") return redirect("/user/" + username) else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @app.route("/user//set_admin") def user_set_admin(username): - if "admin" in session and session["admin"]: user = storage.users()[username] if user.username == session["username"]: - flash("Vous n'êtes pas autorisé à changer votre propre type de compte", "warning") + flash("You cannot modifiy your own account", "warning") else: user.admin = request.args["admin"] == "1" storage.update_user(user) - flash("Permissions mises à jour", "success") + flash("Permissions updated", "success") return redirect("/user/" + username) @app.route("/user/add", methods=['POST']) def user_add(): - admin_account_present = False for user in storage.users(): @@ 
-141,41 +136,40 @@ def user_add(): try: storage.save_user(User(username, password, is_admin)) - flash("Nouvel utilisateur créé", "success") + flash("Created new user", "success") except DuplicateUserException: - flash("L'utilisateur n'as pas pu être créé Assurez vous que le nom d'utilisateur est unique", "danger") + flash("Couldn't create user " + "Make sure that the username is unique", "danger") return redirect("/user") else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @app.route("/user//del") def user_del(username): - if "admin" in session and session["admin"]: if session["username"] == username: - flash("Vous ne pouvez pas supprimer votre propre compte", "warning") + flash("You cannot delete your own account", "warning") return redirect("/user/" + username) else: storage.remove_user(username) - flash("Utilisateur supprimé", "success") + flash("User deleted", "success") return redirect("/user") else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") + @app.route("/suggest") def suggest(): - return json.dumps(search.suggest(request.args.get("prefix"))) @app.route("/document/") def document(doc_id): - doc = search.get_doc(doc_id)["_source"] directory = storage.dirs()[doc["directory"]] @@ -186,7 +180,6 @@ def document(doc_id): @app.route("/dl/") def file(doc_id): - doc = search.get_doc(doc_id)["_source"] directory = storage.dirs()[doc["directory"]] @@ -201,7 +194,6 @@ def file(doc_id): @app.route("/file/") def download(doc_id): - doc = search.get_doc(doc_id)["_source"] directory = storage.dirs()[doc["directory"]] extension = "" if doc["extension"] is None or doc["extension"] == "" else "." 
+ doc["extension"] @@ -215,7 +207,6 @@ def download(doc_id): @app.route("/thumb/") def thumb(doc_id): - doc = search.get_doc(doc_id) if doc is not None: @@ -239,9 +230,8 @@ def thumb(doc_id): @app.route("/") def search_page(): - mime_map = search.get_mime_map() - mime_map.append({"id": "any", "text": "Tous"}) + mime_map.append({"id": "any", "text": "All"}) directories = [storage.dirs()[x] for x in get_allowed_dirs(session["username"] if "username" in session else None)] @@ -256,7 +246,6 @@ def search_liste_page(): def get_allowed_dirs(username): - if config.allow_guests: return [x for x in storage.dirs() if x.enabled] else: @@ -269,7 +258,6 @@ def get_allowed_dirs(username): @app.route("/search", methods=['POST']) def search_route(): - query = request.json["q"] query = "" if query is None else query @@ -292,7 +280,6 @@ def search_route(): @app.route("/scroll") def scroll_route(): - scroll_id = request.args.get("scroll_id") page = search.scroll(scroll_id) @@ -302,17 +289,15 @@ def scroll_route(): @app.route("/directory") def dir_list(): - if "admin" in session and session["admin"]: return render_template("directory.html", directories=storage.dirs()) else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @app.route("/directory/add") def directory_add(): - if "admin" in session and session["admin"]: path = request.args.get("path") name = request.args.get("name") @@ -323,19 +308,19 @@ def directory_add(): try: d.set_default_options() storage.save_directory(d) - flash("Dossier créé", "success") + flash("Directory created", "success") except DuplicateDirectoryException: - flash("Le dossier n'a pas pu être créé Assurer vous de choisir un nom unique", "danger") + flash("The directory couldn't be created Make sure to chose a unique name", + "danger") return redirect("/directory") else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You 
are not authorized to access this page", "warning") return redirect("/") @app.route("/directory/") def directory_manage(dir_id): - if "admin" in session and session["admin"]: directory = storage.dirs()[dir_id] tn_size = get_dir_size("static/thumbnails/" + str(dir_id)) @@ -344,13 +329,12 @@ def directory_manage(dir_id): return render_template("directory_manage.html", directory=directory, tn_size=tn_size, tn_size_formatted=tn_size_formatted) else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @app.route("/directory//update") def directory_update(dir_id): - if "admin" in session and session["admin"]: directory = storage.dirs()[dir_id] @@ -369,20 +353,20 @@ def directory_update(dir_id): try: storage.update_directory(updated_dir) - flash("Dossier mis à jour", "success") + flash("Updated directory", "success") except DuplicateDirectoryException: - flash("Le dossier n'a pas pu être mis à jour Assurez vous que le chemin est unique", "danger") + flash("The directory couldn't be updated Make the that the path is unique", + "danger") return redirect("/directory/" + str(dir_id)) else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @app.route("/directory//update_opt") def directory_update_opt(dir_id): - if "admin" in session and session["admin"]: opt_id = request.args.get("id") opt_key = request.args.get("key") @@ -392,7 +376,7 @@ def directory_update_opt(dir_id): return redirect("/directory/" + str(dir_id)) else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @@ -404,17 +388,16 @@ def directory_del(dir_id): shutil.rmtree("static/thumbnails/" + str(dir_id)) storage.remove_directory(dir_id) - flash("Dossier supprimé", "success") + flash("Deleted folder", 
"success") return redirect("/directory") else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @app.route("/directory//reset") def directory_reset(dir_id): - if "admin" in session and session["admin"]: directory = storage.dirs()[dir_id] @@ -431,10 +414,10 @@ def directory_reset(dir_id): search.delete_directory(dir_id) - flash("Options du dossier réinitialisés", "success") + flash("Reset directory options", "success") return redirect("directory/" + str(dir_id)) else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @@ -444,7 +427,7 @@ def task(): return render_template("task.html", tasks=storage.tasks(), directories=storage.dirs(), task_list=json.dumps(list(storage.tasks().keys()))) else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @@ -457,7 +440,7 @@ def get_current_task(): else: return "" else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @@ -468,17 +451,17 @@ def task_add(): directory = request.args.get("directory") if task_type not in ("1", "2"): - flash("Vous devez choisir un type de tâche", "danger") + flash("Please choose a task type", "danger") return redirect("/task") if directory.isdigit() and int(directory) in storage.dirs(): storage.save_task(Task(task_type, directory)) else: - flash("Vous devez choisir un dossier", "danger") + flash("You must choose a directory", "danger") return redirect("/task") else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @@ -492,14 +475,15 @@ def task_del(task_id): return 
redirect("/task") else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @app.route("/reset_es") def reset_es(): if "admin" in session and session["admin"]: - flash("Elasticsearch a été réinitialisé, les changements dans config.py ont été appliqués", "success") + flash("Elasticsearch index has been reset. Modifications made in config.py have been applied.", + "success") tm.indexer.init() if os.path.exists("static/thumbnails"): @@ -507,7 +491,7 @@ def reset_es(): return redirect("/dashboard") else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") @@ -534,7 +518,7 @@ def dashboard(): index_size=humanfriendly.format_size(search.get_index_size())) else: - flash("Vous n'êtes pas autorisé à accéder à cette page", "warning") + flash("You are not authorized to access this page", "warning") return redirect("/") diff --git a/static/js/search.js b/static/js/search.js index e013798..728e163 100644 --- a/static/js/search.js +++ b/static/js/search.js @@ -73,7 +73,7 @@ function makeStatsCard(searchResult) { statsCardBody.setAttribute("class", "card-body"); let stat = document.createElement("p"); - stat.appendChild(document.createTextNode(searchResult["hits"]["total"] + " résultats en " + searchResult["took"] + "ms")); + stat.appendChild(document.createTextNode(searchResult["hits"]["total"] + " results in " + searchResult["took"] + "ms")); let sizeStat = document.createElement("span"); sizeStat.appendChild(document.createTextNode(humanFileSize(searchResult["aggregations"]["total_size"]["value"]))); @@ -267,7 +267,7 @@ function createDocCard(hit) { } thumbnailOverlay.appendChild(resolutionBadge); - var format = hit["_source"]["format_name"]; + var format = hit["_source"]["format"]; //Hover if(format === "GIF") { diff --git a/storage.py b/storage.py index a90add7..06c8933 
100644 --- a/storage.py +++ b/storage.py @@ -7,6 +7,9 @@ import config class CheckSumCalculator: + def __init__(self): + pass + def checksum(self, string: str): return flask_bcrypt.generate_password_hash(string, config.bcrypt_rounds) diff --git a/templates/dashboard.html b/templates/dashboard.html index 75b47f8..6a75952 100644 --- a/templates/dashboard.html +++ b/templates/dashboard.html @@ -5,7 +5,7 @@
-
Information globale
+
Global information
@@ -14,45 +14,45 @@ - + - + - + - + - + - + - +
{{ version }}
Taille totale des miniaturesTotal thumbnail cache size
{{ tn_size_total }}
Nombre de documents totalsTotal document count
{{ doc_count }}
Taille totale des documents indexésTotal size of indexed documents
{{ doc_size }}
Taille totale de l'indexTotal index size
{{ index_size }}
Nombre d'utilisateursUser count
1
Chemin de la base de donnée SQLiteSQLite database path
{{ db_path }}
Adresse ElasticsearchElasticsearch URL
{{ elasticsearch_url }}
- +
Actions
@@ -60,8 +60,8 @@
-
Tâches en cours
+
Ongoing tasks
{% for task_id in tasks | sort() %}
{{ directories[tasks[task_id].dir_id].name }} - {% if tasks[task_id].type == 1 %} - Indexation + Indexing {% else %} - Génération des miniatures + Thumbnail generation {% endif %} @@ -124,12 +124,12 @@
- En attente + Queued
- +
{% endfor %} diff --git a/templates/user.html b/templates/user.html index 0d67788..fb0d7c7 100644 --- a/templates/user.html +++ b/templates/user.html @@ -1,16 +1,16 @@ {% extends "layout.html" %} {% set active_page = "user" %} -{% block title %}Liste des utilisateurs{% endblock title %} +{% block title %}User list{% endblock title %} {% block body %}
-
Créer un utilisateur
+
Create user
{% if not admin_account_present %} -

Cette page est débloquée parce qu'il n'y a aucun compte administrateur

+

This page is unlocked because there are no admin accounts

{% endif %}
@@ -18,37 +18,37 @@
- - + +
- +
- +
- +
-
Utilisateurs
+
Users
- - + + {% for user in users %} - + {% endfor %} diff --git a/templates/user_manage.html b/templates/user_manage.html index 94dd39b..895cb4b 100644 --- a/templates/user_manage.html +++ b/templates/user_manage.html @@ -7,19 +7,19 @@
-
Gérer les permission de {{ user.username }}
+
Manage permissions of {{ user.username }}
-
Administrateur:
+
Admin:
@@ -31,8 +31,8 @@
UtilisateurAdministrateurUserAdmin Actions
{% if session["username"] == user %}{{ user }}{% else %}{{ user }}{% endif %} GérerManage
- - + + @@ -43,7 +43,7 @@ @@ -53,15 +53,14 @@
DossierAccèsDirectoryAccess

- - +