Basic searching integrated with elasticsearch + highlighting

This commit is contained in:
Simon 2018-06-12 16:29:05 -04:00
parent af2601ee70
commit 0127b3a51d
4 changed files with 43 additions and 29 deletions

7
app.py
View File

@ -8,6 +8,7 @@ import od_util
import config import config
from flask_caching import Cache from flask_caching import Cache
from task import TaskDispatcher, Task from task import TaskDispatcher, Task
from search.search import ElasticSearchEngine
app = Flask(__name__) app = Flask(__name__)
recaptcha = ReCaptcha(app=app, recaptcha = ReCaptcha(app=app,
@ -21,6 +22,7 @@ app.jinja_env.globals.update(get_color=od_util.get_color)
app.jinja_env.globals.update(get_mime=od_util.get_mime) app.jinja_env.globals.update(get_mime=od_util.get_mime)
taskDispatcher = TaskDispatcher() taskDispatcher = TaskDispatcher()
searchEngine = ElasticSearchEngine("od-database")
@app.template_filter("datetime_format") @app.template_filter("datetime_format")
@ -108,14 +110,15 @@ def search():
if q: if q:
try: try:
# hits = sea.search(q, per_page, page, sort_order) # hits = sea.search(q, per_page, page, sort_order)
print("FIXME: Search") hits = searchEngine.search(q, page, per_page)
hits = []
except InvalidQueryException as e: except InvalidQueryException as e:
flash("<strong>Invalid query:</strong> " + str(e), "warning") flash("<strong>Invalid query:</strong> " + str(e), "warning")
return redirect("/search") return redirect("/search")
else: else:
hits = None hits = None
print(hits)
return render_template("search.html", return render_template("search.html",
results=hits, q=q, p=page, sort_order=sort_order, results=hits, q=q, p=page, sort_order=sort_order,
per_page=per_page, results_set=config.RESULTS_PER_PAGE) per_page=per_page, results_set=config.RESULTS_PER_PAGE)

View File

@ -109,9 +109,10 @@ class ElasticSearchEngine(SearchEngine):
return "\n".join("".join([action_string, doc[:-1], website_id_string]) for doc in docs) return "\n".join("".join([action_string, doc[:-1], website_id_string]) for doc in docs)
def search(self, query) -> {}: def search(self, query, page, per_page) -> {}:
filters = [] filters = []
sort_by = ["_score"]
page = self.es.search(body={ page = self.es.search(body={
"query": { "query": {
@ -126,22 +127,15 @@ class ElasticSearchEngine(SearchEngine):
"filter": filters "filter": filters
} }
}, },
"sort": [ "sort": sort_by,
"_score"
],
"highlight": { "highlight": {
"fields": { "fields": {
"name": {"pre_tags": ["<span class='hl'>"], "post_tags": ["</span>"]}, "name": {"pre_tags": ["<span class='hl'>"], "post_tags": ["</span>"]},
"path": {"pre_tags": ["<span class='hl'>"], "post_tags": ["</span>"]}
} }
}, },
"size": 40}, index=self.index_name, scroll="8m") "size": per_page, "from": page * per_page}, index=self.index_name)
# todo get scroll time from config # todo get scroll time from config
# todo get size from config # todo get size from config
return page return page
def scroll(self, scroll_id) -> {}:
try:
return self.es.scroll(scroll_id=scroll_id, scroll="3m") # todo get scroll time from config
except TransportError:
return None

View File

@ -73,3 +73,6 @@
100% { 100% {
background-color: rgba(255, 255, 255, 0.6); } } background-color: rgba(255, 255, 255, 0.6); } }
.hl {
background: #fff217;
}

View File

@ -18,10 +18,17 @@
<div class="form-group col-md-2"> <div class="form-group col-md-2">
<select class="form-control" name="sort_order" title="Sort order"> <select class="form-control" name="sort_order" title="Sort order">
<option disabled>Select sort order</option> <option disabled>Select sort order</option>
<option value="score" {{ "selected" if sort_order == "score" else "" }}>Relevance</option> <option value="score" {{ "selected" if sort_order == "score" else "" }}>Relevance
<option value="size_asc" {{ "selected" if sort_order == "size_asc" else "" }}>Size ascending</option> </option>
<option value="size_dsc" {{ "selected" if sort_order == "size_dsc" else "" }}>Size descending</option> <option value="size_asc" {{ "selected" if sort_order == "size_asc" else "" }}>Size
<option value="none" {{ "selected" if sort_order == "none" else "" }}>No order (faster)</option> ascending
</option>
<option value="size_dsc" {{ "selected" if sort_order == "size_dsc" else "" }}>Size
descending
</option>
<option value="none" {{ "selected" if sort_order == "none" else "" }}>No order
(faster)
</option>
</select> </select>
</div> </div>
<div class="form-group col-md-2"> <div class="form-group col-md-2">
@ -40,7 +47,7 @@
</div> </div>
</div> </div>
{% if results %} {% if results["hits"]["total"] > 0 %}
<div class="card"> <div class="card">
<div class="card-body"> <div class="card-body">
@ -48,39 +55,46 @@
<table class="table"> <table class="table">
<tbody> <tbody>
{% for hit in results %} {% for hit in results["hits"]["hits"] %}
{% set path = hit[1] + hit[2] %} {% set src = hit["_source"] %}
{% set hl_name = hit["highlight"]["name"][0] if "name" in hit["highlight"] else src["name"] %}
{% set hl_path = hit["highlight"]["path"][0] if "path" in hit["highlight"] else src["path"] %}
{# TODO: website url + path #}
{% set path = src["path"] %}
<tr> <tr>
<td> <td>
{# File name & link #} {# File name & link #}
<a href="{{ path + "/" + hit[3] }}" title="{{ hit[3] }}"> <a href="{{ path + "/" + src["name"] }}" title="{{ src["name"] }}">
{{ hit[3] | truncate(50, True, "..>") }} {{ hl_name |safe }}
</a> </a>
{# File type badge #} {# File type badge #}
{% set mime = get_mime(hit[3]) %} {% set mime = get_mime(src["path"]) %}
{% if mime %} {% if mime %}
<span class="badge badge-pill {{ get_color(mime) }}"> <span class="badge badge-pill {{ get_color(mime) }}">
{{ hit[3][hit[3].rfind(".") + 1:] }} {{ src["path"][src["path"].rfind(".") + 1:] }}
</span> </span>
{% endif %} {% endif %}
{# File path #} {# File path #}
<div class="text-muted" title="{{ path }}" style="font-size: 10px;"> <div class="text-muted" title="{{ path }}" style="font-size: 10px;">
<a style="color: #6c757d" title="See files from this website" <a style="color: #6c757d" title="See files from this website"
href="/website/{{ hit[4] }}">{{ hit[1] }}</a>{{ truncate_path(hit[2], 60) }} {# todo: website url #}
href="/website/{{ src["website_id"] }}">{{ hl_path | safe }}</a>{{ truncate_path(src["path"], 60) }}
</div> </div>
</td> </td>
{# File size #} {# File size #}
<td style="white-space: nowrap; vertical-align: top; text-align: right; font-size: 14px"> <td style="white-space: nowrap; vertical-align: top; text-align: right; font-size: 14px">
{{ hit[0] | filesizeformat if hit[0] >= 0 else "?"}} {{ src["size"] | filesizeformat if src["size"] >= 0 else "?" }}
</td> </td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>
</div> </div>
{% if results["hits"]["total"] > (p + 1) * per_page %}
<a href="/search?q={{ q }}&p={{ p + 1 }}&sort_order={{ sort_order }}&per_page={{ per_page }}" <a href="/search?q={{ q }}&p={{ p + 1 }}&sort_order={{ sort_order }}&per_page={{ per_page }}"
class="btn btn-primary" style="float: right">Next</a> class="btn btn-primary" style="float: right">Next</a>
{% endif %}
{% if p > 0 %} {% if p > 0 %}
<a href="/search?q={{ q }}&p={{ p - 1 }}&sort_order={{ sort_order }}&per_page={{ per_page }}" <a href="/search?q={{ q }}&p={{ p - 1 }}&sort_order={{ sort_order }}&per_page={{ per_page }}"
class="btn btn-primary">Previous</a> class="btn btn-primary">Previous</a>