From 8ffd9179d2d09a657c003d797e9090d385e0580c Mon Sep 17 00:00:00 2001
From: Simon
Date: Thu, 9 Aug 2018 14:26:22 -0400
Subject: [PATCH] Increased stats timeout value

---
 app.py           | 33 +++++++++++++++++++++++++++++++++
 callbacks.py     |  4 +++-
 search/search.py | 18 +++++++++---------
 3 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/app.py b/app.py
index c8ba6f5..b841b5c 100644
--- a/app.py
+++ b/app.py
@@ -716,5 +716,38 @@ def api_random_website():
         return abort(403)
 
 
+@app.route("/api/search", methods=["POST"])
+def api_search():
+
+    try:
+        token = request.json["token"]
+    except KeyError:
+        return abort(400)
+
+    name = db.check_api_token(token)
+
+    if name:
+
+        try:
+            hits = searchEngine.search(
+                request.json["query"],
+                request.json["page"], request.json["per_page"],
+                request.json["sort_order"],
+                request.json["extensions"],
+                request.json["size_min"], request.json["size_max"],
+                request.json["match_all"],
+                request.json["fields"],
+                request.json["date_min"], request.json["date_max"]
+            )
+
+            hits = db.join_website_on_search_result(hits)
+            return json.dumps(hits)
+
+        except InvalidQueryException as e:
+            return str(e)
+    else:
+        return abort(403)
+
+
 if __name__ == '__main__':
     app.run("0.0.0.0", port=12345, threaded=True)
diff --git a/callbacks.py b/callbacks.py
index 1965b82..f749ee1 100644
--- a/callbacks.py
+++ b/callbacks.py
@@ -59,7 +59,9 @@ class RedditCommentCallback(RedditCallback):
         print("Editing comment comment " + comment_id)
 
         stats = search.get_stats(self.task.website_id)
-        message = self.reddit_bot.get_comment(stats, self.task.website_id)
+        message = self.reddit_bot.get_comment(stats, self.task.website_id,
+                                              message="There you go! This website was crawled in `" +
+                                              str(int(task_result.end_time - task_result.start_time)) + "s`")
         print(message)
 
         self.reddit_bot.edit(self.reddit_bot.reddit.comment(comment_id), message)
diff --git a/search/search.py b/search/search.py
index fac8dd5..5c1efa8 100644
--- a/search/search.py
+++ b/search/search.py
@@ -311,7 +311,7 @@ class ElasticSearchEngine(SearchEngine):
             },
             "size": 0
 
-        }, index=self.index_name, request_timeout=120)
+        }, index=self.index_name, request_timeout=240)
 
         total_stats = self.es.search(body={
             "query": {
@@ -333,7 +333,7 @@ class ElasticSearchEngine(SearchEngine):
             },
             "size": 0
 
-        }, index=self.index_name, request_timeout=120)
+        }, index=self.index_name, request_timeout=241)
 
         size_and_date_histogram = self.es.search(body={
             "query": {
@@ -354,21 +354,21 @@ class ElasticSearchEngine(SearchEngine):
                 "sizes": {
                     "histogram": {
                         "field": "size",
-                        "interval": 50000000,  # 50Mb
-                        "min_doc_count": 100
+                        "interval": 100000000,  # 100Mb
+                        "min_doc_count": 500
                     }
                 },
                 "dates": {
                     "date_histogram": {
                         "field": "mtime",
                         "interval": "1y",
-                        "min_doc_count": 100,
+                        "min_doc_count": 500,
                         "format": "yyyy"
                     }
                 }
             },
             "size": 0
-        }, index=self.index_name, request_timeout=120)
+        }, index=self.index_name, request_timeout=242)
 
         website_scatter = self.es.search(body={
             "query": {
@@ -384,7 +384,7 @@ class ElasticSearchEngine(SearchEngine):
             "websites": {
                 "terms": {
                     "field": "website_id",
-                    "size": 500  # TODO: Figure out what size is appropriate
+                    "size": 400  # TODO: Figure out what size is appropriate
                 },
                 "aggs": {
                     "size": {
@@ -396,9 +396,9 @@ class ElasticSearchEngine(SearchEngine):
                 }
             },
             "size": 0
 
-        }, index=self.index_name, request_timeout=120)
+        }, index=self.index_name, request_timeout=243)
 
-        es_stats = self.es.indices.stats(self.index_name, request_timeout=120)
+        es_stats = self.es.indices.stats(self.index_name, request_timeout=244)
         stats = dict()
         stats["es_index_size"] = es_stats["indices"][self.index_name]["total"]["store"]["size_in_bytes"]