Increased stats timeout value

Simon 2018-08-09 14:26:22 -04:00
parent f729b462f0
commit 8ffd9179d2
3 changed files with 45 additions and 10 deletions

app.py (+33)

@@ -716,5 +716,38 @@ def api_random_website():
         return abort(403)
 
 
+@app.route("/api/search", methods=["POST"])
+def api_search():
+    try:
+        token = request.json["token"]
+    except KeyError:
+        return abort(400)
+
+    name = db.check_api_token(token)
+
+    if name:
+        try:
+            hits = searchEngine.search(
+                request.json["query"],
+                request.json["page"], request.json["per_page"],
+                request.json["sort_order"],
+                request.json["extensions"],
+                request.json["size_min"], request.json["size_max"],
+                request.json["match_all"],
+                request.json["fields"],
+                request.json["date_min"], request.json["date_max"]
+            )
+            hits = db.join_website_on_search_result(hits)
+            return json.dumps(hits)
+
+        except InvalidQueryException as e:
+            return str(e)
+    else:
+        return abort(403)
+
+
 if __name__ == '__main__':
     app.run("0.0.0.0", port=12345, threaded=True)

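The new endpoint reads every parameter from the JSON body. As a hedged client-side sketch (the key names mirror the request.json lookups above and the port matches the app.run() call; every value is an illustrative assumption, not something this commit defines):

# Hedged client sketch for the new POST /api/search endpoint.
import requests

payload = {
    "token": "YOUR_API_TOKEN",   # validated via db.check_api_token()
    "query": "ubuntu iso",
    "page": 0,
    "per_page": 50,
    "sort_order": "score",
    "extensions": ["iso"],
    "size_min": 0,
    "size_max": 1000000000,
    "match_all": True,
    "fields": ["name", "path"],
    "date_min": 0,
    "date_max": 2147483647,
}

r = requests.post("http://localhost:12345/api/search", json=payload)
print(r.text)  # JSON-encoded hits on success, the InvalidQueryException text otherwise

Per the handler, a body without a "token" key returns 400 and an invalid token returns 403.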

@@ -59,7 +59,9 @@ class RedditCommentCallback(RedditCallback):
         print("Editing comment " + comment_id)
 
         stats = search.get_stats(self.task.website_id)
-        message = self.reddit_bot.get_comment(stats, self.task.website_id)
+        message = self.reddit_bot.get_comment(stats, self.task.website_id,
+                                              message="There you go! This website was crawled in `" +
+                                                      str(int(task_result.end_time - task_result.start_time)) + "s`")
         print(message)
 
         self.reddit_bot.edit(self.reddit_bot.reddit.comment(comment_id), message)

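This change threads the crawl duration into the reddit reply by passing a prebuilt line into get_comment. That method is outside this diff; purely as a hypothetical sketch of how the new message keyword might be consumed (the stats keys and formatting below are invented for illustration):

# Hypothetical sketch only -- get_comment() is not shown in this commit.
# Assumed behavior: prepend the optional message to the stats summary.
def get_comment(self, stats: dict, website_id, message: str = ""):
    lines = []
    if message:
        lines.append(message)  # e.g. "There you go! ... crawled in `12s`"
    # "total_count" and "total_size" are assumed keys, for illustration only.
    lines.append("Files indexed: " + str(stats.get("total_count", "?")))
    lines.append("Total size: " + str(stats.get("total_size", "?")) + " bytes")
    return "  \n".join(lines)  # two trailing spaces = reddit markdown line break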

@@ -311,7 +311,7 @@ class ElasticSearchEngine(SearchEngine):
             },
             "size": 0
-        }, index=self.index_name, request_timeout=120)
+        }, index=self.index_name, request_timeout=240)
 
         total_stats = self.es.search(body={
             "query": {
@@ -333,7 +333,7 @@ class ElasticSearchEngine(SearchEngine):
             },
             "size": 0
-        }, index=self.index_name, request_timeout=120)
+        }, index=self.index_name, request_timeout=240)
 
         size_and_date_histogram = self.es.search(body={
             "query": {
@@ -354,21 +354,21 @@ class ElasticSearchEngine(SearchEngine):
                 "sizes": {
                     "histogram": {
                         "field": "size",
-                        "interval": 50000000,  # 50Mb
-                        "min_doc_count": 100
+                        "interval": 100000000,  # 100Mb
+                        "min_doc_count": 500
                     }
                 },
                 "dates": {
                     "date_histogram": {
                         "field": "mtime",
                         "interval": "1y",
-                        "min_doc_count": 100,
+                        "min_doc_count": 500,
                         "format": "yyyy"
                     }
                 }
             },
             "size": 0
-        }, index=self.index_name, request_timeout=120)
+        }, index=self.index_name, request_timeout=240)
 
         website_scatter = self.es.search(body={
             "query": {
@@ -384,7 +384,7 @@ class ElasticSearchEngine(SearchEngine):
             "websites": {
                 "terms": {
                     "field": "website_id",
-                    "size": 500  # TODO: Figure out what size is appropriate
+                    "size": 400  # TODO: Figure out what size is appropriate
                 },
                 "aggs": {
                     "size": {
@ -396,9 +396,9 @@ class ElasticSearchEngine(SearchEngine):
} }
}, },
"size": 0 "size": 0
}, index=self.index_name, request_timeout=120) }, index=self.index_name, request_timeout=243)
es_stats = self.es.indices.stats(self.index_name, request_timeout=120) es_stats = self.es.indices.stats(self.index_name, request_timeout=244)
stats = dict() stats = dict()
stats["es_index_size"] = es_stats["indices"][self.index_name]["total"]["store"]["size_in_bytes"] stats["es_index_size"] = es_stats["indices"][self.index_name]["total"]["store"]["size_in_bytes"]
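Taken together, the stats-query changes make these aggregations both more patient and cheaper: doubling the histogram interval halves the number of size buckets, the higher min_doc_count hides sparse ones, and the per-request timeout gives the remaining work up to 240 seconds to finish. A self-contained sketch of the two knobs (the client setup and the "od-database" index name are assumptions; the parameter values come from the hunks above):

# Hedged sketch of the knobs this commit turns. elasticsearch-py accepts a
# per-call request_timeout (in seconds) that overrides the client default.
from elasticsearch import Elasticsearch

es = Elasticsearch()  # the default read timeout is much lower (~10s)

res = es.search(body={
    "aggs": {
        "sizes": {
            "histogram": {
                "field": "size",
                "interval": 100000000,  # one bucket per 100Mb of file size
                "min_doc_count": 500    # drop buckets with fewer than 500 docs
            }
        }
    },
    "size": 0                           # aggregations only, no hits
}, index="od-database", request_timeout=240)

for bucket in res["aggregations"]["sizes"]["buckets"]:
    print(int(bucket["key"]), bucket["doc_count"])

# indices.stats accepts the same per-request timeout override.
es.indices.stats("od-database", request_timeout=240)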