Increased ES timeouts

This commit is contained in:
Simon 2018-06-26 17:01:17 -04:00
parent e384efd403
commit a0bd45c829

View File

@@ -103,13 +103,13 @@ class ElasticSearchEngine(SearchEngine):
                 }
             }
         }
-        }, index=self.index_name)
+        }, index=self.index_name, request_timeout=40)
         except elasticsearch.exceptions.ConflictError:
             print("Error: multiple delete tasks at the same time")

     def import_json(self, in_lines, website_id: int):
-        import_every = 5000
+        import_every = 2500
         docs = []
@@ -168,7 +168,8 @@ class ElasticSearchEngine(SearchEngine):
                     "path": {"pre_tags": ["<mark>"], "post_tags": ["</mark>"]}
                 }
             },
-            "size": per_page, "from": min(page * per_page, 10000 - per_page)}, index=self.index_name)
+            "size": per_page, "from": min(page * per_page, 10000 - per_page)},
+            index=self.index_name, request_timeout=30)

         return page
@@ -203,7 +204,7 @@ class ElasticSearchEngine(SearchEngine):
                 }
             },
             "size": 0
-        }, index=self.index_name)
+        }, index=self.index_name, request_timeout=30)

         stats = dict()
         stats["total_size"] = result["aggregations"]["total_size"]["value"]
@@ -225,7 +226,7 @@ class ElasticSearchEngine(SearchEngine):
                     "website_id": website_id}
                 }
             },
-            index=self.index_name)
+            index=self.index_name, request_timeout=30)

         for hit in hits:
             src = hit["_source"]
             yield base_url + src["path"] + ("/" if src["path"] != "" else "") + src["name"] + \
@@ -259,7 +260,7 @@ class ElasticSearchEngine(SearchEngine):
                 }
             },
             "size": 0
-        }, index=self.index_name)
+        }, index=self.index_name, request_timeout=30)

         total_stats = self.es.search(body={
             "query": {
@@ -278,7 +279,7 @@ class ElasticSearchEngine(SearchEngine):
                 }
             },
             "size": 0
-        }, index=self.index_name)
+        }, index=self.index_name, request_timeout=30)

         size_and_date_histogram = self.es.search(body={
             "query": {
@@ -318,7 +319,7 @@ class ElasticSearchEngine(SearchEngine):
                 }
             },
             "size": 0
-        }, index=self.index_name)
+        }, index=self.index_name, request_timeout=30)

         website_scatter = self.es.search(body={
             "query": {
@@ -344,9 +345,9 @@ class ElasticSearchEngine(SearchEngine):
                 }
             },
             "size": 0
-        }, index=self.index_name)
+        }, index=self.index_name, request_timeout=30)

-        es_stats = self.es.indices.stats(self.index_name)
+        es_stats = self.es.indices.stats(self.index_name, request_timeout=30)

         stats = dict()
         stats["es_index_size"] = es_stats["indices"][self.index_name]["total"]["store"]["size_in_bytes"]
@@ -402,13 +403,10 @@ class ElasticSearchEngine(SearchEngine):
                 }
             },
             "size": 0
-        }, index=self.index_name)
+        }, index=self.index_name, request_timeout=30)

         non_empty_websites = [bucket["key"] for bucket in result["aggregations"]["websites"]["buckets"]]
         for website in websites:
             if website not in non_empty_websites:
                 yield website