Crawl server now holds at most max_workers + 1 tasks in the pool, to minimize waiting time and to avoid losing too many tasks in case of a crash/restart.

This commit is contained in:
Simon
2018-06-12 22:28:36 -04:00
parent 24ef493245
commit 2fe81e4b06
5 changed files with 73 additions and 39 deletions

View File

@@ -213,3 +213,19 @@ class ElasticSearchEngine(SearchEngine):
src = hit["_source"]
yield base_url + src["path"] + ("/" if src["path"] != "" else "") + src["name"] + \
("." if src["ext"] != "" else "") + src["ext"]
def get_global_stats(self):
    """Fetch aggregate statistics over the ``size`` field of indexed documents.

    Sends a ``match_all`` query through ``self.es.search`` with a single
    ``extended_stats`` aggregation, named ``total_size``, on the ``size``
    field. The request sets ``"size": 0`` so that only the aggregation is
    requested rather than the matching documents themselves.

    Returns:
        The response object returned by ``self.es.search``, unmodified.
        Presumably the statistics live under
        ``response["aggregations"]["total_size"]`` (standard Elasticsearch
        response layout) -- verify against the client version in use.
    """
    # NOTE(review): no ``index`` argument is passed, so the query is not
    # restricted to a particular index -- confirm this is intended.
    result = self.es.search(body={
        "query": {
            "match_all": {},
        },
        "aggs": {
            "total_size": {
                "extended_stats": {"field": "size"},
            },
        },
        "size": 0,
    })

    # The original implementation only printed the response and returned
    # None, which made this getter unusable by callers. The print is kept so
    # existing console output does not change; new code should use the
    # return value instead.
    print(result)
    return result