mirror of
https://github.com/simon987/od-database.git
synced 2025-04-20 19:06:42 +00:00
Stats are generated in the background and stored to a file instead of being computed on demand
This commit is contained in:
parent
bf82478fee
commit
fbbe952e4d
2
app.py
2
app.py
@ -68,8 +68,10 @@ def stats_page():
|
|||||||
@cache.cached(240)
|
@cache.cached(240)
|
||||||
def stats_json():
|
def stats_json():
|
||||||
stats = searchEngine.get_global_stats()
|
stats = searchEngine.get_global_stats()
|
||||||
|
if stats:
|
||||||
db.join_website_on_stats(stats)
|
db.join_website_on_stats(stats)
|
||||||
return Response(json.dumps(stats), mimetype="application/json")
|
return Response(json.dumps(stats), mimetype="application/json")
|
||||||
|
return abort(500)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/get_export")
|
@app.route("/get_export")
|
||||||
|
@ -3,6 +3,7 @@ import time
|
|||||||
from elasticsearch import helpers
|
from elasticsearch import helpers
|
||||||
import os
|
import os
|
||||||
import ujson
|
import ujson
|
||||||
|
from apscheduler.schedulers.background import BackgroundScheduler
|
||||||
|
|
||||||
|
|
||||||
class IndexingError(Exception):
|
class IndexingError(Exception):
|
||||||
@ -46,6 +47,10 @@ class ElasticSearchEngine(SearchEngine):
|
|||||||
self.index_name = index_name
|
self.index_name = index_name
|
||||||
self.es = elasticsearch.Elasticsearch()
|
self.es = elasticsearch.Elasticsearch()
|
||||||
|
|
||||||
|
scheduler = BackgroundScheduler()
|
||||||
|
scheduler.add_job(self._generate_global_stats, "interval", seconds=180)
|
||||||
|
scheduler.start()
|
||||||
|
|
||||||
if not self.es.indices.exists(self.index_name):
|
if not self.es.indices.exists(self.index_name):
|
||||||
self.init()
|
self.init()
|
||||||
|
|
||||||
@ -271,6 +276,14 @@ class ElasticSearchEngine(SearchEngine):
|
|||||||
|
|
||||||
def get_global_stats(self):
|
def get_global_stats(self):
|
||||||
|
|
||||||
|
if os.path.exists("_stats.json"):
|
||||||
|
with open("_stats.json", "r") as f:
|
||||||
|
return ujson.load(f)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _generate_global_stats(self):
|
||||||
|
|
||||||
size_per_ext = self.es.search(body={
|
size_per_ext = self.es.search(body={
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
@ -298,7 +311,7 @@ class ElasticSearchEngine(SearchEngine):
|
|||||||
},
|
},
|
||||||
"size": 0
|
"size": 0
|
||||||
|
|
||||||
}, index=self.index_name, request_timeout=20)
|
}, index=self.index_name, request_timeout=120)
|
||||||
|
|
||||||
total_stats = self.es.search(body={
|
total_stats = self.es.search(body={
|
||||||
"query": {
|
"query": {
|
||||||
@ -320,7 +333,7 @@ class ElasticSearchEngine(SearchEngine):
|
|||||||
},
|
},
|
||||||
"size": 0
|
"size": 0
|
||||||
|
|
||||||
}, index=self.index_name, request_timeout=20)
|
}, index=self.index_name, request_timeout=120)
|
||||||
|
|
||||||
size_and_date_histogram = self.es.search(body={
|
size_and_date_histogram = self.es.search(body={
|
||||||
"query": {
|
"query": {
|
||||||
@ -355,7 +368,7 @@ class ElasticSearchEngine(SearchEngine):
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"size": 0
|
"size": 0
|
||||||
}, index=self.index_name, request_timeout=20)
|
}, index=self.index_name, request_timeout=120)
|
||||||
|
|
||||||
website_scatter = self.es.search(body={
|
website_scatter = self.es.search(body={
|
||||||
"query": {
|
"query": {
|
||||||
@ -383,17 +396,15 @@ class ElasticSearchEngine(SearchEngine):
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"size": 0
|
"size": 0
|
||||||
}, index=self.index_name, request_timeout=20)
|
}, index=self.index_name, request_timeout=120)
|
||||||
|
|
||||||
es_stats = self.es.indices.stats(self.index_name, request_timeout=20)
|
es_stats = self.es.indices.stats(self.index_name, request_timeout=120)
|
||||||
|
|
||||||
stats = dict()
|
stats = dict()
|
||||||
stats["es_index_size"] = es_stats["indices"][self.index_name]["total"]["store"]["size_in_bytes"]
|
stats["es_index_size"] = es_stats["indices"][self.index_name]["total"]["store"]["size_in_bytes"]
|
||||||
stats["es_search_count"] = es_stats["indices"][self.index_name]["total"]["search"]["query_total"]
|
stats["es_search_count"] = es_stats["indices"][self.index_name]["total"]["search"]["query_total"]
|
||||||
stats["es_search_time"] = es_stats["indices"][self.index_name]["total"]["search"]["query_time_in_millis"]
|
stats["es_search_time"] = es_stats["indices"][self.index_name]["total"]["search"]["query_time_in_millis"]
|
||||||
stats["es_search_time_avg"] = stats["es_search_time"] / (
|
stats["es_search_time_avg"] = stats["es_search_time"] / (stats["es_search_count"] if stats["es_search_count"] != 0 else 1)
|
||||||
|
|
||||||
stats["es_search_count"] if stats["es_search_count"] != 0 else 1)
|
|
||||||
|
|
||||||
stats["total_count"] = total_stats["hits"]["total"]
|
stats["total_count"] = total_stats["hits"]["total"]
|
||||||
stats["total_size"] = total_stats["aggregations"]["file_stats"]["sum"]
|
stats["total_size"] = total_stats["aggregations"]["file_stats"]["sum"]
|
||||||
@ -411,7 +422,8 @@ class ElasticSearchEngine(SearchEngine):
|
|||||||
for b in website_scatter["aggregations"]["websites"]["buckets"]]
|
for b in website_scatter["aggregations"]["websites"]["buckets"]]
|
||||||
stats["base_url"] = "entire database"
|
stats["base_url"] = "entire database"
|
||||||
|
|
||||||
return stats
|
with open("_stats.json", "w") as f:
|
||||||
|
ujson.dump(stats, f)
|
||||||
|
|
||||||
def stream_all_docs(self):
|
def stream_all_docs(self):
|
||||||
return helpers.scan(query={
|
return helpers.scan(query={
|
||||||
|
Loading…
x
Reference in New Issue
Block a user