mirror of
				https://github.com/simon987/od-database.git
				synced 2025-10-25 19:56:51 +00:00 
			
		
		
		
	Stats are generated in the background and stored to a file instead of being computed on demand
This commit is contained in:
		
							parent
							
								
									bf82478fee
								
							
						
					
					
						commit
						fbbe952e4d
					
				
							
								
								
									
										2
									
								
								app.py
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								app.py
									
									
									
									
									
								
							| @ -68,8 +68,10 @@ def stats_page(): | |||||||
| @cache.cached(240) | @cache.cached(240) | ||||||
| def stats_json(): | def stats_json(): | ||||||
|     stats = searchEngine.get_global_stats() |     stats = searchEngine.get_global_stats() | ||||||
|  |     if stats: | ||||||
|         db.join_website_on_stats(stats) |         db.join_website_on_stats(stats) | ||||||
|         return Response(json.dumps(stats), mimetype="application/json") |         return Response(json.dumps(stats), mimetype="application/json") | ||||||
|  |     return abort(500) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @app.route("/get_export") | @app.route("/get_export") | ||||||
|  | |||||||
| @ -3,6 +3,7 @@ import time | |||||||
| from elasticsearch import helpers | from elasticsearch import helpers | ||||||
| import os | import os | ||||||
| import ujson | import ujson | ||||||
|  | from apscheduler.schedulers.background import BackgroundScheduler | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class IndexingError(Exception): | class IndexingError(Exception): | ||||||
| @ -46,6 +47,10 @@ class ElasticSearchEngine(SearchEngine): | |||||||
|         self.index_name = index_name |         self.index_name = index_name | ||||||
|         self.es = elasticsearch.Elasticsearch() |         self.es = elasticsearch.Elasticsearch() | ||||||
| 
 | 
 | ||||||
|  |         scheduler = BackgroundScheduler() | ||||||
|  |         scheduler.add_job(self._generate_global_stats, "interval", seconds=180) | ||||||
|  |         scheduler.start() | ||||||
|  | 
 | ||||||
|         if not self.es.indices.exists(self.index_name): |         if not self.es.indices.exists(self.index_name): | ||||||
|             self.init() |             self.init() | ||||||
| 
 | 
 | ||||||
| @ -271,6 +276,14 @@ class ElasticSearchEngine(SearchEngine): | |||||||
| 
 | 
 | ||||||
|     def get_global_stats(self): |     def get_global_stats(self): | ||||||
| 
 | 
 | ||||||
|  |         if os.path.exists("_stats.json"): | ||||||
|  |             with open("_stats.json", "r") as f: | ||||||
|  |                 return ujson.load(f) | ||||||
|  |         else: | ||||||
|  |             return None | ||||||
|  | 
 | ||||||
|  |     def _generate_global_stats(self): | ||||||
|  | 
 | ||||||
|         size_per_ext = self.es.search(body={ |         size_per_ext = self.es.search(body={ | ||||||
|             "query": { |             "query": { | ||||||
|                 "bool": { |                 "bool": { | ||||||
| @ -298,7 +311,7 @@ class ElasticSearchEngine(SearchEngine): | |||||||
|             }, |             }, | ||||||
|             "size": 0 |             "size": 0 | ||||||
| 
 | 
 | ||||||
|         }, index=self.index_name, request_timeout=20) |         }, index=self.index_name, request_timeout=120) | ||||||
| 
 | 
 | ||||||
|         total_stats = self.es.search(body={ |         total_stats = self.es.search(body={ | ||||||
|             "query": { |             "query": { | ||||||
| @ -320,7 +333,7 @@ class ElasticSearchEngine(SearchEngine): | |||||||
|             }, |             }, | ||||||
|             "size": 0 |             "size": 0 | ||||||
| 
 | 
 | ||||||
|         }, index=self.index_name, request_timeout=20) |         }, index=self.index_name, request_timeout=120) | ||||||
| 
 | 
 | ||||||
|         size_and_date_histogram = self.es.search(body={ |         size_and_date_histogram = self.es.search(body={ | ||||||
|             "query": { |             "query": { | ||||||
| @ -355,7 +368,7 @@ class ElasticSearchEngine(SearchEngine): | |||||||
|                 } |                 } | ||||||
|             }, |             }, | ||||||
|             "size": 0 |             "size": 0 | ||||||
|         }, index=self.index_name, request_timeout=20) |         }, index=self.index_name, request_timeout=120) | ||||||
| 
 | 
 | ||||||
|         website_scatter = self.es.search(body={ |         website_scatter = self.es.search(body={ | ||||||
|             "query": { |             "query": { | ||||||
| @ -383,17 +396,15 @@ class ElasticSearchEngine(SearchEngine): | |||||||
|                 } |                 } | ||||||
|             }, |             }, | ||||||
|             "size": 0 |             "size": 0 | ||||||
|         }, index=self.index_name, request_timeout=20) |         }, index=self.index_name, request_timeout=120) | ||||||
| 
 | 
 | ||||||
|         es_stats = self.es.indices.stats(self.index_name, request_timeout=20) |         es_stats = self.es.indices.stats(self.index_name, request_timeout=120) | ||||||
| 
 | 
 | ||||||
|         stats = dict() |         stats = dict() | ||||||
|         stats["es_index_size"] = es_stats["indices"][self.index_name]["total"]["store"]["size_in_bytes"] |         stats["es_index_size"] = es_stats["indices"][self.index_name]["total"]["store"]["size_in_bytes"] | ||||||
|         stats["es_search_count"] = es_stats["indices"][self.index_name]["total"]["search"]["query_total"] |         stats["es_search_count"] = es_stats["indices"][self.index_name]["total"]["search"]["query_total"] | ||||||
|         stats["es_search_time"] = es_stats["indices"][self.index_name]["total"]["search"]["query_time_in_millis"] |         stats["es_search_time"] = es_stats["indices"][self.index_name]["total"]["search"]["query_time_in_millis"] | ||||||
|         stats["es_search_time_avg"] = stats["es_search_time"] / ( |         stats["es_search_time_avg"] = stats["es_search_time"] / (stats["es_search_count"] if stats["es_search_count"] != 0 else 1) | ||||||
| 
 |  | ||||||
|             stats["es_search_count"] if stats["es_search_count"] != 0 else 1) |  | ||||||
| 
 | 
 | ||||||
|         stats["total_count"] = total_stats["hits"]["total"] |         stats["total_count"] = total_stats["hits"]["total"] | ||||||
|         stats["total_size"] = total_stats["aggregations"]["file_stats"]["sum"] |         stats["total_size"] = total_stats["aggregations"]["file_stats"]["sum"] | ||||||
| @ -411,7 +422,8 @@ class ElasticSearchEngine(SearchEngine): | |||||||
|                                     for b in website_scatter["aggregations"]["websites"]["buckets"]] |                                     for b in website_scatter["aggregations"]["websites"]["buckets"]] | ||||||
|         stats["base_url"] = "entire database" |         stats["base_url"] = "entire database" | ||||||
| 
 | 
 | ||||||
|         return stats |         with open("_stats.json", "w") as f: | ||||||
|  |             ujson.dump(stats, f) | ||||||
| 
 | 
 | ||||||
|     def stream_all_docs(self): |     def stream_all_docs(self): | ||||||
|         return helpers.scan(query={ |         return helpers.scan(query={ | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user