mirror of
				https://github.com/simon987/od-database.git
				synced 2025-10-25 19:56:51 +00:00 
			
		
		
		
	Stats are generated in the background and stored in a file instead of being computed on demand
This commit is contained in:
		
							parent
							
								
									bf82478fee
								
							
						
					
					
						commit
						fbbe952e4d
					
				
							
								
								
									
										6
									
								
								app.py
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								app.py
									
									
									
									
									
								
							| @ -68,8 +68,10 @@ def stats_page(): | ||||
| @cache.cached(240) | ||||
| def stats_json(): | ||||
|     stats = searchEngine.get_global_stats() | ||||
|     db.join_website_on_stats(stats) | ||||
|     return Response(json.dumps(stats), mimetype="application/json") | ||||
|     if stats: | ||||
|         db.join_website_on_stats(stats) | ||||
|         return Response(json.dumps(stats), mimetype="application/json") | ||||
|     return abort(500) | ||||
| 
 | ||||
| 
 | ||||
| @app.route("/get_export") | ||||
|  | ||||
| @ -3,6 +3,7 @@ import time | ||||
| from elasticsearch import helpers | ||||
| import os | ||||
| import ujson | ||||
| from apscheduler.schedulers.background import BackgroundScheduler | ||||
| 
 | ||||
| 
 | ||||
| class IndexingError(Exception): | ||||
| @ -46,6 +47,10 @@ class ElasticSearchEngine(SearchEngine): | ||||
|         self.index_name = index_name | ||||
|         self.es = elasticsearch.Elasticsearch() | ||||
| 
 | ||||
|         scheduler = BackgroundScheduler() | ||||
|         scheduler.add_job(self._generate_global_stats, "interval", seconds=180) | ||||
|         scheduler.start() | ||||
| 
 | ||||
|         if not self.es.indices.exists(self.index_name): | ||||
|             self.init() | ||||
| 
 | ||||
| @ -271,6 +276,14 @@ class ElasticSearchEngine(SearchEngine): | ||||
| 
 | ||||
|     def get_global_stats(self): | ||||
| 
 | ||||
|         if os.path.exists("_stats.json"): | ||||
|             with open("_stats.json", "r") as f: | ||||
|                 return ujson.load(f) | ||||
|         else: | ||||
|             return None | ||||
| 
 | ||||
|     def _generate_global_stats(self): | ||||
| 
 | ||||
|         size_per_ext = self.es.search(body={ | ||||
|             "query": { | ||||
|                 "bool": { | ||||
| @ -298,7 +311,7 @@ class ElasticSearchEngine(SearchEngine): | ||||
|             }, | ||||
|             "size": 0 | ||||
| 
 | ||||
|         }, index=self.index_name, request_timeout=20) | ||||
|         }, index=self.index_name, request_timeout=120) | ||||
| 
 | ||||
|         total_stats = self.es.search(body={ | ||||
|             "query": { | ||||
| @ -320,7 +333,7 @@ class ElasticSearchEngine(SearchEngine): | ||||
|             }, | ||||
|             "size": 0 | ||||
| 
 | ||||
|         }, index=self.index_name, request_timeout=20) | ||||
|         }, index=self.index_name, request_timeout=120) | ||||
| 
 | ||||
|         size_and_date_histogram = self.es.search(body={ | ||||
|             "query": { | ||||
| @ -355,7 +368,7 @@ class ElasticSearchEngine(SearchEngine): | ||||
|                 } | ||||
|             }, | ||||
|             "size": 0 | ||||
|         }, index=self.index_name, request_timeout=20) | ||||
|         }, index=self.index_name, request_timeout=120) | ||||
| 
 | ||||
|         website_scatter = self.es.search(body={ | ||||
|             "query": { | ||||
| @ -383,17 +396,15 @@ class ElasticSearchEngine(SearchEngine): | ||||
|                 } | ||||
|             }, | ||||
|             "size": 0 | ||||
|         }, index=self.index_name, request_timeout=20) | ||||
|         }, index=self.index_name, request_timeout=120) | ||||
| 
 | ||||
|         es_stats = self.es.indices.stats(self.index_name, request_timeout=20) | ||||
|         es_stats = self.es.indices.stats(self.index_name, request_timeout=120) | ||||
| 
 | ||||
|         stats = dict() | ||||
|         stats["es_index_size"] = es_stats["indices"][self.index_name]["total"]["store"]["size_in_bytes"] | ||||
|         stats["es_search_count"] = es_stats["indices"][self.index_name]["total"]["search"]["query_total"] | ||||
|         stats["es_search_time"] = es_stats["indices"][self.index_name]["total"]["search"]["query_time_in_millis"] | ||||
|         stats["es_search_time_avg"] = stats["es_search_time"] / ( | ||||
| 
 | ||||
|             stats["es_search_count"] if stats["es_search_count"] != 0 else 1) | ||||
|         stats["es_search_time_avg"] = stats["es_search_time"] / (stats["es_search_count"] if stats["es_search_count"] != 0 else 1) | ||||
| 
 | ||||
|         stats["total_count"] = total_stats["hits"]["total"] | ||||
|         stats["total_size"] = total_stats["aggregations"]["file_stats"]["sum"] | ||||
| @ -411,7 +422,8 @@ class ElasticSearchEngine(SearchEngine): | ||||
|                                     for b in website_scatter["aggregations"]["websites"]["buckets"]] | ||||
|         stats["base_url"] = "entire database" | ||||
| 
 | ||||
|         return stats | ||||
|         with open("_stats.json", "w") as f: | ||||
|             ujson.dump(stats, f) | ||||
| 
 | ||||
|     def stream_all_docs(self): | ||||
|         return helpers.scan(query={ | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user