Mirror of https://github.com/simon987/od-database.git, synced 2025-04-19 18:36:44 +00:00
Logging for search and better error handling
This commit is contained in:
parent edf1849bac
commit 4996de6aa9
__init__.py  +17
__init__.py  +13
__init__.py
@@ -0,0 +1,17 @@
+import logging
+from logging import FileHandler, StreamHandler
+
+import sys
+
+logger = logging.getLogger("default")
+logger.setLevel(logging.DEBUG)
+
+formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
+file_handler = FileHandler("oddb.log")
+file_handler.setFormatter(formatter)
+logger.addHandler(file_handler)
+logger.addHandler(StreamHandler(sys.stdout))
+
+# Disable flask logging
+flaskLogger = logging.getLogger('werkzeug')
+flaskLogger.setLevel(logging.ERROR)
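
For orientation, a minimal sketch of how another module would pick this logger up, assuming the file above is a package __init__.py; the package name "search" is taken from the "from search import logger" line added further down in this commit, and the function below is purely hypothetical:

# Hypothetical consumer module; "search" as the package name is an assumption
from search import logger

def delete_website(website_id: int):
    # With the formatter above, this lands in oddb.log and on stdout as e.g.:
    # 2018-06-30 14:05:12,337 DEBUG Deleting docs of 42
    logger.debug("Deleting docs of " + str(website_id))
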
__init__.py
@@ -0,0 +1,13 @@
+import logging
+from logging import FileHandler, StreamHandler
+
+import sys
+
+logger = logging.getLogger("default")
+logger.setLevel(logging.DEBUG)
+
+formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
+file_handler = FileHandler("oddb.log")
+file_handler.setFormatter(formatter)
+logger.addHandler(file_handler)
+logger.addHandler(StreamHandler(sys.stdout))
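
Worth noting, as an observation rather than part of the commit: both new files configure the same named logger (logging.getLogger("default") returns one shared object per process), and each addHandler call appends another FileHandler for oddb.log, so importing both packages in one process would write every message twice. A small guard, sketched under that assumption:

import logging
import sys
from logging import FileHandler, StreamHandler

logger = logging.getLogger("default")
logger.setLevel(logging.DEBUG)

# Attach handlers only once, no matter how many packages run this setup
if not logger.handlers:
    formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
    file_handler = FileHandler("oddb.log")
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.addHandler(StreamHandler(sys.stdout))
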
@@ -5,6 +5,7 @@ import os
 import ujson
 from apscheduler.schedulers.background import BackgroundScheduler
 
+from search import logger
 from search.filter import SearchFilter
 
 
@@ -66,7 +67,7 @@ class ElasticSearchEngine(SearchEngine):
         scheduler.start()
 
     def init(self):
-        print("Elasticsearch first time setup")
+        logger.info("Elasticsearch first time setup")
         if self.es.indices.exists(self.index_name):
             self.es.indices.delete(index=self.index_name)
         self.es.indices.create(index=self.index_name)
@@ -113,7 +114,7 @@ class ElasticSearchEngine(SearchEngine):
 
         while True:
             try:
-                print("Deleting docs of " + str(website_id))
+                logger.debug("Deleting docs of " + str(website_id))
                 self.es.delete_by_query(body={
                     "query": {
                         "constant_score": {
@@ -125,11 +126,11 @@ class ElasticSearchEngine(SearchEngine):
                 }, index=self.index_name, request_timeout=200)
                 break
             except elasticsearch.exceptions.ConflictError:
-                print("Error: multiple delete tasks at the same time, retrying")
-                time.sleep(10)
+                logger.warning("Error: multiple delete tasks at the same time, retrying in 20s")
+                time.sleep(20)
             except Exception:
-                print("Timeout during delete! Retrying")
-                time.sleep(10)
+                logger.warning("Timeout during delete! Retrying in 20s")
+                time.sleep(20)
 
     def import_json(self, in_lines, website_id: int):
 
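
Read together with the previous hunk, the delete path now retries with a longer backoff and reports through the shared logger. A sketch of how the whole loop reads after this change, reassembled from the two hunks above; the enclosing method name and the body of the constant_score filter are not visible in this diff, so they are placeholders:

    def delete_docs(self, website_id):  # placeholder name for the method containing this loop
        while True:
            try:
                logger.debug("Deleting docs of " + str(website_id))
                self.es.delete_by_query(body={
                    "query": {
                        "constant_score": {
                            # filter on website_id (elided between the two hunks above)
                        }
                    }
                }, index=self.index_name, request_timeout=200)
                break
            except elasticsearch.exceptions.ConflictError:
                # Another delete task holds a conflicting version; back off and retry
                logger.warning("Error: multiple delete tasks at the same time, retrying in 20s")
                time.sleep(20)
            except Exception:
                # Typically a timeout on a large delete; back off and retry as well
                logger.warning("Timeout during delete! Retrying in 20s")
                time.sleep(20)
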
@@ -139,12 +140,15 @@ class ElasticSearchEngine(SearchEngine):
         docs = []
 
         for line in in_lines:
-            doc = ujson.loads(line)
-            name, ext = os.path.splitext(doc["name"])
-            doc["ext"] = ext[1:].lower() if ext and len(ext) > 1 else ""
-            doc["name"] = name
-            doc["website_id"] = website_id
-            docs.append(doc)
+            try:
+                doc = ujson.loads(line)
+                name, ext = os.path.splitext(doc["name"])
+                doc["ext"] = ext[1:].lower() if ext and len(ext) > 1 else ""
+                doc["name"] = name
+                doc["website_id"] = website_id
+                docs.append(doc)
+            except Exception as e:
+                logger.error("Error in import_json: " + str(e) + " for line : + \n" + line)
 
             if len(docs) >= import_every:
                 self._index(docs)
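
With the new try/except, one malformed line is logged and skipped instead of aborting the whole import. A minimal usage sketch, assuming in_lines can be any iterable of newline-delimited JSON strings and that the constructor takes the index name; neither detail appears in this diff, so both are assumptions:

# ElasticSearchEngine is the class changed in the hunks above
engine = ElasticSearchEngine("od-database")  # index name argument is an assumption

# crawl_result.json is a hypothetical NDJSON file, one document per line,
# each with at least a "name" field for the splitext() call above
with open("crawl_result.json") as f:
    engine.import_json(f, website_id=42)
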
@@ -155,12 +159,12 @@ class ElasticSearchEngine(SearchEngine):
             self._index(docs)
 
     def _index(self, docs):
-        print("Indexing " + str(len(docs)) + " docs")
+        logger.debug("Indexing " + str(len(docs)) + " docs")
         bulk_string = ElasticSearchEngine.create_bulk_index_string(docs)
         result = self.es.bulk(body=bulk_string, index=self.index_name, doc_type="file", request_timeout=30)
 
         if result["errors"]:
-            print(result)
+            logger.error("Error in ES bulk index: \n" + result["errors"])
             raise IndexingError
 
     @staticmethod
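
One caveat on the new error branch, noted here as an observation rather than something this commit changes: in the Elasticsearch bulk response, "errors" is a boolean flag and the per-document failures are listed under "items", so concatenating result["errors"] to a string raises a TypeError exactly when there is something to report. A sketch of a variant of the same branch inside _index, assuming the bulk body uses index actions:

        result = self.es.bulk(body=bulk_string, index=self.index_name, doc_type="file", request_timeout=30)

        if result["errors"]:
            # "errors" is only a flag; the details live in result["items"]
            failed = [item["index"] for item in result["items"] if item["index"].get("error")]
            logger.error("Error in ES bulk index: \n" + str(failed))
            raise IndexingError
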