Logging for search and better error handling

Author: Simon
Date: 2018-11-17 11:19:09 -05:00
Parent: edf1849bac
Commit: 4996de6aa9
3 changed files with 48 additions and 14 deletions


@@ -0,0 +1,17 @@
import logging
from logging import FileHandler, StreamHandler
import sys
logger = logging.getLogger("default")
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
file_handler = FileHandler("oddb.log")
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logger.addHandler(StreamHandler(sys.stdout))
# Silence werkzeug (Flask request) logging below ERROR
flaskLogger = logging.getLogger('werkzeug')
flaskLogger.setLevel(logging.ERROR)
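
For reference, a minimal usage sketch (hypothetical caller; the module path "search" is inferred from the "from search import logger" import in the third file below). Any module that imports this logger writes to both oddb.log and stdout:

from search import logger

logger.info("First-time setup starting")  # appears in oddb.log and on stdout
logger.debug("Parsed 42 entries")         # also emitted, since the level is DEBUG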


@@ -0,0 +1,13 @@
import logging
from logging import FileHandler, StreamHandler
import sys
logger = logging.getLogger("default")
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
file_handler = FileHandler("oddb.log")
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logger.addHandler(StreamHandler(sys.stdout))
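
Note that both of these new files configure the same "default" logger and unconditionally attach handlers. If both modules are imported in one process, the logger ends up with two file handlers and two stream handlers, and every record is written twice. A guard along these lines (a sketch, not part of this commit) would prevent that:

import logging
from logging import FileHandler, StreamHandler
import sys

logger = logging.getLogger("default")
if not logger.handlers:  # attach handlers only once per process
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s %(levelname)-5s %(message)s')
    file_handler = FileHandler("oddb.log")
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.addHandler(StreamHandler(sys.stdout))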


@@ -5,6 +5,7 @@ import os
 import ujson
 from apscheduler.schedulers.background import BackgroundScheduler
+from search import logger
 from search.filter import SearchFilter
@@ -66,7 +67,7 @@ class ElasticSearchEngine(SearchEngine):
         scheduler.start()
 
     def init(self):
-        print("Elasticsearch first time setup")
+        logger.info("Elasticsearch first time setup")
         if self.es.indices.exists(self.index_name):
             self.es.indices.delete(index=self.index_name)
         self.es.indices.create(index=self.index_name)
@@ -113,7 +114,7 @@ class ElasticSearchEngine(SearchEngine):
         while True:
             try:
-                print("Deleting docs of " + str(website_id))
+                logger.debug("Deleting docs of " + str(website_id))
                 self.es.delete_by_query(body={
                     "query": {
                         "constant_score": {
@@ -125,11 +126,11 @@ class ElasticSearchEngine(SearchEngine):
                 }, index=self.index_name, request_timeout=200)
                 break
             except elasticsearch.exceptions.ConflictError:
-                print("Error: multiple delete tasks at the same time, retrying")
-                time.sleep(10)
+                logger.warning("Error: multiple delete tasks at the same time, retrying in 20s")
+                time.sleep(20)
             except Exception:
-                print("Timeout during delete! Retrying")
-                time.sleep(10)
+                logger.warning("Timeout during delete! Retrying in 20s")
+                time.sleep(20)
 
     def import_json(self, in_lines, website_id: int):
@@ -139,12 +140,15 @@ class ElasticSearchEngine(SearchEngine):
         docs = []
 
         for line in in_lines:
-            doc = ujson.loads(line)
-            name, ext = os.path.splitext(doc["name"])
-            doc["ext"] = ext[1:].lower() if ext and len(ext) > 1 else ""
-            doc["name"] = name
-            doc["website_id"] = website_id
-            docs.append(doc)
+            try:
+                doc = ujson.loads(line)
+                name, ext = os.path.splitext(doc["name"])
+                doc["ext"] = ext[1:].lower() if ext and len(ext) > 1 else ""
+                doc["name"] = name
+                doc["website_id"] = website_id
+                docs.append(doc)
+            except Exception as e:
+                logger.error("Error in import_json: " + str(e) + " for line:\n" + line)
 
             if len(docs) >= import_every:
                 self._index(docs)
@@ -155,12 +159,12 @@ class ElasticSearchEngine(SearchEngine):
             self._index(docs)
 
     def _index(self, docs):
-        print("Indexing " + str(len(docs)) + " docs")
+        logger.debug("Indexing " + str(len(docs)) + " docs")
         bulk_string = ElasticSearchEngine.create_bulk_index_string(docs)
         result = self.es.bulk(body=bulk_string, index=self.index_name, doc_type="file", request_timeout=30)
 
         if result["errors"]:
-            print(result)
+            logger.error("Error in ES bulk index:\n" + str(result["errors"]))
             raise IndexingError
 
     @staticmethod
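
With the '%(asctime)s %(levelname)-5s %(message)s' formatter configured above, the calls introduced in this commit produce lines like these in oddb.log (timestamps illustrative):

2018-11-17 11:19:09,123 INFO  Elasticsearch first time setup
2018-11-17 11:20:41,005 DEBUG Indexing 1000 docs
2018-11-17 11:21:02,377 WARNING Error: multiple delete tasks at the same time, retrying in 20s

The stdout StreamHandler is added without a formatter, so console output falls back to the bare message text; only oddb.log carries the timestamped format.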