bulk indexing

This commit is contained in:
simon
2018-03-13 12:22:00 -04:00
parent e79a68ebe6
commit 9d75fc4d59
4 changed files with 54 additions and 16 deletions

View File

@@ -22,34 +22,60 @@ class Indexer:
t.daemon = True
t.start()
time.sleep(5)
time.sleep(10)
self.init()
@staticmethod
def run_elasticsearch():
    """
    Start the Elasticsearch launcher script as a child process.

    The script path ``elasticsearch/bin/elasticsearch`` is relative, so it is
    resolved against the current working directory. The call does not wait
    for the process and the ``Popen`` handle is discarded.
    """
    launch_command = ["elasticsearch/bin/elasticsearch"]
    subprocess.Popen(launch_command)
@staticmethod
def create_bulk_index_string(docs: list, index_name: str = None):
    """
    Creates the newline-delimited body of an Elasticsearch bulk insert.

    Every document contributes two lines: an ``index`` action line followed
    by the document serialised as JSON.

    :param docs: JSON-serialisable documents to index
    :param index_name: optional target index. When given, every action line
        names that index and the "file" type explicitly. When omitted the
        action line is empty and the target has to be supplied with the bulk
        request itself.
    :return: the bulk body; an empty string when ``docs`` is empty
    """
    print("Creating bulk index string...")

    if index_name is None:
        action = {"index": {}}
    else:
        action = {"index": {"_index": index_name, "_type": "file"}}

    # Serialise the action with json.dumps so an index name containing quotes
    # or backslashes cannot corrupt the line; compact separators keep the
    # output identical to the previously hand-written action strings.
    action_line = json.dumps(action, separators=(",", ":")) + "\n"

    # Join once instead of growing a string with += for every document.
    return "".join(action_line + json.dumps(doc) + "\n" for doc in docs)
def index(self, docs: list):
    """
    Sends the given documents to Elasticsearch in a single bulk request.

    The target index (``self.index_name``) and the "file" document type are
    passed on the request itself, so the action lines in the body stay empty.

    :param docs: JSON-serialisable documents to index; when empty, nothing
        is sent
    """
    print("Indexing " + str(len(docs)) + " docs")

    # An empty list would produce an empty bulk body, which Elasticsearch
    # rejects, so there is nothing useful to send.
    if not docs:
        return

    index_string = Indexer.create_bulk_index_string(docs)

    print("bulk-start")
    self.es.bulk(body=index_string, index=self.index_name, doc_type="file")
    print("bulk-done")
def clear(self):
    """
    Deletes the index named by ``self.index_name`` and creates it again.
    """
    target = self.index_name
    index_api = self.es.indices

    index_api.delete(target)
    index_api.create(target)
def init(self):
    """
    Recreates the index named by ``self.index_name`` and configures it.

    Steps, in order: drop the index if it exists, create it, close it, install
    the ``path_analyser`` analyzer (built on a ``path_hierarchy`` tokenizer),
    install the mapping for the "file" type, then reopen the index.
    """
    indices = self.es.indices
    name = self.index_name

    # Deleting a missing index raises a not-found error, which would abort the
    # very first start against a fresh node; only delete what exists.
    if indices.exists(index=name):
        indices.delete(index=name)
    indices.create(index=name)

    # Analysis settings can only be changed while the index is closed.
    indices.close(index=name)

    indices.put_settings(body='{"analysis": {"analyzer": {"path_analyser": {'
                              '"tokenizer": "path_tokenizer"}}, "tokenizer": {"path_tokenizer": {'
                              '"type": "path_hierarchy"}}}}', index=name)

    # "name" is analysed with path_analyser and copied into the completion
    # field "suggest-path"; "mime" is stored as a keyword.
    indices.put_mapping(body='{"properties": {'
                             '"name": {"type": "text", "analyzer": "path_analyser", "copy_to": "suggest-path"},'
                             '"suggest-path": {"type": "completion", "analyzer": "keyword"},'
                             '"mime": {"type": "keyword"}'
                             '}}', doc_type="file", index=name)

    indices.open(index=name)

    print("Initialised elesticsearch")