mirror of
https://github.com/simon987/Simple-Incremental-Search-Tool.git
synced 2025-12-13 15:19:05 +00:00
bulk indexing
This commit is contained in:
38
indexer.py
38
indexer.py
@@ -22,34 +22,60 @@ class Indexer:
|
||||
t.daemon = True
|
||||
t.start()
|
||||
|
||||
time.sleep(5)
|
||||
time.sleep(10)
|
||||
self.init()
|
||||
|
||||
@staticmethod
|
||||
def run_elasticsearch():
|
||||
subprocess.Popen(["elasticsearch/bin/elasticsearch"])
|
||||
|
||||
@staticmethod
|
||||
def create_bulk_index_string(docs: list, index_name: str):
|
||||
def create_bulk_index_string(docs: list):
|
||||
"""
|
||||
Creates a insert string for sending to elasticsearch
|
||||
"""
|
||||
|
||||
print("Creating bulk index string...")
|
||||
|
||||
result = ""
|
||||
|
||||
action_string = '{"index":{"_index":"' + index_name + '","_type":"file"}}\n'
|
||||
action_string = '{"index":{}}\n'
|
||||
|
||||
for doc in docs:
|
||||
result += action_string
|
||||
result += json.dumps(doc) + "\n"
|
||||
|
||||
print(result)
|
||||
|
||||
return result
|
||||
|
||||
def index(self, docs: list):
|
||||
|
||||
index_string = self.create_bulk_index_string(docs, self.index_name)
|
||||
self.es.bulk(index_string)
|
||||
print("Indexing " + str(len(docs)) + " docs")
|
||||
index_string = Indexer.create_bulk_index_string(docs)
|
||||
print("bulk-start")
|
||||
self.es.bulk(body=index_string, index=self.index_name, doc_type="file")
|
||||
print("bulk-done")
|
||||
|
||||
def clear(self):
|
||||
|
||||
self.es.indices.delete(self.index_name)
|
||||
self.es.indices.create(self.index_name)
|
||||
|
||||
def init(self):
|
||||
self.es.indices.delete(index=self.index_name)
|
||||
self.es.indices.create(index=self.index_name)
|
||||
self.es.indices.close(index=self.index_name)
|
||||
|
||||
self.es.indices.put_settings(body='{"analysis": {"analyzer": {"path_analyser": {'
|
||||
'"tokenizer": "path_tokenizer"}}, "tokenizer": {"path_tokenizer": {'
|
||||
'"type": "path_hierarchy"}}}}', index=self.index_name)
|
||||
|
||||
self.es.indices.put_mapping(body='{"properties": {'
|
||||
'"name": {"type": "text", "analyzer": "path_analyser", "copy_to": "suggest-path"},'
|
||||
'"suggest-path": {"type": "completion", "analyzer": "keyword"},'
|
||||
'"mime": {"type": "keyword"}'
|
||||
'}}', doc_type="file", index=self.index_name)
|
||||
|
||||
self.es.indices.open(index=self.index_name)
|
||||
|
||||
print("Initialised elesticsearch")
|
||||
|
||||
Reference in New Issue
Block a user