2018-04-11 13:46:05 -04:00

89 lines
3.5 KiB
Python

import json
import elasticsearch
from threading import Thread
import subprocess
import requests
class Indexer:
    """Manage a single Elasticsearch index on the local node.

    The node is expected at ``http://localhost:9200``. If nothing answers
    there when the object is constructed, the bundled Elasticsearch binary
    (``elasticsearch/bin/elasticsearch``, relative to the working directory)
    is launched and the index is (re)built with its analyzers and mappings.
    """

    def __init__(self, index: str):
        """Connect to Elasticsearch, starting it locally if it is not running.

        :param index: name of the index every method of this object works on.
        """
        self.index_name = index
        self.es = elasticsearch.Elasticsearch()

        try:
            requests.head("http://localhost:9200")
            print("elasticsearch is already running")
        except requests.exceptions.ConnectionError:
            import time

            launcher = Thread(target=Indexer.run_elasticsearch)
            launcher.daemon = True
            launcher.start()

            # Fixed grace period for the freshly spawned node to boot before
            # the first request is sent to it.
            time.sleep(10)

            # NOTE(review): init() drops and rebuilds the index, so it is only
            # run on the branch that had to start the node - confirm this
            # matches the intended flow.
            self.init()

    @staticmethod
    def run_elasticsearch():
        """Spawn the bundled Elasticsearch binary as a child process."""
        subprocess.Popen(["elasticsearch/bin/elasticsearch"])

    @staticmethod
    def create_bulk_index_string(docs: list, directory: int):
        """
        Creates the insert string (bulk API body) for sending to elasticsearch

        Every document is tagged in place with ``doc["directory"] = directory``
        and emitted as two newline-terminated lines: an ``index`` action line
        followed by the JSON-serialized document.

        :param docs: list of JSON-serializable dicts (mutated: the
            ``directory`` key is set on each of them).
        :param directory: value stored under the ``directory`` key.
        :return: the bulk request body, or ``""`` when *docs* is empty.
        """
        action_string = '{"index":{}}\n'

        chunks = []
        for doc in docs:
            doc["directory"] = directory
            chunks.append(action_string)
            chunks.append(json.dumps(doc) + "\n")

        return "".join(chunks)

    def index(self, docs: list, directory: int):
        """Bulk-index *docs* into the index, tagging each with *directory*.

        :param docs: list of JSON-serializable dicts to index.
        :param directory: value stored under each document's ``directory`` key.
        """
        print("Indexing " + str(len(docs)) + " docs")

        # An empty bulk body is rejected by Elasticsearch, so there is nothing
        # useful to send for an empty batch.
        if not docs:
            return

        index_string = Indexer.create_bulk_index_string(docs, directory)
        self.es.bulk(body=index_string, index=self.index_name, doc_type="file", refresh="true")

    def _recreate_index(self):
        """Drop the index if it exists, then create it empty."""
        # Deleting a missing index raises a 404 error, which would make the
        # very first setup on a fresh node fail.
        if self.es.indices.exists(index=self.index_name):
            self.es.indices.delete(index=self.index_name)
        self.es.indices.create(index=self.index_name)

    def clear(self):
        """Remove every document by dropping and recreating the index.

        NOTE(review): the recreated index does not get the analyzers and
        mappings that init() installs - confirm whether that is intended.
        """
        self._recreate_index()

    def init(self):
        """Rebuild the index from scratch with its analyzers and mappings.

        Any existing index of the same name (and its documents) is dropped.
        The index is closed while the analysis settings are applied and
        reopened once the mapping for the ``file`` document type is in place.
        """
        self._recreate_index()

        # The index is closed before the analysis settings are changed and
        # reopened at the end.
        self.es.indices.close(index=self.index_name)

        # Tokenizers first, then the analyzers that refer to them by name.
        analysis_settings = (
            '{"analysis":{"tokenizer":{"path_tokenizer":{"type":"path_hierarchy"}}}}',
            '{"analysis":{"tokenizer":{"my_nGram_tokenizer":{"type":"nGram","min_gram":3,"max_gram":4}}}}',
            '{"analysis":{"analyzer":{"path_analyser":{"tokenizer":"path_tokenizer"}}}}',
            '{"analysis":{"analyzer":{"my_nGram":{"tokenizer":"my_nGram_tokenizer", "filter": ["lowercase"]}}}}',
        )
        for settings_body in analysis_settings:
            self.es.indices.put_settings(body=settings_body, index=self.index_name)

        self.es.indices.put_mapping(body='{"properties": {'
                                         '"path": {"type": "text", "analyzer": "path_analyser", "copy_to": "suggest-path"},'
                                         '"suggest-path": {"type": "completion", "analyzer": "keyword"},'
                                         '"mime": {"type": "keyword"},'
                                         '"directory": {"type": "keyword"},'
                                         '"name": {"analyzer": "my_nGram", "type": "text"},'
                                         '"album": {"analyzer": "my_nGram", "type": "text"},'
                                         '"artist": {"analyzer": "my_nGram", "type": "text"},'
                                         '"title": {"analyzer": "my_nGram", "type": "text"},'
                                         '"genre": {"analyzer": "my_nGram", "type": "text"},'
                                         '"album_artist": {"analyzer": "my_nGram", "type": "text"}'
                                         '}}', doc_type="file", index=self.index_name)

        self.es.indices.open(index=self.index_name)

        print("Initialised elesticsearch")