Added web interface, crawler and more work on local storage

This commit is contained in:
simon
2018-02-21 20:07:59 -05:00
parent de0a835ecd
commit 165844e4ca
24 changed files with 1346 additions and 235 deletions

55
indexer.py Normal file
View File

@@ -0,0 +1,55 @@
import json
import subprocess
import time
from threading import Thread

import elasticsearch
import requests
class Indexer:
    """Thin wrapper around a local Elasticsearch instance.

    On construction, probes http://localhost:9200; if nothing answers,
    launches the bundled Elasticsearch binary in a background daemon
    thread and waits briefly for it to come up.
    """

    def __init__(self, index: str):
        """Connect to (and if necessary start) a local Elasticsearch.

        :param index: name of the index all bulk operations target.
        """
        self.index_name = index
        self.es = elasticsearch.Elasticsearch()
        try:
            # Cheap liveness probe: a refused connection means no local
            # Elasticsearch is listening yet.
            requests.head("http://localhost:9200")
            print("elasticsearch is already running")
        except requests.exceptions.ConnectionError:
            # Daemon thread so the ES launcher dies with this process;
            # sleep gives the server a moment to bind before first use.
            t = Thread(target=Indexer.run_elasticsearch)
            t.daemon = True
            t.start()
            time.sleep(5)

    @staticmethod
    def run_elasticsearch():
        """Launch the bundled Elasticsearch binary (does not block)."""
        subprocess.Popen(["elasticsearch/bin/elasticsearch"])

    @staticmethod
    def create_bulk_index_string(docs: list, index_name: str) -> str:
        """Create an NDJSON insert body for the Elasticsearch bulk API.

        Each document is preceded by an action line naming the target
        index; every line (including the last) ends with a newline, as
        the bulk endpoint requires.

        :param docs: JSON-serializable documents to index.
        :param index_name: index the action lines should target.
        :return: the NDJSON request body ("" for an empty doc list).
        """
        # json.dumps escapes the index name properly (raw string
        # concatenation broke on names containing quotes/backslashes);
        # compact separators reproduce the original byte layout.
        action_string = json.dumps(
            {"index": {"_index": index_name, "_type": "file"}},
            separators=(",", ":"),
        ) + "\n"
        # join is linear; the old repeated += was quadratic in len(docs).
        return "".join(action_string + json.dumps(doc) + "\n" for doc in docs)

    def index(self, docs: list):
        """Bulk-index *docs* into the configured index."""
        index_string = self.create_bulk_index_string(docs, self.index_name)
        self.es.bulk(index_string)

    def clear(self):
        """Delete every document by dropping and recreating the index."""
        self.es.indices.delete(self.index_name)
        self.es.indices.create(self.index_name)