57 lines
1.8 KiB
Python

from elasticsearch import Elasticsearch
from indexer import Indexer
import json
from crawler import Crawler
from indexer import Indexer
from parsing import GenericFileParser, Sha256CheckSumCalculator, ExtensionMimeGuesser
# Name of the scratch index that this script tears down and rebuilds.
INDEX_NAME = "test"

# Client created with the library defaults (no hosts are passed explicitly).
es = Elasticsearch()

# Reset: drop any previous copy of the index and recreate it empty.
# ignore=[404] lets the very first run succeed, when there is no index to
# delete yet; without it the client raises NotFoundError and the script stops.
es.indices.delete(index=INDEX_NAME, ignore=[404])
es.indices.create(index=INDEX_NAME)

# Config: the index is closed before its analysis settings are changed and
# reopened afterwards (analyzers cannot be added to an open index).
es.indices.close(index=INDEX_NAME)

# Register a custom analyzer ("path_analyser") backed by a path_hierarchy
# tokenizer ("path_tokenizer").
es.indices.put_settings(
    index=INDEX_NAME,
    body={
        "analysis": {
            "analyzer": {
                "path_analyser": {"tokenizer": "path_tokenizer"},
            },
            "tokenizer": {
                "path_tokenizer": {"type": "path_hierarchy"},
            },
        },
    },
)

# Mapping for the "file" doc type:
#   name          - text analysed with path_analyser, copied into suggest-path
#   suggest-path  - completion field using the keyword analyzer
#   mime          - keyword field
es.indices.put_mapping(
    index=INDEX_NAME,
    doc_type="file",
    body={
        "properties": {
            "name": {
                "type": "text",
                "analyzer": "path_analyser",
                "copy_to": "suggest-path",
            },
            "suggest-path": {"type": "completion", "analyzer": "keyword"},
            "mime": {"type": "keyword"},
        },
    },
)

es.indices.open(index=INDEX_NAME)
# add docs
# crawler = Crawler([GenericFileParser([Sha256CheckSumCalculator()], ExtensionMimeGuesser())])
# crawler.crawl("spec/test_folder")
#
# indexer = Indexer("test")
#
# indexer.index(crawler.documents)
# search
# print(es.search("test", "file", '{"query": {"term": {"name": "spec/test_folder/sub2/"}}}'))
# print(es.search("test", "file", '{"query": {"match_all": {}}, "aggs": {"test": {"terms": {"field": "mime"}}}}'))
# suggest = es.search("test", "file", '{"suggest": {"path-suggest": {"prefix": "spec/test_folder/sub", "completion": {"field": "suggest-path"}}}}')
#
# print(suggest["suggest"]["path-suggest"])
#
# for hit in suggest["suggest"]["path-suggest"][0]["options"]:
#     print(hit["text"])
# indexer = Indexer("test")
# import time
# time.sleep(10)
# Build a crawler with an empty list as its only constructor argument, then
# call countFiles on "/".
# NOTE(review): presumably this counts the files under the filesystem root -
# confirm against Crawler.countFiles in crawler.py. The return value, if any,
# is discarded here.
no_parsers = []
c = Crawler(no_parsers)
c.countFiles("/")