From 4b151966b304fe7cd08059f2b0be1de3ddeb7b18 Mon Sep 17 00:00:00 2001 From: simon Date: Sun, 22 Apr 2018 12:34:20 -0400 Subject: [PATCH] Made it work on Windows --- crawler.py | 2 +- indexer.py | 18 +++++++++++++----- parsing.py | 1 - thumbnail.py | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/crawler.py b/crawler.py index c947db1..d6fe673 100644 --- a/crawler.py +++ b/crawler.py @@ -29,7 +29,7 @@ class RunningTask: class Crawler: - def __init__(self, enabled_parsers: list, mime_guesser: MimeGuesser=ContentMimeGuesser(), indexer=None, dir_id=0, + def __init__(self, enabled_parsers: list, mime_guesser: MimeGuesser=ExtensionMimeGuesser(), indexer=None, dir_id=0, root_dir="/"): self.documents = [] self.enabled_parsers = enabled_parsers diff --git a/indexer.py b/indexer.py index 98132ef..3d056a6 100644 --- a/indexer.py +++ b/indexer.py @@ -1,9 +1,11 @@ import json import elasticsearch +from elasticsearch.exceptions import TransportError from threading import Thread import subprocess import requests import config +import platform class Indexer: @@ -24,15 +26,20 @@ class Indexer: time.sleep(15) - try: - requests.head("http://localhost:9200") - except requests.exceptions.ConnectionError: + if self.es.indices.exists(self.index_name): + print("Index is already setup") + else: print("First time setup...") self.init() @staticmethod def run_elasticsearch(): - subprocess.Popen(["elasticsearch/bin/elasticsearch"]) + + if platform.system() == "Windows": + subprocess.Popen(["elasticsearch\\bin\\elasticsearch.bat"]) + else: + print(platform.system()) + subprocess.Popen(["elasticsearch/bin/elasticsearch"]) @staticmethod def create_bulk_index_string(docs: list, directory: int): @@ -62,7 +69,8 @@ class Indexer: self.es.indices.create(self.index_name) def init(self): - self.es.indices.delete(index=self.index_name) + if self.es.indices.exists(self.index_name): + self.es.indices.delete(index=self.index_name) self.es.indices.create(index=self.index_name) self.es.indices.close(index=self.index_name) diff --git a/parsing.py b/parsing.py index 562b5b6..d60da86 100644 --- a/parsing.py +++ b/parsing.py @@ -1,5 +1,4 @@ import hashlib -import magic import os import mimetypes import subprocess diff --git a/thumbnail.py b/thumbnail.py index b639337..26ecf57 100644 --- a/thumbnail.py +++ b/thumbnail.py @@ -2,7 +2,7 @@ from PIL import Image import os from multiprocessing import Value, Process import ffmpeg -import cairosvg +#import cairosvg class ThumbnailGenerator: