Mirror of https://github.com/simon987/Simple-Incremental-Search-Tool.git (synced 2025-04-15 16:26:45 +00:00)

Small improvements in indexing

parent 213cc61da9
commit fe52ecceff
README.md | 21
@@ -2,7 +2,20 @@

Work in progress: probably won't work without some tweaking

## Running on linux
## Setup on Windows
```bash
git clone https://github.com/simon987/Projet-Web-2018
cd Projet-Web-2018
```
[Download latest elasticsearch version](https://www.elastic.co/downloads/elasticsearch) and extract to `Projet-Web-2018\elasticsearch`

```bash
sudo pip3 install -r requirements.txt

python3 run.py
```

## Setup on Mac/linux
```bash
git clone https://github.com/simon987/Projet-Web-2018
cd Projet-Web-2018
@@ -16,7 +29,7 @@ sudo pip3 install -r requirements.txt

python3 run.py
```

## Running tests
```
python3 -m unittest discover
## Running unit tests
```bash
python3 -m unittest
```
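The testing section is renamed and the command changes from `python3 -m unittest discover` to plain `python3 -m unittest`; on modern Python 3 the bare form runs the same discovery from the current directory. A programmatic equivalent, should one want to run the suite from code (the start directory and verbosity are arbitrary choices, not taken from the repository):

```python
import unittest

if __name__ == "__main__":
    # Discover test modules (test*.py) under the current directory, as
    # `python3 -m unittest` does when run from the project root.
    suite = unittest.defaultTestLoader.discover(".")
    unittest.TextTestRunner(verbosity=2).run(suite)
```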
@@ -25,4 +25,10 @@ bcrypt_rounds = 14
# sqlite3 database path
db_path = "./local_storage.db"

try:
    import cairosvg
    cairosvg = True
except:
    cairosvg = False

VERSION = "1.0a"
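The hunk above turns `cairosvg` into an optional dependency: `config.cairosvg` ends up as a boolean that is only true when the import succeeds. A minimal, self-contained sketch of the same guarded-import pattern (module, flag and function names here are illustrative, not from the repository):

```python
# optional_svg.py - probe for an optional dependency at import time and
# expose a boolean flag instead of letting the ImportError propagate.
try:
    import cairosvg  # needs the native cairo libraries to be installed
    HAS_CAIROSVG = True
except ImportError:
    HAS_CAIROSVG = False


def render_svg(path, out_path):
    """Rasterize an SVG to PNG if cairosvg is available, otherwise skip quietly."""
    if not HAS_CAIROSVG:
        return False
    cairosvg.svg2png(url=path, write_to=out_path)
    return True
```

Note that the commit rebinds the name `cairosvg` itself to a boolean, which is why other modules test `config.cairosvg`; the sketch keeps a separate flag so the module object stays usable.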
@@ -37,7 +37,6 @@ class Indexer:
if platform.system() == "Windows":
    subprocess.Popen(["elasticsearch\\bin\\elasticsearch.bat"])
else:
    print(platform.system())
    subprocess.Popen(["elasticsearch/bin/elasticsearch"])

@staticmethod
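This hunk drops a stray `print(platform.system())` debug line from the code that launches the bundled Elasticsearch with `subprocess.Popen`. Since `Popen` returns immediately, a caller generally has to wait for the node to accept connections before indexing; a rough sketch of such a readiness poll, assuming Elasticsearch listens on its default HTTP port 9200 (neither the port nor this helper appears in the diff):

```python
import time
import urllib.request
import urllib.error


def wait_for_elasticsearch(url="http://localhost:9200", timeout=60):
    """Poll the Elasticsearch HTTP endpoint until it answers or the timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=2):
                return True  # node is up and answering HTTP requests
        except (urllib.error.URLError, OSError):
            time.sleep(1)  # not ready yet, retry
    return False
```

A call such as `wait_for_elasticsearch()` right after the `Popen` would block until the node responds.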
@@ -86,6 +85,9 @@ class Indexer:
    "analysis": {"analyzer": {"my_nGram": {"tokenizer": "my_nGram_tokenizer", "filter": ["lowercase",
                                                                                         "asciifolding"]}}}},
    index=self.index_name)
self.es.indices.put_settings(body={
    "analysis": {"analyzer": {"content_analyser": {"tokenizer": "standard", "filter": ["lowercase"]}}}},
    index=self.index_name)

self.es.indices.put_mapping(body={"properties": {
    "path": {"type": "text", "analyzer": "path_analyser", "copy_to": "suggest-path"},
@@ -98,6 +100,7 @@ class Indexer:
    "width": {"type": "integer"},
    "height": {"type": "integer"},
    "mtime": {"type": "integer"},
    "size": {"type": "long"},
    "directory": {"type": "short"},
    "name": {"analyzer": "my_nGram", "type": "text"},
    "album": {"analyzer": "my_nGram", "type": "text"},
@@ -105,6 +108,7 @@ class Indexer:
    "title": {"analyzer": "my_nGram", "type": "text"},
    "genre": {"analyzer": "my_nGram", "type": "text"},
    "album_artist": {"analyzer": "my_nGram", "type": "text"},
    "content": {"analyzer": "content_analyser", "type": "text"},
}}, doc_type="file", index=self.index_name)

self.es.indices.open(index=self.index_name)
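The three hunks above add a second `put_settings` call defining a `content_analyser` and a matching `content` field in the mapping. As a rough, stand-alone illustration of configuring an index this way with the `elasticsearch` Python client (index name, tokenizer parameters and the reduced field list are assumptions; the `doc_type` argument in the diff suggests a pre-7.x cluster and client):

```python
from elasticsearch import Elasticsearch

es = Elasticsearch()  # assumes a local node on the default port

# Create an index whose "name" field is ngram-analyzed for partial matching
# and whose "content" field uses a plain lowercased standard tokenizer.
es.indices.create(index="files_demo", body={
    "settings": {
        "analysis": {
            "tokenizer": {
                "my_nGram_tokenizer": {"type": "ngram", "min_gram": 3, "max_gram": 3}
            },
            "analyzer": {
                "my_nGram": {"tokenizer": "my_nGram_tokenizer",
                             "filter": ["lowercase", "asciifolding"]},
                "content_analyser": {"tokenizer": "standard",
                                     "filter": ["lowercase"]}
            }
        }
    },
    "mappings": {
        "file": {  # mapping types were still in use before Elasticsearch 7
            "properties": {
                "name": {"type": "text", "analyzer": "my_nGram"},
                "content": {"type": "text", "analyzer": "content_analyser"},
                "size": {"type": "long"},
                "mtime": {"type": "integer"}
            }
        }
    }
})
```

Creating the index with settings and mappings in one request avoids the close/`put_settings`/open sequence, but only works when the index does not exist yet.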
@@ -4,7 +4,6 @@ import mimetypes
import subprocess
import json
import chardet
import html
import warnings
import docx2txt
import xlrd
@@ -290,7 +289,7 @@ class TextFileParser(GenericFileParser):
    info["encoding"] = encoding
    try:
        content = raw_content.decode(encoding, "ignore")
        info["content"] = html.escape(content)
        info["content"] = content
    except Exception:
        print("Unknown encoding: " + encoding)
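In this hunk the decoded text is stored as-is instead of being HTML-escaped (the related test further down adjusts the expected content length from 1309 to 1234 accordingly). A small sketch of the surrounding detect-then-decode step with `chardet`, with escaping deferred to a separate field (function and field names are illustrative):

```python
import html
import chardet


def read_text_file(path):
    """Detect the encoding of a text file and decode it leniently."""
    with open(path, "rb") as f:
        raw = f.read()

    encoding = chardet.detect(raw)["encoding"] or "utf-8"
    content = raw.decode(encoding, "ignore")  # drop undecodable bytes

    # Index the raw text; escape only when rendering it as HTML.
    return {"encoding": encoding,
            "content": content,
            "content_html": html.escape(content)}
```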
@@ -497,7 +496,6 @@ class SpreadSheetParser(GenericFileParser):
    num_cells = worksheet.ncols

    for curr_row in range(num_rows):
        row = worksheet.row(curr_row)
        new_output = []
        for index_col in xrange(num_cells):
            value = worksheet.cell_value(curr_row, index_col)
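For reference, the spreadsheet hunk walks an `xlrd` worksheet cell by cell; note that the `xrange` in the inner loop is a Python 2 spelling and would be `range` under Python 3. A self-contained sketch of the same traversal (the function name and the tab-separated output format are assumptions):

```python
import xlrd  # xlrd <= 1.2 reads both .xls and .xlsx files


def spreadsheet_to_text(path):
    """Flatten every sheet of a workbook into tab-separated lines of text."""
    workbook = xlrd.open_workbook(path)
    lines = []
    for worksheet in workbook.sheets():
        for curr_row in range(worksheet.nrows):
            row_values = [str(worksheet.cell_value(curr_row, col))
                          for col in range(worksheet.ncols)]
            lines.append("\t".join(row_values))
    return "\n".join(lines)
```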
@@ -14,5 +14,5 @@ class TextFileParserTest(TestCase):
    info = parser.parse(dir_name + "/test_files/text.csv")

    self.assertTrue(info["content"].startswith("rosbagTimestamp,header,seq,stamp,secs,nsecs,"))
    self.assertEqual(len(info["content"]), 1309)  # Size is larger because of html escaping
    self.assertEqual(len(info["content"]), 1234)
    self.assertEqual(info["encoding"], "ascii")
thumbnail.py | 12
@@ -2,7 +2,10 @@ from PIL import Image
import os
from multiprocessing import Value, Process
import ffmpeg
#import cairosvg
import config

if config.cairosvg:
    import cairosvg


class ThumbnailGenerator:
@@ -17,12 +20,12 @@ class ThumbnailGenerator:
    if mime is None:
        return

    if mime == "image/svg+xml":
    if mime == "image/svg+xml" and config.cairosvg:

        try:
            p = Process(target=cairosvg.svg2png, kwargs={"url": path, "write_to": "tmp"})
            p.start()
            p.join(1.5)
            p.join(1)

            if p.is_alive():
                p.terminate()
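This hunk skips SVG rasterization unless `config.cairosvg` is set and shortens the watchdog on the `cairosvg.svg2png` worker from 1.5 s to 1 s. The underlying pattern, running a possibly-hanging call in a child process and killing it after a timeout, in isolation (the helper name and return convention are illustrative):

```python
from multiprocessing import Process


def run_with_timeout(target, timeout, **kwargs):
    """Run target(**kwargs) in a child process; kill it if it exceeds timeout seconds."""
    p = Process(target=target, kwargs=kwargs)
    p.start()
    p.join(timeout)

    if p.is_alive():       # still running: assume it hung
        p.terminate()
        p.join()           # reap the terminated child
        return False
    return p.exitcode == 0
```

With the values from the diff this would be invoked roughly as `run_with_timeout(cairosvg.svg2png, 1, url=path, write_to="tmp")`.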
@@ -50,8 +53,7 @@ class ThumbnailGenerator:
        .run()
    )
    self.generate_image("tmp", dest_path)
except Exception as e:
    print(e)
except Exception:
    print("Couldn't make thumbnail for " + path)

if os.path.exists("tmp"):
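In the last hunk the ffmpeg output is handed to `self.generate_image("tmp", dest_path)` and the bare `print(e)` becomes a message naming the file. A rough sketch of that two-step grab-a-frame-then-shrink approach with `ffmpeg-python` and Pillow (the seek offset, output size, temporary file name and JPEG output are assumptions, not values from the repository):

```python
import ffmpeg
from PIL import Image


def video_thumbnail(video_path, dest_path, size=(275, 275)):
    """Grab one frame with ffmpeg, then shrink it with Pillow."""
    try:
        (
            ffmpeg
            .input(video_path, ss=1)               # seek ~1 s in to skip black frames
            .output("tmp", vframes=1, format="image2")
            .overwrite_output()
            .run(quiet=True)
        )
        img = Image.open("tmp")
        img.thumbnail(size)                        # preserves aspect ratio
        img.convert("RGB").save(dest_path, "JPEG")
    except Exception:
        print("Couldn't make thumbnail for " + video_path)
```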