mirror of
https://github.com/simon987/Simple-Incremental-Search-Tool.git
synced 2025-04-10 14:06:41 +00:00
Bug fixes, text encoding tag
This commit is contained in:
parent
8b55c3b681
commit
157d2c1ea5
@ -2,9 +2,10 @@ default_options = {
|
||||
"ThumbnailQuality": "85",
|
||||
"ThumbnailSize": "275",
|
||||
"ThumbnailColor": "FF00FF",
|
||||
"TextFileContentLenght": "16384",
|
||||
"TextFileContentLength": "8192",
|
||||
"MimeGuesser": "extension", # extension, content
|
||||
"CheckSumCalculators": "", # md5, sha1, sha256
|
||||
"FileParsers": "media, text, picture" # media, text, picture
|
||||
}
|
||||
|
||||
index_every = 50000
|
||||
|
@ -135,7 +135,7 @@ class TaskManager:
|
||||
|
||||
c = Crawler([GenericFileParser(chksum_calcs),
|
||||
MediaFileParser(chksum_calcs),
|
||||
TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLenght"))),
|
||||
TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength"))),
|
||||
PictureFileParser(chksum_calcs)],
|
||||
mime_guesser, self.indexer, directory.id)
|
||||
c.crawl(directory.path, counter)
|
||||
|
@ -63,11 +63,11 @@ class Indexer:
|
||||
|
||||
self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"path_tokenizer":{"type":"path_hierarchy"}}}}',
|
||||
index=self.index_name)
|
||||
self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"my_nGram_tokenizer":{"type":"nGram","min_gram":3,"max_gram":4}}}}',
|
||||
self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"my_nGram_tokenizer":{"type":"nGram","min_gram":3,"max_gram":3}}}}',
|
||||
index=self.index_name)
|
||||
self.es.indices.put_settings(body='{"analysis":{"analyzer":{"path_analyser":{"tokenizer":"path_tokenizer"}}}}',
|
||||
index=self.index_name)
|
||||
self.es.indices.put_settings(body='{"analysis":{"analyzer":{"my_nGram":{"tokenizer":"my_nGram_tokenizer", "filter": ["lowercase"]}}}}',
|
||||
self.es.indices.put_settings(body='{"analysis":{"analyzer":{"my_nGram":{"tokenizer":"my_nGram_tokenizer", "filter": ["lowercase", "asciifolding"]}}}}',
|
||||
index=self.index_name)
|
||||
|
||||
self.es.indices.put_mapping(body='{"properties": {'
|
||||
|
13
parsing.py
13
parsing.py
@ -20,7 +20,10 @@ class ContentMimeGuesser(MimeGuesser):
|
||||
self.libmagic = magic.Magic(mime=True)
|
||||
|
||||
def guess_mime(self, full_path):
|
||||
return self.libmagic.from_file(full_path)
|
||||
try:
|
||||
return self.libmagic.from_file(full_path)
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
|
||||
|
||||
class ExtensionMimeGuesser(MimeGuesser):
|
||||
@ -268,7 +271,13 @@ class TextFileParser(GenericFileParser):
|
||||
"text/x-script.scheme", "text/x-script.sh", "text/x-script.tcl",
|
||||
"text/x-script.tcsh", "text/x-script.zsh", "text/x-server-parsed-html",
|
||||
"text/x-setext", "text/x-sgml", "text/x-speech", "text/x-uil",
|
||||
"text/x-uuencode", "text/x-vcalendar", "text/xml"
|
||||
"text/x-uuencode", "text/x-vcalendar", "text/xml", "text/x-csrc", "text/csv",
|
||||
"text/x-c++src", "text/x-chdr", "text/markdown", "text/x-sh", "text/x-java",
|
||||
"text/x-python", "text/x-c++hdr", "text/x-tex", "text/x-diff", "text/x-haskell",
|
||||
"text/x-perl", "text/x-dsrc", "text/scriptlet", "text/x-scala", "text/calendar",
|
||||
"text/x-bibtex", "text/x-tcl", "text/x-c++", "text/x-shellscript", "text/x-msdos-batch",
|
||||
"text/x-makefile", "text/rtf", "text/x-objective-c", "text/troff", "text/x-m4",
|
||||
"text/x-lisp", "text/x-php", "text/x-gawk", "text/x-awk", "text/x-ruby", "text/x-po"
|
||||
]
|
||||
|
||||
self.encodings = [
|
||||
|
@ -1,57 +0,0 @@
|
||||
from unittest import TestCase
|
||||
from thumbnail import ThumbnailGenerator
|
||||
from PIL import Image
|
||||
import os
|
||||
import shutil
|
||||
|
||||
|
||||
class ThumbnailGeneratorTest(TestCase):
|
||||
|
||||
def test_generate(self):
|
||||
|
||||
generator = ThumbnailGenerator(300)
|
||||
# Original image is 420x315
|
||||
generator.generate("test_folder/sample_1.jpg", "test_thumb1.jpg", "image/JPEG")
|
||||
|
||||
img = Image.open("test_thumb1.jpg")
|
||||
width, height = img.size
|
||||
img.close()
|
||||
|
||||
self.assertEqual(300, width)
|
||||
self.assertEqual(225, height)
|
||||
|
||||
if os.path.isfile("test_thumb1.jpg"):
|
||||
os.remove("test_thumb1.jpg")
|
||||
|
||||
def test_generate_all(self):
|
||||
shutil.rmtree("test_thumbnails")
|
||||
|
||||
generator = ThumbnailGenerator(300)
|
||||
|
||||
docs = [{'_source': {'path': 'test_folder', 'name': 'books.csv'}, '_id': 'books.csv-ID'},
|
||||
{'_source': {'path': 'test_folder', 'name': 'sample_3.jpg'}, '_id': 'sample_3.jpg-ID'},
|
||||
{'_source': {'path': 'test_folder', 'name': 'sample_5.png'}, '_id': 'sample_5.png-ID'},
|
||||
{'_source': {'path': 'test_folder', 'name': 'sample_6.gif'}, '_id': 'sample_6.gif-ID'},
|
||||
{'_source': {'path': 'test_folder', 'name': 'sample_7.bmp'}, '_id': 'sample_7.bmp-ID'},
|
||||
{'_source': {'path': 'test_folder', 'name': 'sample_2.jpeg'}, '_id': 'sample_2.jpeg-ID'}]
|
||||
|
||||
generator.generate_all(docs, "test_thumbnails")
|
||||
|
||||
self.assertFalse(os.path.isfile("test_thumbnails/books.csv-ID") and
|
||||
os.path.getsize("test_thumbnails/books.csv-ID") > 0)
|
||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_3.jpg-ID") and
|
||||
os.path.getsize("test_thumbnails/sample_3.jpg-ID") > 0)
|
||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_2.jpeg-ID") and
|
||||
os.path.getsize("test_thumbnails/sample_2.jpeg-ID") > 0)
|
||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_5.png-ID") and
|
||||
os.path.getsize("test_thumbnails/sample_5.png-ID") > 0)
|
||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_6.gif-ID") and
|
||||
os.path.getsize("test_thumbnails/sample_6.gif-ID") > 0)
|
||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_7.bmp-ID") and
|
||||
os.path.getsize("test_thumbnails/sample_7.bmp-ID") > 0)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
3
static/css/bootstrap.min.css
vendored
3
static/css/bootstrap.min.css
vendored
File diff suppressed because one or more lines are too long
1
static/css/bootstrap.min.js
vendored
1
static/css/bootstrap.min.js
vendored
File diff suppressed because one or more lines are too long
3
static/js/popper.min.js
vendored
3
static/js/popper.min.js
vendored
File diff suppressed because one or more lines are too long
@ -39,6 +39,11 @@
|
||||
background-color: #FFC107;
|
||||
}
|
||||
|
||||
.badge-text {
|
||||
color: #FFFFFF;
|
||||
background-color: #FAAB3C;
|
||||
}
|
||||
|
||||
|
||||
|
||||
.card-img-top {
|
||||
@ -324,7 +329,7 @@
|
||||
if (hit.hasOwnProperty("highlight") && hit["highlight"].hasOwnProperty("name")) {
|
||||
title.insertAdjacentHTML('afterbegin', hit["highlight"]["name"] + extension);
|
||||
} else {
|
||||
title.appendChild(document.createTextNode(hit["_source"]["name"]));
|
||||
title.appendChild(document.createTextNode(hit["_source"]["name"] + extension));
|
||||
}
|
||||
|
||||
title.setAttribute("title", hit["_source"]["path"] + hit["_source"]["name"] + extension);
|
||||
@ -415,6 +420,13 @@
|
||||
formatTag.appendChild(document.createTextNode(hit["_source"]["format_name"]));
|
||||
tags.push(formatTag);
|
||||
|
||||
break;
|
||||
case "text":
|
||||
formatTag = document.createElement("span");
|
||||
formatTag.setAttribute("class", "badge badge-pill badge-text");
|
||||
formatTag.appendChild(document.createTextNode(hit["_source"]["encoding"]));
|
||||
tags.push(formatTag);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -41,7 +41,6 @@ class ThumbnailGenerator:
|
||||
os.makedirs(dest_path, exist_ok=True)
|
||||
|
||||
for doc in docs:
|
||||
|
||||
extension = "" if doc["_source"]["extension"] == "" else "." + doc["_source"]["extension"]
|
||||
full_path = os.path.join(doc["_source"]["path"], doc["_source"]["name"] + extension)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user