mirror of
https://github.com/simon987/Simple-Incremental-Search-Tool.git
synced 2025-04-19 18:16:45 +00:00
Bug fixes, text encoding tag
This commit is contained in:
parent
8b55c3b681
commit
157d2c1ea5
@ -2,9 +2,10 @@ default_options = {
|
|||||||
"ThumbnailQuality": "85",
|
"ThumbnailQuality": "85",
|
||||||
"ThumbnailSize": "275",
|
"ThumbnailSize": "275",
|
||||||
"ThumbnailColor": "FF00FF",
|
"ThumbnailColor": "FF00FF",
|
||||||
"TextFileContentLenght": "16384",
|
"TextFileContentLength": "8192",
|
||||||
"MimeGuesser": "extension", # extension, content
|
"MimeGuesser": "extension", # extension, content
|
||||||
"CheckSumCalculators": "", # md5, sha1, sha256
|
"CheckSumCalculators": "", # md5, sha1, sha256
|
||||||
|
"FileParsers": "media, text, picture" # media, text, picture
|
||||||
}
|
}
|
||||||
|
|
||||||
index_every = 50000
|
index_every = 50000
|
||||||
|
@ -135,7 +135,7 @@ class TaskManager:
|
|||||||
|
|
||||||
c = Crawler([GenericFileParser(chksum_calcs),
|
c = Crawler([GenericFileParser(chksum_calcs),
|
||||||
MediaFileParser(chksum_calcs),
|
MediaFileParser(chksum_calcs),
|
||||||
TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLenght"))),
|
TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength"))),
|
||||||
PictureFileParser(chksum_calcs)],
|
PictureFileParser(chksum_calcs)],
|
||||||
mime_guesser, self.indexer, directory.id)
|
mime_guesser, self.indexer, directory.id)
|
||||||
c.crawl(directory.path, counter)
|
c.crawl(directory.path, counter)
|
||||||
|
@ -63,11 +63,11 @@ class Indexer:
|
|||||||
|
|
||||||
self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"path_tokenizer":{"type":"path_hierarchy"}}}}',
|
self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"path_tokenizer":{"type":"path_hierarchy"}}}}',
|
||||||
index=self.index_name)
|
index=self.index_name)
|
||||||
self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"my_nGram_tokenizer":{"type":"nGram","min_gram":3,"max_gram":4}}}}',
|
self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"my_nGram_tokenizer":{"type":"nGram","min_gram":3,"max_gram":3}}}}',
|
||||||
index=self.index_name)
|
index=self.index_name)
|
||||||
self.es.indices.put_settings(body='{"analysis":{"analyzer":{"path_analyser":{"tokenizer":"path_tokenizer"}}}}',
|
self.es.indices.put_settings(body='{"analysis":{"analyzer":{"path_analyser":{"tokenizer":"path_tokenizer"}}}}',
|
||||||
index=self.index_name)
|
index=self.index_name)
|
||||||
self.es.indices.put_settings(body='{"analysis":{"analyzer":{"my_nGram":{"tokenizer":"my_nGram_tokenizer", "filter": ["lowercase"]}}}}',
|
self.es.indices.put_settings(body='{"analysis":{"analyzer":{"my_nGram":{"tokenizer":"my_nGram_tokenizer", "filter": ["lowercase", "asciifolding"]}}}}',
|
||||||
index=self.index_name)
|
index=self.index_name)
|
||||||
|
|
||||||
self.es.indices.put_mapping(body='{"properties": {'
|
self.es.indices.put_mapping(body='{"properties": {'
|
||||||
|
13
parsing.py
13
parsing.py
@ -20,7 +20,10 @@ class ContentMimeGuesser(MimeGuesser):
|
|||||||
self.libmagic = magic.Magic(mime=True)
|
self.libmagic = magic.Magic(mime=True)
|
||||||
|
|
||||||
def guess_mime(self, full_path):
|
def guess_mime(self, full_path):
|
||||||
return self.libmagic.from_file(full_path)
|
try:
|
||||||
|
return self.libmagic.from_file(full_path)
|
||||||
|
except FileNotFoundError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class ExtensionMimeGuesser(MimeGuesser):
|
class ExtensionMimeGuesser(MimeGuesser):
|
||||||
@ -268,7 +271,13 @@ class TextFileParser(GenericFileParser):
|
|||||||
"text/x-script.scheme", "text/x-script.sh", "text/x-script.tcl",
|
"text/x-script.scheme", "text/x-script.sh", "text/x-script.tcl",
|
||||||
"text/x-script.tcsh", "text/x-script.zsh", "text/x-server-parsed-html",
|
"text/x-script.tcsh", "text/x-script.zsh", "text/x-server-parsed-html",
|
||||||
"text/x-setext", "text/x-sgml", "text/x-speech", "text/x-uil",
|
"text/x-setext", "text/x-sgml", "text/x-speech", "text/x-uil",
|
||||||
"text/x-uuencode", "text/x-vcalendar", "text/xml"
|
"text/x-uuencode", "text/x-vcalendar", "text/xml", "text/x-csrc", "text/csv",
|
||||||
|
"text/x-c++src", "text/x-chdr", "text/markdown", "text/x-sh", "text/x-java",
|
||||||
|
"text/x-python", "text/x-c++hdr", "text/x-tex", "text/x-diff", "text/x-haskell",
|
||||||
|
"text/x-perl", "text/x-dsrc", "text/scriptlet", "text/x-scala", "text/calendar",
|
||||||
|
"text/x-bibtex", "text/x-tcl", "text/x-c++", "text/x-shellscript", "text/x-msdos-batch",
|
||||||
|
"text/x-makefile", "text/rtf", "text/x-objective-c", "text/troff", "text/x-m4",
|
||||||
|
"text/x-lisp", "text/x-php", "text/x-gawk", "text/x-awk", "text/x-ruby", "text/x-po"
|
||||||
]
|
]
|
||||||
|
|
||||||
self.encodings = [
|
self.encodings = [
|
||||||
|
@ -1,57 +0,0 @@
|
|||||||
from unittest import TestCase
|
|
||||||
from thumbnail import ThumbnailGenerator
|
|
||||||
from PIL import Image
|
|
||||||
import os
|
|
||||||
import shutil
|
|
||||||
|
|
||||||
|
|
||||||
class ThumbnailGeneratorTest(TestCase):
|
|
||||||
|
|
||||||
def test_generate(self):
|
|
||||||
|
|
||||||
generator = ThumbnailGenerator(300)
|
|
||||||
# Original image is 420x315
|
|
||||||
generator.generate("test_folder/sample_1.jpg", "test_thumb1.jpg", "image/JPEG")
|
|
||||||
|
|
||||||
img = Image.open("test_thumb1.jpg")
|
|
||||||
width, height = img.size
|
|
||||||
img.close()
|
|
||||||
|
|
||||||
self.assertEqual(300, width)
|
|
||||||
self.assertEqual(225, height)
|
|
||||||
|
|
||||||
if os.path.isfile("test_thumb1.jpg"):
|
|
||||||
os.remove("test_thumb1.jpg")
|
|
||||||
|
|
||||||
def test_generate_all(self):
|
|
||||||
shutil.rmtree("test_thumbnails")
|
|
||||||
|
|
||||||
generator = ThumbnailGenerator(300)
|
|
||||||
|
|
||||||
docs = [{'_source': {'path': 'test_folder', 'name': 'books.csv'}, '_id': 'books.csv-ID'},
|
|
||||||
{'_source': {'path': 'test_folder', 'name': 'sample_3.jpg'}, '_id': 'sample_3.jpg-ID'},
|
|
||||||
{'_source': {'path': 'test_folder', 'name': 'sample_5.png'}, '_id': 'sample_5.png-ID'},
|
|
||||||
{'_source': {'path': 'test_folder', 'name': 'sample_6.gif'}, '_id': 'sample_6.gif-ID'},
|
|
||||||
{'_source': {'path': 'test_folder', 'name': 'sample_7.bmp'}, '_id': 'sample_7.bmp-ID'},
|
|
||||||
{'_source': {'path': 'test_folder', 'name': 'sample_2.jpeg'}, '_id': 'sample_2.jpeg-ID'}]
|
|
||||||
|
|
||||||
generator.generate_all(docs, "test_thumbnails")
|
|
||||||
|
|
||||||
self.assertFalse(os.path.isfile("test_thumbnails/books.csv-ID") and
|
|
||||||
os.path.getsize("test_thumbnails/books.csv-ID") > 0)
|
|
||||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_3.jpg-ID") and
|
|
||||||
os.path.getsize("test_thumbnails/sample_3.jpg-ID") > 0)
|
|
||||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_2.jpeg-ID") and
|
|
||||||
os.path.getsize("test_thumbnails/sample_2.jpeg-ID") > 0)
|
|
||||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_5.png-ID") and
|
|
||||||
os.path.getsize("test_thumbnails/sample_5.png-ID") > 0)
|
|
||||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_6.gif-ID") and
|
|
||||||
os.path.getsize("test_thumbnails/sample_6.gif-ID") > 0)
|
|
||||||
self.assertTrue(os.path.isfile("test_thumbnails/sample_7.bmp-ID") and
|
|
||||||
os.path.getsize("test_thumbnails/sample_7.bmp-ID") > 0)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
1
static/css/bootstrap.min.css
vendored
1
static/css/bootstrap.min.css
vendored
File diff suppressed because one or more lines are too long
1
static/css/bootstrap.min.js
vendored
1
static/css/bootstrap.min.js
vendored
File diff suppressed because one or more lines are too long
1
static/js/popper.min.js
vendored
1
static/js/popper.min.js
vendored
File diff suppressed because one or more lines are too long
@ -39,6 +39,11 @@
|
|||||||
background-color: #FFC107;
|
background-color: #FFC107;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.badge-text {
|
||||||
|
color: #FFFFFF;
|
||||||
|
background-color: #FAAB3C;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
.card-img-top {
|
.card-img-top {
|
||||||
@ -324,7 +329,7 @@
|
|||||||
if (hit.hasOwnProperty("highlight") && hit["highlight"].hasOwnProperty("name")) {
|
if (hit.hasOwnProperty("highlight") && hit["highlight"].hasOwnProperty("name")) {
|
||||||
title.insertAdjacentHTML('afterbegin', hit["highlight"]["name"] + extension);
|
title.insertAdjacentHTML('afterbegin', hit["highlight"]["name"] + extension);
|
||||||
} else {
|
} else {
|
||||||
title.appendChild(document.createTextNode(hit["_source"]["name"]));
|
title.appendChild(document.createTextNode(hit["_source"]["name"] + extension));
|
||||||
}
|
}
|
||||||
|
|
||||||
title.setAttribute("title", hit["_source"]["path"] + hit["_source"]["name"] + extension);
|
title.setAttribute("title", hit["_source"]["path"] + hit["_source"]["name"] + extension);
|
||||||
@ -415,6 +420,13 @@
|
|||||||
formatTag.appendChild(document.createTextNode(hit["_source"]["format_name"]));
|
formatTag.appendChild(document.createTextNode(hit["_source"]["format_name"]));
|
||||||
tags.push(formatTag);
|
tags.push(formatTag);
|
||||||
|
|
||||||
|
break;
|
||||||
|
case "text":
|
||||||
|
formatTag = document.createElement("span");
|
||||||
|
formatTag.setAttribute("class", "badge badge-pill badge-text");
|
||||||
|
formatTag.appendChild(document.createTextNode(hit["_source"]["encoding"]));
|
||||||
|
tags.push(formatTag);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,7 +41,6 @@ class ThumbnailGenerator:
|
|||||||
os.makedirs(dest_path, exist_ok=True)
|
os.makedirs(dest_path, exist_ok=True)
|
||||||
|
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
|
|
||||||
extension = "" if doc["_source"]["extension"] == "" else "." + doc["_source"]["extension"]
|
extension = "" if doc["_source"]["extension"] == "" else "." + doc["_source"]["extension"]
|
||||||
full_path = os.path.join(doc["_source"]["path"], doc["_source"]["name"] + extension)
|
full_path = os.path.join(doc["_source"]["path"], doc["_source"]["name"] + extension)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user