Bug fixes, text encoding tag

This commit is contained in:
simon987 2018-04-14 22:31:55 -04:00
parent 8b55c3b681
commit 157d2c1ea5
10 changed files with 31 additions and 70 deletions

View File

@ -2,9 +2,10 @@ default_options = {
"ThumbnailQuality": "85",
"ThumbnailSize": "275",
"ThumbnailColor": "FF00FF",
"TextFileContentLenght": "16384",
"TextFileContentLength": "8192",
"MimeGuesser": "extension", # extension, content
"CheckSumCalculators": "", # md5, sha1, sha256
"FileParsers": "media, text, picture" # media, text, picture
}
index_every = 50000

View File

@ -135,7 +135,7 @@ class TaskManager:
c = Crawler([GenericFileParser(chksum_calcs),
MediaFileParser(chksum_calcs),
TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLenght"))),
TextFileParser(chksum_calcs, int(directory.get_option("TextFileContentLength"))),
PictureFileParser(chksum_calcs)],
mime_guesser, self.indexer, directory.id)
c.crawl(directory.path, counter)

View File

@ -63,11 +63,11 @@ class Indexer:
self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"path_tokenizer":{"type":"path_hierarchy"}}}}',
index=self.index_name)
self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"my_nGram_tokenizer":{"type":"nGram","min_gram":3,"max_gram":4}}}}',
self.es.indices.put_settings(body='{"analysis":{"tokenizer":{"my_nGram_tokenizer":{"type":"nGram","min_gram":3,"max_gram":3}}}}',
index=self.index_name)
self.es.indices.put_settings(body='{"analysis":{"analyzer":{"path_analyser":{"tokenizer":"path_tokenizer"}}}}',
index=self.index_name)
self.es.indices.put_settings(body='{"analysis":{"analyzer":{"my_nGram":{"tokenizer":"my_nGram_tokenizer", "filter": ["lowercase"]}}}}',
self.es.indices.put_settings(body='{"analysis":{"analyzer":{"my_nGram":{"tokenizer":"my_nGram_tokenizer", "filter": ["lowercase", "asciifolding"]}}}}',
index=self.index_name)
self.es.indices.put_mapping(body='{"properties": {'

View File

@ -20,7 +20,10 @@ class ContentMimeGuesser(MimeGuesser):
self.libmagic = magic.Magic(mime=True)
def guess_mime(self, full_path):
    """Return the MIME type libmagic reports for the file at *full_path*.

    The file may disappear between being listed and being inspected; in
    that case ``None`` is returned instead of propagating the error.

    :param full_path: path of the file to inspect
    :return: whatever ``self.libmagic.from_file`` returns, or ``None`` when
        the file does not exist
    """
    # The unguarded ``return`` that used to precede this ``try`` made the
    # FileNotFoundError handler unreachable; only the guarded call is kept.
    try:
        return self.libmagic.from_file(full_path)
    except FileNotFoundError:
        return None
class ExtensionMimeGuesser(MimeGuesser):
@ -268,7 +271,13 @@ class TextFileParser(GenericFileParser):
"text/x-script.scheme", "text/x-script.sh", "text/x-script.tcl",
"text/x-script.tcsh", "text/x-script.zsh", "text/x-server-parsed-html",
"text/x-setext", "text/x-sgml", "text/x-speech", "text/x-uil",
"text/x-uuencode", "text/x-vcalendar", "text/xml"
"text/x-uuencode", "text/x-vcalendar", "text/xml", "text/x-csrc", "text/csv",
"text/x-c++src", "text/x-chdr", "text/markdown", "text/x-sh", "text/x-java",
"text/x-python", "text/x-c++hdr", "text/x-tex", "text/x-diff", "text/x-haskell",
"text/x-perl", "text/x-dsrc", "text/scriptlet", "text/x-scala", "text/calendar",
"text/x-bibtex", "text/x-tcl", "text/x-c++", "text/x-shellscript", "text/x-msdos-batch",
"text/x-makefile", "text/rtf", "text/x-objective-c", "text/troff", "text/x-m4",
"text/x-lisp", "text/x-php", "text/x-gawk", "text/x-awk", "text/x-ruby", "text/x-po"
]
self.encodings = [

View File

@ -1,57 +0,0 @@
from unittest import TestCase
from thumbnail import ThumbnailGenerator
from PIL import Image
import os
import shutil
class ThumbnailGeneratorTest(TestCase):
    """Tests for ``ThumbnailGenerator``: single-file and batch generation."""

    @staticmethod
    def _remove_if_exists(path):
        """Delete *path* when it is a regular file; do nothing otherwise."""
        if os.path.isfile(path):
            os.remove(path)

    @staticmethod
    def _has_thumbnail(doc_id):
        """Return True when a non-empty file exists for *doc_id* in ``test_thumbnails``."""
        thumb_path = "test_thumbnails/" + doc_id
        return os.path.isfile(thumb_path) and os.path.getsize(thumb_path) > 0

    def test_generate(self):
        """A 420x315 source image yields a 300x225 thumbnail for size 300."""
        thumb_path = "test_thumb1.jpg"
        # Registered before generating so the file is removed even when an
        # assertion (or the generator itself) fails.
        self.addCleanup(self._remove_if_exists, thumb_path)

        generator = ThumbnailGenerator(300)
        # Original image is 420x315
        generator.generate("test_folder/sample_1.jpg", thumb_path, "image/JPEG")

        img = Image.open(thumb_path)
        try:
            width, height = img.size
        finally:
            # Always release the file handle, even if reading the size fails
            img.close()

        self.assertEqual(300, width)
        self.assertEqual(225, height)

    def test_generate_all(self):
        """Batch generation writes a thumbnail for each image and none for the CSV."""
        # Start from a clean slate; the directory is absent on a first run,
        # which must not fail the test.
        try:
            shutil.rmtree("test_thumbnails")
        except FileNotFoundError:
            pass

        generator = ThumbnailGenerator(300)

        # NOTE(review): the generate_all fragment visible in this changeset
        # reads doc["_source"]["extension"]; these docs carry no such key --
        # confirm against the current ThumbnailGenerator API.
        docs = [{'_source': {'path': 'test_folder', 'name': 'books.csv'}, '_id': 'books.csv-ID'},
                {'_source': {'path': 'test_folder', 'name': 'sample_3.jpg'}, '_id': 'sample_3.jpg-ID'},
                {'_source': {'path': 'test_folder', 'name': 'sample_5.png'}, '_id': 'sample_5.png-ID'},
                {'_source': {'path': 'test_folder', 'name': 'sample_6.gif'}, '_id': 'sample_6.gif-ID'},
                {'_source': {'path': 'test_folder', 'name': 'sample_7.bmp'}, '_id': 'sample_7.bmp-ID'},
                {'_source': {'path': 'test_folder', 'name': 'sample_2.jpeg'}, '_id': 'sample_2.jpeg-ID'}]

        generator.generate_all(docs, "test_thumbnails")

        # The CSV is not an image, so no (non-empty) thumbnail may exist for it
        self.assertFalse(self._has_thumbnail("books.csv-ID"))

        image_ids = ("sample_3.jpg-ID", "sample_2.jpeg-ID", "sample_5.png-ID",
                     "sample_6.gif-ID", "sample_7.bmp-ID")
        for doc_id in image_ids:
            # subTest reports every missing thumbnail instead of stopping at the first
            with self.subTest(doc_id=doc_id):
                self.assertTrue(self._has_thumbnail(doc_id))

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -39,6 +39,11 @@
background-color: #FFC107;
}
/* Pill badge for "text" search hits; the result renderer fills it with the hit's encoding. */
.badge-text {
color: #FFFFFF;
background-color: #FAAB3C;
}
.card-img-top {
@ -324,7 +329,7 @@
if (hit.hasOwnProperty("highlight") && hit["highlight"].hasOwnProperty("name")) {
title.insertAdjacentHTML('afterbegin', hit["highlight"]["name"] + extension);
} else {
title.appendChild(document.createTextNode(hit["_source"]["name"]));
title.appendChild(document.createTextNode(hit["_source"]["name"] + extension));
}
title.setAttribute("title", hit["_source"]["path"] + hit["_source"]["name"] + extension);
@ -415,6 +420,13 @@
formatTag.appendChild(document.createTextNode(hit["_source"]["format_name"]));
tags.push(formatTag);
break;
case "text":
formatTag = document.createElement("span");
formatTag.setAttribute("class", "badge badge-pill badge-text");
formatTag.appendChild(document.createTextNode(hit["_source"]["encoding"]));
tags.push(formatTag);
break;
}

View File

@ -41,7 +41,6 @@ class ThumbnailGenerator:
os.makedirs(dest_path, exist_ok=True)
for doc in docs:
extension = "" if doc["_source"]["extension"] == "" else "." + doc["_source"]["extension"]
full_path = os.path.join(doc["_source"]["path"], doc["_source"]["name"] + extension)