Finished path search & autocomplete. Added font parsing

This commit is contained in:
simon987
2018-04-16 17:25:34 -04:00
parent b454653d51
commit 6d3cceb1b1
15 changed files with 124 additions and 41 deletions

View File

@@ -6,16 +6,17 @@ import subprocess
import json
import chardet
import html
import warnings
from PIL import Image
from fontTools.ttLib import TTFont, TTLibError
class MimeGuesser:
def guess_mime(self, full_path):
raise NotImplementedError()
class ContentMimeGuesser(MimeGuesser):
def __init__(self):
self.libmagic = magic.Magic(mime=True)
@@ -27,7 +28,6 @@ class ContentMimeGuesser(MimeGuesser):
class ExtensionMimeGuesser(MimeGuesser):
def guess_mime(self, full_path):
return mimetypes.guess_type(full_path, strict=False)[0]
@@ -41,7 +41,6 @@ class FileParser:
class FileCheckSumCalculator:
def checksum(self, path: str) -> str:
"""
Calculate the checksum of a file
@@ -52,7 +51,6 @@ class FileCheckSumCalculator:
class Md5CheckSumCalculator(FileCheckSumCalculator):
def __init__(self):
self.name = "md5"
@@ -72,7 +70,6 @@ class Md5CheckSumCalculator(FileCheckSumCalculator):
class Sha1CheckSumCalculator(FileCheckSumCalculator):
def __init__(self):
self.name = "sha1"
@@ -92,7 +89,6 @@ class Sha1CheckSumCalculator(FileCheckSumCalculator):
class Sha256CheckSumCalculator(FileCheckSumCalculator):
def __init__(self):
self.name = "sha256"
@@ -112,12 +108,10 @@ class Sha256CheckSumCalculator(FileCheckSumCalculator):
class GenericFileParser(FileParser):
mime_types = []
is_default = True
def __init__(self, checksum_calculators: list):
self.checksum_calculators = checksum_calculators
def parse(self, full_path: str) -> dict:
@@ -146,7 +140,6 @@ class GenericFileParser(FileParser):
class MediaFileParser(GenericFileParser):
is_default = False
relevant_properties = ["bit_rate", "nb_streams", "duration", "format_name", "format_long_name"]
@@ -154,7 +147,7 @@ class MediaFileParser(GenericFileParser):
super().__init__(checksum_calculators)
self.mime_types = [
"video/3gpp", "video/mp4", "video/mpeg", "video/ogg", "video/quicktime",
"video/3gpp", "video/mp4", "video/mpeg", "video/ogg", "video/quicktime",
"video/webm", "video/x-flv", "video/x-mng", "video/x-ms-asf",
"video/x-ms-wmv", "video/x-msvideo", "audio/basic", "auido/L24",
"audio/mid", "audio/mpeg", "audio/mp4", "audio/x-aiff",
@@ -208,15 +201,14 @@ class MediaFileParser(GenericFileParser):
class PictureFileParser(GenericFileParser):
is_default = False
def __init__(self, checksum_calculators: list):
super().__init__(checksum_calculators)
self.mime_types = [
"image/bmp", "image/cgm", "image/cis-cod", "image/g3fax", "image/gif",
"image/ief", "image/jpeg", "image/ktx", "image/pipeg", "image/pjpeg",
"image/bmp", "image/cgm", "image/cis-cod", "image/g3fax", "image/gif",
"image/ief", "image/jpeg", "image/ktx", "image/pipeg", "image/pjpeg",
"image/png", "image/prs.btif", "image/svg+xml", "image/tiff",
"image/vnd.adobe.photoshop", "image/vnd.dece.graphic", "image/vnd.djvu",
"image/vnd.dvb.subtitle", "image/vnd.dwg", "image/vnd.dxf",
@@ -234,11 +226,9 @@ class PictureFileParser(GenericFileParser):
info = super().parse(full_path)
try:
with open(full_path, "rb") as image_file:
with Image.open(image_file) as image:
info["mode"] = image.mode
info["format"] = image.format
info["width"] = image.width
@@ -250,7 +240,6 @@ class PictureFileParser(GenericFileParser):
class TextFileParser(GenericFileParser):
is_default = False
def __init__(self, checksum_calculators: list, content_lenght: int):
@@ -259,14 +248,14 @@ class TextFileParser(GenericFileParser):
self.mime_types = [
"text/asp", "text/css", "text/ecmascript", "text/html", "text/javascript",
"text/mcf", "text/pascal", "text/plain", "text/richtext", "text/scriplet",
"text/sgml", "text/tab-separated-values", "text/uri-list", "text/vnd.abc",
"text/mcf", "text/pascal", "text/plain", "text/richtext", "text/scriplet",
"text/sgml", "text/tab-separated-values", "text/uri-list", "text/vnd.abc",
"text/vnd.fmi.flexstor", "text/vnd.rn-realtext", "text/vnd.wap.wml",
"text/vnd.wap.wmlscript", "text/webviewhtml", "text/x-asm", "text/x-audiosoft-intra",
"text/x-c", "text/x-component", "text/x-fortran", "text/x-h", "text/x-java-source",
"text/x-la-asf", "text/x-m", "text/x-pascal", "text/x-script",
"text/x-la-asf", "text/x-m", "text/x-pascal", "text/x-script",
"text/x-script.csh", "text/x-script.elisp", "text/x-script.guile",
"text/x-script.ksh", "text/x-script.lisp", "text/x-script.perl",
"text/x-script.ksh", "text/x-script.lisp", "text/x-script.perl",
"text/x-script.perl-module", "text/x-script.phyton", "text/x-script.rexx",
"text/x-script.scheme", "text/x-script.sh", "text/x-script.tcl",
"text/x-script.tcsh", "text/x-script.zsh", "text/x-server-parsed-html",
@@ -282,10 +271,10 @@ class TextFileParser(GenericFileParser):
self.encodings = [
'ascii', 'big5', 'big5hkscs', 'cp037', 'cp273', 'cp424', 'cp437',
'cp500', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855',
'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 'cp863',
'cp864', 'cp865', 'cp866', 'cp869', 'cp874', 'cp875', 'cp932',
'cp949', 'cp950', 'cp1006', 'cp1026', 'cp1125', 'cp1140',
'cp500', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855',
'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 'cp863',
'cp864', 'cp865', 'cp866', 'cp869', 'cp874', 'cp875', 'cp932',
'cp949', 'cp950', 'cp1006', 'cp1026', 'cp1125', 'cp1140',
'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
'cp1256', 'cp1257', 'cp1258', 'cp65001', 'euc_jp', 'euc_jis_2004',
'euc_jisx0213', 'euc_kr', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022_jp',
@@ -301,21 +290,57 @@ class TextFileParser(GenericFileParser):
]
def parse(self, full_path: str):
info = super().parse(full_path)
with open(full_path, "rb") as text_file:
raw_content = text_file.read(self.content_lenght)
chardet.detect(raw_content)
encoding = chardet.detect(raw_content)["encoding"]
if encoding is not None and encoding in self.encodings:
info["encoding"] = encoding
content = raw_content.decode(encoding, "ignore")
info["content"] = html.escape(content)
return info
class FontParser(GenericFileParser):
is_default = False
def __init__(self, checksum_calculators: list):
super().__init__(checksum_calculators)
self.mime_types = [
"application/font-sfnt", "application/font-woff", "application/vdn.ms-fontobject",
"application/x-font-ttf"
]
def parse(self, full_path: str):
info = super().parse(full_path)
print(info)
with open(full_path, "rb") as f:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
try:
font = TTFont(f)
if "name" in font:
try:
for name in font["name"].names:
if name.nameID == 4:
info["font_name"] = name.toUnicode("replace")
break
except AssertionError:
print("Could not read font name for " + full_path)
except TTLibError:
print("Could not read font for " + full_path)
return info