# Mirror of https://github.com/simon987/sist2.git
# Synced 2025-04-04 07:52:59 +00:00
# 211 lines, 4.9 KiB, Python
import zlib
# MIME type -> list of file extensions; filled while reading scripts/mime.csv.
mimes = {}
# MIME types whose line in scripts/mime.csv starts with "!".
noparse = set()
# NOTE(review): not referenced anywhere else in the visible part of this script.
ext_in_hash = set()
# MIME type -> generated enum value (a string of C source); filled by mime_id().
mime_ids = {}

# Numeric code for the part of a MIME type before the "/".
# mime_id() shifts this code left by 16 bits to form the upper part of an id.
major_mime = {
    "sist2": 0,
    "model": 1,
    "example": 2,
    "message": 3,
    "multipart": 4,
    "font": 5,
    "video": 6,
    "audio": 7,
    "image": 8,
    "text": 9,
    "application": 10,
    "x-epoc": 11,
}

# MIME types that mime_id() tags with the 0x40000000 flag.
pdf = (
    "application/pdf",
    "application/epub+zip",
    "application/vnd.ms-xpsdocument",
)

# MIME types that mime_id() tags with the 0x20000000 flag.
# Every entry ends with a comma: a missing one makes Python silently join two
# adjacent string literals into a single (never matching) entry.
font = (
    "application/vnd.ms-opentype",
    "application/x-ms-compress-szdd",
    "application/x-font-sfn",
    "application/x-font-ttf",
    "font/otf",
    "font/sfnt",
    "font/woff",
    "font/woff2",
)

# Archive "formats"
# MIME types that mime_id() tags with the 0x10000000 flag.
archive = (
    "application/x-tar",
    "application/zip",
    "application/x-rar",
    "application/x-arc",
    "application/x-warc",
    "application/x-7z-compressed",
)

# Archive "filters"
# MIME types that mime_id() tags with the 0x08000000 flag.
arc_filter = (
    "application/gzip",
    "application/x-bzip2",
    "application/x-xz",
    "application/x-zstd",
    "application/x-lzma",
    "application/x-lz4",
    "application/x-lzip",
    "application/x-lzop",
)

# MIME types that mime_id() tags with the 0x04000000 flag.
doc = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
)

# MIME types that mime_id() tags with the 0x02000000 flag.
mobi = (
    "application/x-mobipocket-ebook",
    "application/vnd.amazon.mobi8-ebook",
)

# MIME types that mime_id() tags with the 0x01000000 flag.
markup = (
    "text/xml",
    "text/html",
    "text/x-sgml",
)

# MIME types that mime_id() tags with the 0x00800000 flag.
# Each type is listed once; the tuple is only used for membership tests.
raw = (
    "image/x-olympus-orf",
    "image/x-nikon-nef",
    "image/x-fuji-raf",
    "image/x-panasonic-raw",
    "image/x-adobe-dng",
    "image/x-canon-cr2",
    "image/x-canon-crw",
    "image/x-dcraw",
    "image/x-kodak-dcr",
    "image/x-kodak-k25",
    "image/x-kodak-kdc",
    "image/x-minolta-mrw",
    "image/x-pentax-pef",
    "image/x-sigma-x3f",
    "image/x-sony-arw",
    "image/x-sony-sr2",
    "image/x-sony-srf",
    "image/x-epson-erf",
)

# Next value for the low 16 bits of a generated id; advanced by mime_id().
cnt = 1


def mime_id(mime):
    """Return the enum value for ``mime`` as a string of C source.

    The value is ``(major_mime[major] << 16) + cnt`` rendered in decimal,
    where ``major`` is the text before the first "/". If ``mime`` belongs to
    one of the category groups, `` | 0x...`` is appended for the first group
    that matches. Results are cached in ``mime_ids``, so repeated calls for
    the same type return the same string.

    "application/x-empty" is special: when it belongs to no category group
    the function returns the literal ``"1"``, gives the counter value back
    and does not cache the result.

    Raises:
        KeyError: the major part of ``mime`` is not a key of ``major_mime``.
        OverflowError: the counter no longer fits in the 16 bits below the
            major-type code.
    """
    global cnt

    if mime in mime_ids:
        return mime_ids[mime]

    if cnt > 0xFFFF:
        # A larger counter would carry into the bits holding the major code.
        raise OverflowError("mime id counter does not fit in 16 bits")

    major = mime.split("/")[0]
    # Named ``expr`` rather than ``mime_id`` so the function is not shadowed.
    expr = str((major_mime[major] << 16) + cnt)
    cnt += 1

    # Checked in order; only the first matching group contributes its flag.
    flag_groups = (
        (noparse, "0x80000000"),
        (pdf, "0x40000000"),
        (font, "0x20000000"),
        (archive, "0x10000000"),
        (arc_filter, "0x08000000"),
        (doc, "0x04000000"),
        (mobi, "0x02000000"),
        (markup, "0x01000000"),
        (raw, "0x00800000"),
    )
    for group, flag in flag_groups:
        if mime in group:
            expr += " | " + flag
            break
    else:
        # Reached only when no group matched.
        if mime == "application/x-empty":
            cnt -= 1
            return "1"

    mime_ids[mime] = expr
    return expr


# Maps each of "/", ".", "+" and "-" to "_".
_CLEAN_TABLE = str.maketrans("/.+-", "____")


def clean(t):
    """Return ``t`` with every "/", ".", "+" and "-" replaced by "_".

    Used to turn a MIME type into the identifier printed in the generated
    C enum.
    """
    return t.translate(_CLEAN_TABLE)


def crc(s):
    """Return the CRC-32 of ``s`` as an unsigned 32-bit integer.

    ``s`` is encoded with ``str.encode()`` (UTF-8 by default) before hashing.
    """
    return zlib.crc32(s.encode()) & 0xFFFFFFFF


# Read scripts/mime.csv. Each line has the form "mime,ext|ext|..."; a leading
# "!" on the MIME type also records it in ``noparse``.
with open("scripts/mime.csv", encoding="utf-8") as f:
    for line_no, line in enumerate(f, start=1):
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue

        fields = line.split(",")
        if len(fields) != 2:
            raise ValueError(
                f"scripts/mime.csv:{line_no}: expected 'mime,ext|ext|...', "
                f"got {line!r}"
            )
        mime, ext_list = fields
        mime = mime.strip()

        if mime.startswith("!"):
            mime = mime[1:]
            noparse.add(mime)

        # Empty entries (e.g. from a trailing "|") are dropped.
        mimes[mime] = [x.strip() for x in ext_list.split("|") if x.strip()]

def _unique_crcs(values, what):
    """Return the set of CRC-32 values of ``values``, raising on a repeat.

    ``what`` names the kind of value in the error message. A repeat is either
    the same string occurring twice or two different strings with equal CRC.
    """
    owners = {}
    for value in values:
        checksum = crc(value)
        if checksum in owners:
            raise Exception(
                f"CRC32 collision: {what} {value!r} clashes with "
                f"{owners[checksum]!r} (crc {checksum})"
            )
        owners[checksum] = value
    return set(owners)


# The generated C code switches on these CRCs, so each must be unique.
seen_crc = _unique_crcs(
    (e for ext_group in mimes.values() for e in ext_group), "extension"
)
seen_crc = _unique_crcs(mimes, "MIME type")

# Everything below writes the generated C source to standard output.
print("// **Generated by mime.py**")
print("#ifndef MIME_GENERATED_C")
print("#define MIME_GENERATED_C")
print("#include <stdlib.h>\n")

# Enum
# Ids are handed out here, in sorted MIME-type order; later mime_id() calls
# for the same type return the cached value.
print("enum mime {")
for mime in sorted(mimes):
    print(f"{clean(mime)}={mime_id(mime)},")
print("};")

# Enum -> string
print("char *mime_get_mime_text(unsigned int mime_id) {"
      "switch (mime_id) {")
for mime in mimes:
    print(f"case {clean(mime)}: return \"{mime}\";")
print("default: return NULL;}}")

# Ext -> Enum
print("unsigned int mime_extension_lookup(unsigned long extension_crc32) {"
      "switch (extension_crc32) {")
for mime, ext in mimes.items():
    if ext:
        # All extensions of one type share a single return statement.
        labels = "".join(f"case {crc(e)}:" for e in ext)
        print(f"{labels}return {clean(mime)};")
print("default: return 0;}}")

# string -> Enum
print("unsigned int mime_name_lookup(unsigned long mime_crc32) {"
      "switch (mime_crc32) {")
for mime in mimes:
    print(f"case {crc(mime)}: return {clean(mime)};")

print("default: return 0;}}")

# mime list
# The trailing 0 is always appended after the last id.
mime_list = ",".join(mime_id(x) for x in mimes) + ",0"

print(f"unsigned int mime_ids[] = {{{mime_list}}};")
print("unsigned int* get_mime_ids() { return mime_ids; }")

print("#endif")