"""Generate C lookup tables for MIME types from ``scripts/mime.csv``.

The script reads a CSV whose lines look like ``mime,ext1|ext2|...`` (a leading
``!`` marks the MIME type as "no parse") and prints a C header to stdout
containing:

* an ``enum mime`` with one numeric id per MIME type,
* ``mime_get_mime_text``      (enum value -> MIME string),
* ``mime_extension_lookup``   (CRC32 of a file extension -> enum value),
* ``mime_name_lookup``        (CRC32 of a MIME string -> enum value),
* ``mime_ids`` / ``get_mime_ids`` (zero-terminated list of all ids).
"""
import zlib

# MIME type -> list of file extensions, in CSV order.
mimes = {}
# MIME types whose CSV line started with "!".
noparse = set()
ext_in_hash = set()
# Cache of ids already handed out by mime_id().
mime_ids = {}

# Major type (text before the "/") -> value stored in bits 16+ of the id.
major_mime = {
    "sist2": 0,
    "model": 1,
    "example": 2,
    "message": 3,
    "multipart": 4,
    "font": 5,
    "video": 6,
    "audio": 7,
    "image": 8,
    "text": 9,
    "application": 10,
    "x-epoc": 11,
}

pdf = (
    "application/pdf",
    "application/epub+zip",
    "application/vnd.ms-xpsdocument",
)

font = (
    "application/vnd.ms-opentype",
    # The comma after the next entry used to be missing, which silently
    # concatenated it with "application/x-font-sfn" into one bogus string.
    "application/x-ms-compress-szdd",
    "application/x-font-sfn",
    "application/x-font-ttf",
    "font/otf",
    "font/sfnt",
    "font/woff",
    "font/woff2",
)

# Archive "formats"
archive = (
    "application/x-tar",
    "application/zip",
    "application/x-rar",
    "application/x-arc",
    "application/x-warc",
    "application/x-7z-compressed",
)

# Archive "filters"
arc_filter = (
    "application/gzip",
    "application/x-bzip2",
    "application/x-xz",
    "application/x-zstd",
    "application/x-lzma",
    "application/x-lz4",
    "application/x-lzip",
    "application/x-lzop",
)

doc = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
)

mobi = (
    "application/x-mobipocket-ebook",
    "application/vnd.amazon.mobi8-ebook",
)

markup = (
    "text/xml",
    "text/html",
    "text/x-sgml",
)

raw = (
    "image/x-olympus-orf",
    "image/x-nikon-nef",
    "image/x-fuji-raf",
    "image/x-panasonic-raw",
    "image/x-adobe-dng",
    "image/x-canon-cr2",
    "image/x-canon-crw",
    "image/x-dcraw",
    "image/x-kodak-dcr",
    "image/x-kodak-k25",
    "image/x-kodak-kdc",
    "image/x-minolta-mrw",
    "image/x-pentax-pef",
    "image/x-sigma-x3f",
    "image/x-sony-arw",
    "image/x-sony-sr2",
    "image/x-sony-srf",
    "image/x-epson-erf",
)

# (collection, flag) pairs checked in order; only the first match applies.
# `noparse` is the live set object, so entries added while loading the CSV
# are seen here.
_CATEGORY_FLAGS = (
    (noparse, "0x80000000"),
    (pdf, "0x40000000"),
    (font, "0x20000000"),
    (archive, "0x10000000"),
    (arc_filter, "0x08000000"),
    (doc, "0x04000000"),
    (mobi, "0x02000000"),
    (markup, "0x01000000"),
    (raw, "0x00800000"),
)

# Next sequence number to embed in the low bits of a freshly assigned id.
cnt = 1

# Maps every character that clean() replaces to an underscore.
_CLEAN_TABLE = str.maketrans("/.+-", "____")


def mime_id(mime):
    """Return the C expression used as the enum value for *mime*.

    The value is ``(major_mime[major] << 16) + sequence``, optionally followed
    by ``" | <flag>"`` when the MIME type belongs to one of the category
    collections. Results are cached in ``mime_ids`` so repeated calls return
    the same string.

    ``application/x-empty`` (when it matches no category) always yields the
    literal ``"1"``; it is not cached and does not consume a sequence number.

    Raises:
        KeyError: if the major type of *mime* is not in ``major_mime``.
    """
    global cnt

    cached = mime_ids.get(mime)
    if cached is not None:
        return cached

    major = mime.split("/")[0]
    ident = str((major_mime[major] << 16) + cnt)
    cnt += 1

    for members, flag in _CATEGORY_FLAGS:
        if mime in members:
            ident += " | " + flag
            break
    else:
        if mime == "application/x-empty":
            # Give the sequence number back: this type uses a fixed id.
            cnt -= 1
            return "1"

    mime_ids[mime] = ident
    return ident


def clean(t):
    """Turn a MIME string into a C identifier by replacing ``/ . + -`` with ``_``."""
    return t.translate(_CLEAN_TABLE)


def crc(s):
    """Return the unsigned 32-bit CRC32 of *s* encoded as UTF-8."""
    return zlib.crc32(s.encode()) & 0xffffffff


def _load_mimes(csv_path):
    """Fill ``mimes`` and ``noparse`` from the CSV file at *csv_path*."""
    with open(csv_path) as f:
        for line in f:
            if not line.strip():
                # Tolerate blank/trailing lines instead of failing to unpack.
                continue
            mime, ext_list = line.split(",")
            if line.startswith("!"):
                mime = mime[1:]
                noparse.add(mime)
            mimes[mime] = [x.strip() for x in ext_list.split("|") if x.strip() != ""]


def _check_crc_collisions():
    """Raise if two extensions, or two MIME strings, share a CRC32.

    The generated C code switches on these CRC32 values, so every value must
    be unique within its table.
    """
    seen_crc = set()
    for ext in mimes.values():
        for e in ext:
            value = crc(e)
            if value in seen_crc:
                raise Exception("CRC32 collision")
            seen_crc.add(value)

    seen_crc = set()
    for mime in mimes:
        value = crc(mime)
        if value in seen_crc:
            raise Exception("CRC32 collision")
        seen_crc.add(value)


def _emit_header():
    """Print the generated C header for the loaded ``mimes`` to stdout."""
    print("// **Generated by mime.py**")
    print("#ifndef MIME_GENERATED_C")
    print("#define MIME_GENERATED_C")
    # The generated code uses NULL, which <stddef.h> defines. The original
    # emitted an empty "#include", which is not valid C.
    print("#include <stddef.h>\n")

    # Enum (ids are assigned in sorted MIME order)
    print("enum mime {")
    for mime, _ in sorted(mimes.items()):
        print(f"{clean(mime)}={mime_id(mime)},")
    print("};")

    # Enum -> string
    print("char *mime_get_mime_text(unsigned int mime_id) {"
          "switch (mime_id) {")
    for mime in mimes:
        print("case " + clean(mime) + ": return \"" + mime + "\";")
    print("default: return NULL;}}")

    # Ext -> Enum (all extensions of one MIME type share a single return)
    print("unsigned int mime_extension_lookup(unsigned long extension_crc32) {"
          "switch (extension_crc32) {")
    for mime, ext in mimes.items():
        if ext:
            for e in ext:
                print(f"case {crc(e)}:", end="")
            print(f"return {clean(mime)};")
    print("default: return 0;}}")

    # string -> Enum
    print("unsigned int mime_name_lookup(unsigned long mime_crc32) {"
          "switch (mime_crc32) {")
    for mime in mimes:
        print(f"case {crc(mime)}: return {clean(mime)};")
    print("default: return 0;}}")

    # mime list (zero-terminated)
    mime_list = ",".join(mime_id(x) for x in mimes) + ",0"

    print(f"unsigned int mime_ids[] = {{{mime_list}}};")
    print("unsigned int* get_mime_ids() { return mime_ids; }")

    print("#endif")


def main(csv_path="scripts/mime.csv"):
    """Load the MIME table from *csv_path*, validate it and print the C header."""
    _load_mimes(csv_path)
    _check_crc_collisions()
    _emit_header()


if __name__ == "__main__":
    main()