diff --git a/scripts/mime.csv b/scripts/mime.csv index a3a521d..5b30d31 100644 --- a/scripts/mime.csv +++ b/scripts/mime.csv @@ -22,6 +22,7 @@ application/java-archive, jar application/java, class application/javascript, application/json, json +application/ndjson, jsonl|ndjson application/marc, mrc application/mbedlet, mbd application/mime, aps diff --git a/src/ctx.h b/src/ctx.h index b98a316..252e96e 100644 --- a/src/ctx.h +++ b/src/ctx.h @@ -15,6 +15,7 @@ #include "libscan/raw/raw.h" #include "libscan/msdoc/msdoc.h" #include "libscan/wpd/wpd.h" +#include "libscan/json/json.h" #include "src/io/store.h" #include @@ -62,6 +63,7 @@ typedef struct { scan_raw_ctx_t raw_ctx; scan_msdoc_ctx_t msdoc_ctx; scan_wpd_ctx_t wpd_ctx; + scan_json_ctx_t json_ctx; } ScanCtx_t; typedef struct { diff --git a/src/main.c b/src/main.c index 4131b64..9f8dd64 100644 --- a/src/main.c +++ b/src/main.c @@ -259,10 +259,18 @@ void initialize_scan_context(scan_args_t *args) { ScanCtx.raw_ctx.logf = _logf; ScanCtx.raw_ctx.store = _store; + // Wpd ScanCtx.wpd_ctx.content_size = args->content_size; ScanCtx.wpd_ctx.log = _log; ScanCtx.wpd_ctx.logf = _logf; ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/wordperfect"); + + // Json + ScanCtx.json_ctx.content_size = args->content_size; + ScanCtx.json_ctx.log = _log; + ScanCtx.json_ctx.logf = _logf; + ScanCtx.json_ctx.json_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/json"); + ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson"); } diff --git a/src/parsing/mime_generated.c b/src/parsing/mime_generated.c index c40a4b0..3b8fed8 100644 --- a/src/parsing/mime_generated.c +++ b/src/parsing/mime_generated.c @@ -35,425 +35,426 @@ enum mime { application_mime=655387, application_mspowerpoint=655388, application_msword=655389, - application_netmc=655390, - application_octet_stream=655391, - application_oda=655392, - application_ogg=655393, - application_pdf=655394 | 0x40000000, - application_pgp_keys=655395, - application_pgp_signature=655396, - application_pkcs7_signature=655397, - application_pkix_cert=655398, - application_postscript=655399, - application_pro_eng=655400, - application_ringing_tones=655401, - application_smil=655402, - application_solids=655403, - application_sounder=655404, - application_step=655405, - application_streamingmedia=655406, - application_vda=655407, - application_vnd_amazon_mobi8_ebook=655408 | 0x02000000, - application_vnd_coffeescript=655409, - application_vnd_fdf=655410, - application_vnd_font_fontforge_sfd=655411, - application_vnd_hp_hpgl=655412, - application_vnd_iccprofile=655413, - application_vnd_lotus_1_2_3=655414, - application_vnd_ms_cab_compressed=655415, - application_vnd_ms_excel=655416, - application_vnd_ms_fontobject=655417, - application_vnd_ms_opentype=655418 | 0x20000000, - application_vnd_ms_outlook=655419, - application_vnd_ms_pki_certstore=655420, - application_vnd_ms_pki_pko=655421, - application_vnd_ms_pki_seccat=655422, - application_vnd_ms_powerpoint=655423, - application_vnd_ms_project=655424, - application_vnd_oasis_opendocument_base=655425, - application_vnd_oasis_opendocument_formula=655426, - application_vnd_oasis_opendocument_graphics=655427, - application_vnd_oasis_opendocument_presentation=655428, - application_vnd_oasis_opendocument_spreadsheet=655429, - application_vnd_oasis_opendocument_text=655430, - application_vnd_openxmlformats_officedocument_presentationml_presentation=655431 | 0x04000000, - application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655432 | 0x04000000, - application_vnd_openxmlformats_officedocument_wordprocessingml_document=655433 | 0x04000000, - application_vnd_symbian_install=655434, - application_vnd_tcpdump_pcap=655435, - application_vnd_wap_wmlc=655436, - application_vnd_wap_wmlscriptc=655437, - application_vnd_xara=655438, - application_vocaltec_media_desc=655439, - application_vocaltec_media_file=655440, - application_warc=655441, - application_winhelp=655442, - application_wordperfect=655443, - application_x_123=655444, - application_x_7z_compressed=655445 | 0x10000000, - application_x_aim=655446, - application_x_apple_diskimage=655447, - application_x_arc=655448 | 0x10000000, - application_x_archive=655449, - application_x_atari_7800_rom=655450, - application_x_authorware_bin=655451, - application_x_authorware_map=655452, - application_x_authorware_seg=655453, - application_x_avira_qua=655454, - application_x_bcpio=655455, - application_x_bittorrent=655456, - application_x_bsh=655457, - application_x_bytecode_python=655458, - application_x_bzip=655459, - application_x_bzip2=655460 | 0x08000000, - application_x_cbr=655461, - application_x_cbz=655462, - application_x_cdlink=655463, - application_x_chat=655464, - application_x_chrome_extension=655465, - application_x_cocoa=655466, - application_x_conference=655467, - application_x_coredump=655468, - application_x_cpio=655469, - application_x_dbf=655470, - application_x_dbt=655471, - application_x_debian_package=655472, - application_x_deepv=655473, - application_x_director=655474, - application_x_dmp=655475, - application_x_dosdriver=655476, - application_x_dosexec=655477, - application_x_dvi=655478, - application_x_elc=655479, + application_ndjson=655390, + application_netmc=655391, + application_octet_stream=655392, + application_oda=655393, + application_ogg=655394, + application_pdf=655395 | 0x40000000, + application_pgp_keys=655396, + application_pgp_signature=655397, + application_pkcs7_signature=655398, + application_pkix_cert=655399, + application_postscript=655400, + application_pro_eng=655401, + application_ringing_tones=655402, + application_smil=655403, + application_solids=655404, + application_sounder=655405, + application_step=655406, + application_streamingmedia=655407, + application_vda=655408, + application_vnd_amazon_mobi8_ebook=655409 | 0x02000000, + application_vnd_coffeescript=655410, + application_vnd_fdf=655411, + application_vnd_font_fontforge_sfd=655412, + application_vnd_hp_hpgl=655413, + application_vnd_iccprofile=655414, + application_vnd_lotus_1_2_3=655415, + application_vnd_ms_cab_compressed=655416, + application_vnd_ms_excel=655417, + application_vnd_ms_fontobject=655418, + application_vnd_ms_opentype=655419 | 0x20000000, + application_vnd_ms_outlook=655420, + application_vnd_ms_pki_certstore=655421, + application_vnd_ms_pki_pko=655422, + application_vnd_ms_pki_seccat=655423, + application_vnd_ms_powerpoint=655424, + application_vnd_ms_project=655425, + application_vnd_oasis_opendocument_base=655426, + application_vnd_oasis_opendocument_formula=655427, + application_vnd_oasis_opendocument_graphics=655428, + application_vnd_oasis_opendocument_presentation=655429, + application_vnd_oasis_opendocument_spreadsheet=655430, + application_vnd_oasis_opendocument_text=655431, + application_vnd_openxmlformats_officedocument_presentationml_presentation=655432 | 0x04000000, + application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655433 | 0x04000000, + application_vnd_openxmlformats_officedocument_wordprocessingml_document=655434 | 0x04000000, + application_vnd_symbian_install=655435, + application_vnd_tcpdump_pcap=655436, + application_vnd_wap_wmlc=655437, + application_vnd_wap_wmlscriptc=655438, + application_vnd_xara=655439, + application_vocaltec_media_desc=655440, + application_vocaltec_media_file=655441, + application_warc=655442, + application_winhelp=655443, + application_wordperfect=655444, + application_x_123=655445, + application_x_7z_compressed=655446 | 0x10000000, + application_x_aim=655447, + application_x_apple_diskimage=655448, + application_x_arc=655449 | 0x10000000, + application_x_archive=655450, + application_x_atari_7800_rom=655451, + application_x_authorware_bin=655452, + application_x_authorware_map=655453, + application_x_authorware_seg=655454, + application_x_avira_qua=655455, + application_x_bcpio=655456, + application_x_bittorrent=655457, + application_x_bsh=655458, + application_x_bytecode_python=655459, + application_x_bzip=655460, + application_x_bzip2=655461 | 0x08000000, + application_x_cbr=655462, + application_x_cbz=655463, + application_x_cdlink=655464, + application_x_chat=655465, + application_x_chrome_extension=655466, + application_x_cocoa=655467, + application_x_conference=655468, + application_x_coredump=655469, + application_x_cpio=655470, + application_x_dbf=655471, + application_x_dbt=655472, + application_x_debian_package=655473, + application_x_deepv=655474, + application_x_director=655475, + application_x_dmp=655476, + application_x_dosdriver=655477, + application_x_dosexec=655478, + application_x_dvi=655479, + application_x_elc=655480, application_x_empty=1, - application_x_envoy=655480, - application_x_esrehber=655481, - application_x_excel=655482, - application_x_executable=655483, - application_x_font_gdos=655484, - application_x_font_pf2=655485, - application_x_font_pfm=655486, - application_x_font_sfn=655487, - application_x_font_ttf=655488 | 0x20000000, - application_x_fptapplication_x_dbt=655489, - application_x_freelance=655490, - application_x_gamecube_rom=655491, - application_x_gdbm=655492, - application_x_gettext_translation=655493, - application_x_git=655494, - application_x_gsp=655495, - application_x_gss=655496, - application_x_gtar=655497, - application_x_gzip=655498, - application_x_hdf=655499, - application_x_helpfile=655500, - application_x_httpd_imap=655501, - application_x_ima=655502, - application_x_innosetup=655503, - application_x_internett_signup=655504, - application_x_inventor=655505, - application_x_ip2=655506, - application_x_java_applet=655507, - application_x_java_commerce=655508, - application_x_java_image=655509, - application_x_java_jmod=655510, - application_x_java_keystore=655511, - application_x_kdelnk=655512, - application_x_koan=655513, - application_x_latex=655514, - application_x_livescreen=655515, - application_x_lotus=655516, - application_x_lz4=655517 | 0x08000000, - application_x_lz4_json=655518, - application_x_lzh=655519, - application_x_lzh_compressed=655520, - application_x_lzip=655521 | 0x08000000, - application_x_lzma=655522 | 0x08000000, - application_x_lzop=655523 | 0x08000000, - application_x_lzx=655524, - application_x_mach_binary=655525, - application_x_mach_executable=655526, - application_x_magic_cap_package_1_0=655527, - application_x_mathcad=655528, - application_x_maxis_dbpf=655529, - application_x_meme=655530, - application_x_midi=655531, - application_x_mif=655532, - application_x_mix_transfer=655533, - application_x_mobipocket_ebook=655534 | 0x02000000, - application_x_ms_compress_szdd=655535, - application_x_ms_pdb=655536, - application_x_ms_reader=655537, - application_x_msaccess=655538, - application_x_n64_rom=655539, - application_x_navi_animation=655540, - application_x_navidoc=655541, - application_x_navimap=655542, - application_x_navistyle=655543, - application_x_nes_rom=655544, - application_x_netcdf=655545, - application_x_newton_compatible_pkg=655546, - application_x_nintendo_ds_rom=655547, - application_x_object=655548, - application_x_omc=655549, - application_x_omcdatamaker=655550, - application_x_omcregerator=655551, - application_x_pagemaker=655552, - application_x_pcl=655553, - application_x_pgp_keyring=655554, - application_x_pixclscript=655555, - application_x_pkcs7_certreqresp=655556, - application_x_pkcs7_signature=655557, - application_x_project=655558, - application_x_qpro=655559, - application_x_rar=655560 | 0x10000000, - application_x_rpm=655561, - application_x_sdp=655562, - application_x_sea=655563, - application_x_seelogo=655564, - application_x_setupscript=655565, - application_x_shar=655566, - application_x_sharedlib=655567, - application_x_shockwave_flash=655568, - application_x_snappy_framed=655569, - application_x_sprite=655570, - application_x_sqlite3=655571, - application_x_stargallery_thm=655572, - application_x_stuffit=655573, - application_x_sv4cpio=655574, - application_x_sv4crc=655575, - application_x_tar=655576 | 0x10000000, - application_x_tbook=655577, - application_x_terminfo=655578, - application_x_terminfo2=655579, - application_x_tex_tfm=655580, - application_x_texinfo=655581, - application_x_ustar=655582, - application_x_visio=655583, - application_x_vnd_audioexplosion_mzz=655584, - application_x_vnd_ls_xpix=655585, - application_x_vrml=655586, - application_x_wais_source=655587, - application_x_wine_extension_ini=655588, - application_x_wintalk=655589, - application_x_world=655590, - application_x_wri=655591, - application_x_x509_ca_cert=655592, - application_x_xz=655593 | 0x08000000, - application_x_zip=655594, - application_x_zstd=655595 | 0x08000000, - application_x_zstd_dictionary=655596, - application_xml=655597, - application_zip=655598 | 0x10000000, - application_zlib=655599, - audio_basic=458992 | 0x80000000, - audio_it=458993, - audio_make=458994, - audio_mid=458995, - audio_midi=458996, - audio_mp4=458997, - audio_mpeg=458998, - audio_ogg=458999, - audio_s3m=459000, - audio_tsp_audio=459001, - audio_tsplayer=459002, - audio_vnd_qcelp=459003, - audio_voxware=459004, - audio_x_aiff=459005, - audio_x_flac=459006, - audio_x_gsm=459007, - audio_x_hx_aac_adts=459008, - audio_x_jam=459009, - audio_x_liveaudio=459010, - audio_x_m4a=459011, - audio_x_midi=459012, - audio_x_mod=459013, - audio_x_mp4a_latm=459014, - audio_x_mpeg_3=459015, - audio_x_mpequrl=459016, - audio_x_nspaudio=459017, - audio_x_pn_realaudio=459018, - audio_x_psid=459019, - audio_x_realaudio=459020, - audio_x_s3m=459021, - audio_x_twinvq=459022, - audio_x_twinvq_plugin=459023, - audio_x_voc=459024, - audio_x_wav=459025, - audio_x_xbox_executable=459026 | 0x80000000, - audio_x_xbox360_executable=459027 | 0x80000000, - audio_xm=459028, - font_otf=327957 | 0x20000000, - font_sfnt=327958 | 0x20000000, - font_woff=327959 | 0x20000000, - font_woff2=327960 | 0x20000000, - image_bmp=524569, - image_cmu_raster=524570, - image_fif=524571, - image_florian=524572, - image_g3fax=524573, - image_gif=524574, - image_heic=524575, - image_ief=524576, - image_jpeg=524577, - image_jutvision=524578, - image_naplps=524579, - image_pict=524580, - image_png=524581, - image_svg=524582 | 0x80000000, - image_svg_xml=524583 | 0x80000000, - image_tiff=524584, - image_vnd_adobe_photoshop=524585 | 0x80000000, - image_vnd_djvu=524586 | 0x80000000, - image_vnd_fpx=524587, - image_vnd_microsoft_icon=524588, - image_vnd_rn_realflash=524589, - image_vnd_rn_realpix=524590, - image_vnd_wap_wbmp=524591, - image_vnd_xiff=524592, - image_webp=524593, - image_wmf=524594, - image_x_3ds=524595, - image_x_adobe_dng=524596 | 0x00800000, - image_x_award_bioslogo=524597, - image_x_canon_cr2=524598 | 0x00800000, - image_x_canon_crw=524599 | 0x00800000, - image_x_cmu_raster=524600, - image_x_cur=524601, - image_x_dcraw=524602 | 0x00800000, - image_x_dwg=524603, - image_x_eps=524604, - image_x_epson_erf=524605 | 0x00800000, - image_x_exr=524606, - image_x_fuji_raf=524607 | 0x00800000, - image_x_gem=524608, - image_x_icns=524609, - image_x_icon=524610 | 0x80000000, - image_x_jg=524611, - image_x_jps=524612, - image_x_kodak_dcr=524613 | 0x00800000, - image_x_kodak_k25=524614 | 0x00800000, - image_x_kodak_kdc=524615 | 0x00800000, - image_x_minolta_mrw=524616 | 0x00800000, - image_x_ms_bmp=524617, - image_x_niff=524618, - image_x_nikon_nef=524619 | 0x00800000, - image_x_olympus_orf=524620 | 0x00800000, - image_x_panasonic_raw=524621 | 0x00800000, - image_x_pcx=524622, - image_x_pentax_pef=524623 | 0x00800000, - image_x_pict=524624, - image_x_portable_bitmap=524625, - image_x_portable_graymap=524626, - image_x_portable_pixmap=524627, - image_x_quicktime=524628, - image_x_rgb=524629, - image_x_sigma_x3f=524630 | 0x00800000, - image_x_sony_arw=524631 | 0x00800000, - image_x_sony_sr2=524632 | 0x00800000, - image_x_sony_srf=524633 | 0x00800000, - image_x_tga=524634, - image_x_tiff=524635, - image_x_win_bitmap=524636, - image_x_xcf=524637 | 0x80000000, - image_x_xpixmap=524638 | 0x80000000, - image_x_xwindowdump=524639, - message_news=196960, - message_rfc822=196961, - model_vnd_dwf=65890, - model_vnd_gdl=65891, - model_vnd_gs_gdl=65892, - model_vrml=65893, - model_x_pov=65894, + application_x_envoy=655481, + application_x_esrehber=655482, + application_x_excel=655483, + application_x_executable=655484, + application_x_font_gdos=655485, + application_x_font_pf2=655486, + application_x_font_pfm=655487, + application_x_font_sfn=655488, + application_x_font_ttf=655489 | 0x20000000, + application_x_fptapplication_x_dbt=655490, + application_x_freelance=655491, + application_x_gamecube_rom=655492, + application_x_gdbm=655493, + application_x_gettext_translation=655494, + application_x_git=655495, + application_x_gsp=655496, + application_x_gss=655497, + application_x_gtar=655498, + application_x_gzip=655499, + application_x_hdf=655500, + application_x_helpfile=655501, + application_x_httpd_imap=655502, + application_x_ima=655503, + application_x_innosetup=655504, + application_x_internett_signup=655505, + application_x_inventor=655506, + application_x_ip2=655507, + application_x_java_applet=655508, + application_x_java_commerce=655509, + application_x_java_image=655510, + application_x_java_jmod=655511, + application_x_java_keystore=655512, + application_x_kdelnk=655513, + application_x_koan=655514, + application_x_latex=655515, + application_x_livescreen=655516, + application_x_lotus=655517, + application_x_lz4=655518 | 0x08000000, + application_x_lz4_json=655519, + application_x_lzh=655520, + application_x_lzh_compressed=655521, + application_x_lzip=655522 | 0x08000000, + application_x_lzma=655523 | 0x08000000, + application_x_lzop=655524 | 0x08000000, + application_x_lzx=655525, + application_x_mach_binary=655526, + application_x_mach_executable=655527, + application_x_magic_cap_package_1_0=655528, + application_x_mathcad=655529, + application_x_maxis_dbpf=655530, + application_x_meme=655531, + application_x_midi=655532, + application_x_mif=655533, + application_x_mix_transfer=655534, + application_x_mobipocket_ebook=655535 | 0x02000000, + application_x_ms_compress_szdd=655536, + application_x_ms_pdb=655537, + application_x_ms_reader=655538, + application_x_msaccess=655539, + application_x_n64_rom=655540, + application_x_navi_animation=655541, + application_x_navidoc=655542, + application_x_navimap=655543, + application_x_navistyle=655544, + application_x_nes_rom=655545, + application_x_netcdf=655546, + application_x_newton_compatible_pkg=655547, + application_x_nintendo_ds_rom=655548, + application_x_object=655549, + application_x_omc=655550, + application_x_omcdatamaker=655551, + application_x_omcregerator=655552, + application_x_pagemaker=655553, + application_x_pcl=655554, + application_x_pgp_keyring=655555, + application_x_pixclscript=655556, + application_x_pkcs7_certreqresp=655557, + application_x_pkcs7_signature=655558, + application_x_project=655559, + application_x_qpro=655560, + application_x_rar=655561 | 0x10000000, + application_x_rpm=655562, + application_x_sdp=655563, + application_x_sea=655564, + application_x_seelogo=655565, + application_x_setupscript=655566, + application_x_shar=655567, + application_x_sharedlib=655568, + application_x_shockwave_flash=655569, + application_x_snappy_framed=655570, + application_x_sprite=655571, + application_x_sqlite3=655572, + application_x_stargallery_thm=655573, + application_x_stuffit=655574, + application_x_sv4cpio=655575, + application_x_sv4crc=655576, + application_x_tar=655577 | 0x10000000, + application_x_tbook=655578, + application_x_terminfo=655579, + application_x_terminfo2=655580, + application_x_tex_tfm=655581, + application_x_texinfo=655582, + application_x_ustar=655583, + application_x_visio=655584, + application_x_vnd_audioexplosion_mzz=655585, + application_x_vnd_ls_xpix=655586, + application_x_vrml=655587, + application_x_wais_source=655588, + application_x_wine_extension_ini=655589, + application_x_wintalk=655590, + application_x_world=655591, + application_x_wri=655592, + application_x_x509_ca_cert=655593, + application_x_xz=655594 | 0x08000000, + application_x_zip=655595, + application_x_zstd=655596 | 0x08000000, + application_x_zstd_dictionary=655597, + application_xml=655598, + application_zip=655599 | 0x10000000, + application_zlib=655600, + audio_basic=458993 | 0x80000000, + audio_it=458994, + audio_make=458995, + audio_mid=458996, + audio_midi=458997, + audio_mp4=458998, + audio_mpeg=458999, + audio_ogg=459000, + audio_s3m=459001, + audio_tsp_audio=459002, + audio_tsplayer=459003, + audio_vnd_qcelp=459004, + audio_voxware=459005, + audio_x_aiff=459006, + audio_x_flac=459007, + audio_x_gsm=459008, + audio_x_hx_aac_adts=459009, + audio_x_jam=459010, + audio_x_liveaudio=459011, + audio_x_m4a=459012, + audio_x_midi=459013, + audio_x_mod=459014, + audio_x_mp4a_latm=459015, + audio_x_mpeg_3=459016, + audio_x_mpequrl=459017, + audio_x_nspaudio=459018, + audio_x_pn_realaudio=459019, + audio_x_psid=459020, + audio_x_realaudio=459021, + audio_x_s3m=459022, + audio_x_twinvq=459023, + audio_x_twinvq_plugin=459024, + audio_x_voc=459025, + audio_x_wav=459026, + audio_x_xbox_executable=459027 | 0x80000000, + audio_x_xbox360_executable=459028 | 0x80000000, + audio_xm=459029, + font_otf=327958 | 0x20000000, + font_sfnt=327959 | 0x20000000, + font_woff=327960 | 0x20000000, + font_woff2=327961 | 0x20000000, + image_bmp=524570, + image_cmu_raster=524571, + image_fif=524572, + image_florian=524573, + image_g3fax=524574, + image_gif=524575, + image_heic=524576, + image_ief=524577, + image_jpeg=524578, + image_jutvision=524579, + image_naplps=524580, + image_pict=524581, + image_png=524582, + image_svg=524583 | 0x80000000, + image_svg_xml=524584 | 0x80000000, + image_tiff=524585, + image_vnd_adobe_photoshop=524586 | 0x80000000, + image_vnd_djvu=524587 | 0x80000000, + image_vnd_fpx=524588, + image_vnd_microsoft_icon=524589, + image_vnd_rn_realflash=524590, + image_vnd_rn_realpix=524591, + image_vnd_wap_wbmp=524592, + image_vnd_xiff=524593, + image_webp=524594, + image_wmf=524595, + image_x_3ds=524596, + image_x_adobe_dng=524597 | 0x00800000, + image_x_award_bioslogo=524598, + image_x_canon_cr2=524599 | 0x00800000, + image_x_canon_crw=524600 | 0x00800000, + image_x_cmu_raster=524601, + image_x_cur=524602, + image_x_dcraw=524603 | 0x00800000, + image_x_dwg=524604, + image_x_eps=524605, + image_x_epson_erf=524606 | 0x00800000, + image_x_exr=524607, + image_x_fuji_raf=524608 | 0x00800000, + image_x_gem=524609, + image_x_icns=524610, + image_x_icon=524611 | 0x80000000, + image_x_jg=524612, + image_x_jps=524613, + image_x_kodak_dcr=524614 | 0x00800000, + image_x_kodak_k25=524615 | 0x00800000, + image_x_kodak_kdc=524616 | 0x00800000, + image_x_minolta_mrw=524617 | 0x00800000, + image_x_ms_bmp=524618, + image_x_niff=524619, + image_x_nikon_nef=524620 | 0x00800000, + image_x_olympus_orf=524621 | 0x00800000, + image_x_panasonic_raw=524622 | 0x00800000, + image_x_pcx=524623, + image_x_pentax_pef=524624 | 0x00800000, + image_x_pict=524625, + image_x_portable_bitmap=524626, + image_x_portable_graymap=524627, + image_x_portable_pixmap=524628, + image_x_quicktime=524629, + image_x_rgb=524630, + image_x_sigma_x3f=524631 | 0x00800000, + image_x_sony_arw=524632 | 0x00800000, + image_x_sony_sr2=524633 | 0x00800000, + image_x_sony_srf=524634 | 0x00800000, + image_x_tga=524635, + image_x_tiff=524636, + image_x_win_bitmap=524637, + image_x_xcf=524638 | 0x80000000, + image_x_xpixmap=524639 | 0x80000000, + image_x_xwindowdump=524640, + message_news=196961, + message_rfc822=196962, + model_vnd_dwf=65891, + model_vnd_gdl=65892, + model_vnd_gs_gdl=65893, + model_vrml=65894, + model_x_pov=65895, sist2_sidecar=2, - text_PGP=590183, - text_asp=590184, - text_css=590185, - text_html=590186 | 0x01000000, - text_javascript=590187, - text_mcf=590188, - text_pascal=590189, - text_plain=590190, - text_richtext=590191, - text_rtf=590192, - text_scriplet=590193, - text_tab_separated_values=590194, - text_troff=590195, - text_uri_list=590196, - text_vnd_abc=590197, - text_vnd_fmi_flexstor=590198, - text_vnd_wap_wml=590199, - text_vnd_wap_wmlscript=590200, - text_webviewhtml=590201, - text_x_Algol68=590202, - text_x_asm=590203, - text_x_audiosoft_intra=590204, - text_x_awk=590205, - text_x_bcpl=590206, - text_x_c=590207, - text_x_c__=590208, - text_x_component=590209, - text_x_diff=590210, - text_x_fortran=590211, - text_x_java=590212, - text_x_la_asf=590213, - text_x_lisp=590214, - text_x_m=590215, - text_x_m4=590216, - text_x_makefile=590217, - text_x_ms_regedit=590218, - text_x_msdos_batch=590219, - text_x_objective_c=590220, - text_x_pascal=590221, - text_x_perl=590222, - text_x_php=590223, - text_x_po=590224, - text_x_python=590225, - text_x_ruby=590226, - text_x_sass=590227, - text_x_scss=590228, - text_x_server_parsed_html=590229, - text_x_setext=590230, - text_x_sgml=590231 | 0x01000000, - text_x_shellscript=590232, - text_x_speech=590233, - text_x_tcl=590234, - text_x_tex=590235, - text_x_uil=590236, - text_x_uuencode=590237, - text_x_vcalendar=590238, - text_x_vcard=590239, - text_xml=590240 | 0x01000000, - video_MP2T=393633, - video_animaflex=393634, - video_avi=393635, - video_avs_video=393636, - video_mp4=393637, - video_mpeg=393638, - video_quicktime=393639, - video_vdo=393640, - video_vivo=393641, - video_vnd_rn_realvideo=393642, - video_vosaic=393643, - video_webm=393644, - video_x_amt_demorun=393645, - video_x_amt_showrun=393646, - video_x_atomic3d_feature=393647, - video_x_dl=393648, - video_x_dv=393649, - video_x_fli=393650, - video_x_flv=393651, - video_x_isvideo=393652, - video_x_jng=393653 | 0x80000000, - video_x_m4v=393654, - video_x_matroska=393655, - video_x_mng=393656, - video_x_motion_jpeg=393657, - video_x_ms_asf=393658, - video_x_msvideo=393659, - video_x_qtc=393660, - video_x_sgi_movie=393661, - x_epoc_x_sisx_app=721342, + text_PGP=590184, + text_asp=590185, + text_css=590186, + text_html=590187 | 0x01000000, + text_javascript=590188, + text_mcf=590189, + text_pascal=590190, + text_plain=590191, + text_richtext=590192, + text_rtf=590193, + text_scriplet=590194, + text_tab_separated_values=590195, + text_troff=590196, + text_uri_list=590197, + text_vnd_abc=590198, + text_vnd_fmi_flexstor=590199, + text_vnd_wap_wml=590200, + text_vnd_wap_wmlscript=590201, + text_webviewhtml=590202, + text_x_Algol68=590203, + text_x_asm=590204, + text_x_audiosoft_intra=590205, + text_x_awk=590206, + text_x_bcpl=590207, + text_x_c=590208, + text_x_c__=590209, + text_x_component=590210, + text_x_diff=590211, + text_x_fortran=590212, + text_x_java=590213, + text_x_la_asf=590214, + text_x_lisp=590215, + text_x_m=590216, + text_x_m4=590217, + text_x_makefile=590218, + text_x_ms_regedit=590219, + text_x_msdos_batch=590220, + text_x_objective_c=590221, + text_x_pascal=590222, + text_x_perl=590223, + text_x_php=590224, + text_x_po=590225, + text_x_python=590226, + text_x_ruby=590227, + text_x_sass=590228, + text_x_scss=590229, + text_x_server_parsed_html=590230, + text_x_setext=590231, + text_x_sgml=590232 | 0x01000000, + text_x_shellscript=590233, + text_x_speech=590234, + text_x_tcl=590235, + text_x_tex=590236, + text_x_uil=590237, + text_x_uuencode=590238, + text_x_vcalendar=590239, + text_x_vcard=590240, + text_xml=590241 | 0x01000000, + video_MP2T=393634, + video_animaflex=393635, + video_avi=393636, + video_avs_video=393637, + video_mp4=393638, + video_mpeg=393639, + video_quicktime=393640, + video_vdo=393641, + video_vivo=393642, + video_vnd_rn_realvideo=393643, + video_vosaic=393644, + video_webm=393645, + video_x_amt_demorun=393646, + video_x_amt_showrun=393647, + video_x_atomic3d_feature=393648, + video_x_dl=393649, + video_x_dv=393650, + video_x_fli=393651, + video_x_flv=393652, + video_x_isvideo=393653, + video_x_jng=393654 | 0x80000000, + video_x_m4v=393655, + video_x_matroska=393656, + video_x_mng=393657, + video_x_motion_jpeg=393658, + video_x_ms_asf=393659, + video_x_msvideo=393660, + video_x_qtc=393661, + video_x_sgi_movie=393662, + x_epoc_x_sisx_app=721343, }; char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) { case application_arj: return "application/arj"; @@ -480,6 +481,7 @@ case application_java_archive: return "application/java-archive"; case application_java: return "application/java"; case application_javascript: return "application/javascript"; case application_json: return "application/json"; +case application_ndjson: return "application/ndjson"; case application_marc: return "application/marc"; case application_mbedlet: return "application/mbedlet"; case application_mime: return "application/mime"; @@ -930,6 +932,8 @@ g_hash_table_insert(ext_table, "inf", (gpointer)application_inf); g_hash_table_insert(ext_table, "jar", (gpointer)application_java_archive); g_hash_table_insert(ext_table, "class", (gpointer)application_java); g_hash_table_insert(ext_table, "json", (gpointer)application_json); +g_hash_table_insert(ext_table, "jsonl", (gpointer)application_ndjson); +g_hash_table_insert(ext_table, "ndjson", (gpointer)application_ndjson); g_hash_table_insert(ext_table, "mrc", (gpointer)application_marc); g_hash_table_insert(ext_table, "mbd", (gpointer)application_mbedlet); g_hash_table_insert(ext_table, "aps", (gpointer)application_mime); @@ -1474,6 +1478,7 @@ g_hash_table_insert(mime_table, "application/java-archive", (gpointer)applicatio g_hash_table_insert(mime_table, "application/java", (gpointer)application_java); g_hash_table_insert(mime_table, "application/javascript", (gpointer)application_javascript); g_hash_table_insert(mime_table, "application/json", (gpointer)application_json); +g_hash_table_insert(mime_table, "application/ndjson", (gpointer)application_ndjson); g_hash_table_insert(mime_table, "application/marc", (gpointer)application_marc); g_hash_table_insert(mime_table, "application/mbedlet", (gpointer)application_mbedlet); g_hash_table_insert(mime_table, "application/mime", (gpointer)application_mime); diff --git a/src/parsing/parse.c b/src/parsing/parse.c index b242612..647ff6b 100644 --- a/src/parsing/parse.c +++ b/src/parsing/parse.c @@ -182,8 +182,10 @@ void parse(void *arg) { return; } else if (is_msdoc(&ScanCtx.msdoc_ctx, doc->mime)) { parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc); - } else if (is_wpd(&ScanCtx.wpd_ctx, doc->mime)) { - parse_wpd(&ScanCtx.wpd_ctx, &job->vfile, doc); + } else if (is_json(&ScanCtx.json_ctx, doc->mime)) { + parse_json(&ScanCtx.json_ctx, &job->vfile, doc); + } else if (is_ndjson(&ScanCtx.json_ctx, doc->mime)) { + parse_ndjson(&ScanCtx.json_ctx, &job->vfile, doc); } abort: diff --git a/src/parsing/parse.h b/src/parsing/parse.h index f16f6d1..a62dcc4 100644 --- a/src/parsing/parse.h +++ b/src/parsing/parse.h @@ -3,7 +3,7 @@ #include "../sist.h" -#define MAGIC_BUF_SIZE 4096 * 6 +#define MAGIC_BUF_SIZE (4096 * 6) int fs_read(struct vfile *f, void *buf, size_t size); void fs_close(struct vfile *f);