mirror of
https://github.com/simon987/sist2.git
synced 2025-04-10 14:06:45 +00:00
Support for markup files
This commit is contained in:
parent
718169345e
commit
bd9e56829c
@ -1,6 +1,6 @@
|
||||
rm ./sist2 sist2_debug
|
||||
cp ../sist2.gz ../sist2_debug.gz .
|
||||
gzip -d sist2.gz sist2_debug.gz
|
||||
cp ../sist2.gz .
|
||||
gzip -d sist2.gz
|
||||
strip sist2
|
||||
|
||||
version=$(./sist2 --version)
|
||||
|
@ -428,3 +428,4 @@ video/x-msvideo, divx
|
||||
video/x-qtc, qtc
|
||||
video/x-sgi-movie, movie|mv
|
||||
x-epoc/x-sisx-app,
|
||||
application/x-zstd-dictionary,
|
||||
|
|
@ -67,6 +67,12 @@ mobi = (
|
||||
"application/vnd.amazon.mobi8-ebook"
|
||||
)
|
||||
|
||||
markup = (
|
||||
"text/xml",
|
||||
"text/html",
|
||||
"text/x-sgml"
|
||||
)
|
||||
|
||||
cnt = 1
|
||||
|
||||
|
||||
@ -89,6 +95,8 @@ def mime_id(mime):
|
||||
mime_id += " | 0x04000000"
|
||||
elif mime in mobi:
|
||||
mime_id += " | 0x02000000"
|
||||
elif mime in markup:
|
||||
mime_id += " | 0x01000000"
|
||||
elif mime == "application/x-empty":
|
||||
return "1"
|
||||
return mime_id
|
||||
|
@ -19,7 +19,7 @@
|
||||
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||
|
||||
|
||||
static const char *const Version = "2.0.0";
|
||||
static const char *const Version = "2.1.0";
|
||||
static const char *const usage[] = {
|
||||
"sist2 scan [OPTION]... PATH",
|
||||
"sist2 index [OPTION]... INDEX",
|
||||
@ -136,6 +136,11 @@ void initialize_scan_context(scan_args_t *args) {
|
||||
ScanCtx.mobi_ctx.log = _log;
|
||||
ScanCtx.mobi_ctx.logf = _logf;
|
||||
|
||||
// TEXT
|
||||
ScanCtx.text_ctx.content_size = args->content_size;
|
||||
ScanCtx.text_ctx.log = _log;
|
||||
ScanCtx.text_ctx.logf = _logf;
|
||||
|
||||
ScanCtx.threads = args->threads;
|
||||
ScanCtx.depth = args->depth;
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
#include "../sist.h"
|
||||
|
||||
#define MAJOR_MIME(mime_id) (mime_id & 0x0FFF0000) >> 16
|
||||
#define MAJOR_MIME(mime_id) (mime_id & 0x00FF0000) >> 16
|
||||
|
||||
#define MIME_EMPTY 1
|
||||
|
||||
@ -28,6 +28,9 @@
|
||||
#define MOBI_MASK 0x02000000
|
||||
#define IS_MOBI(mime_id) (mime_id & MOBI_MASK) == MOBI_MASK
|
||||
|
||||
#define MARKUP_MASK 0x01000000
|
||||
#define IS_MARKUP(mime_id) (mime_id & MARKUP_MASK) == MARKUP_MASK
|
||||
|
||||
enum major_mime {
|
||||
MimeInvalid = 0,
|
||||
MimeModel = 1,
|
||||
|
@ -242,198 +242,199 @@ enum mime {
|
||||
application_x_xz=655594 | 0x08000000,
|
||||
application_x_zip=655595,
|
||||
application_x_zstd=655596 | 0x08000000,
|
||||
application_xml=655597,
|
||||
application_zip=655598 | 0x10000000,
|
||||
application_zlib=655599,
|
||||
audio_basic=458992 | 0x80000000,
|
||||
audio_it=458993,
|
||||
audio_make=458994,
|
||||
audio_mid=458995,
|
||||
audio_midi=458996,
|
||||
audio_mp4=458997,
|
||||
audio_mpeg=458998,
|
||||
audio_ogg=458999,
|
||||
audio_s3m=459000,
|
||||
audio_tsp_audio=459001,
|
||||
audio_tsplayer=459002,
|
||||
audio_vnd_qcelp=459003,
|
||||
audio_voxware=459004,
|
||||
audio_x_aiff=459005,
|
||||
audio_x_flac=459006,
|
||||
audio_x_gsm=459007,
|
||||
audio_x_hx_aac_adts=459008,
|
||||
audio_x_jam=459009,
|
||||
audio_x_liveaudio=459010,
|
||||
audio_x_m4a=459011,
|
||||
audio_x_midi=459012,
|
||||
audio_x_mod=459013,
|
||||
audio_x_mp4a_latm=459014,
|
||||
audio_x_mpeg_3=459015,
|
||||
audio_x_mpequrl=459016,
|
||||
audio_x_nspaudio=459017,
|
||||
audio_x_pn_realaudio=459018,
|
||||
audio_x_psid=459019,
|
||||
audio_x_realaudio=459020,
|
||||
audio_x_s3m=459021,
|
||||
audio_x_twinvq=459022,
|
||||
audio_x_twinvq_plugin=459023,
|
||||
audio_x_voc=459024,
|
||||
audio_x_wav=459025,
|
||||
audio_x_xbox_executable=459026 | 0x80000000,
|
||||
audio_x_xbox360_executable=459027 | 0x80000000,
|
||||
audio_xm=459028,
|
||||
font_otf=327957 | 0x20000000,
|
||||
font_sfnt=327958 | 0x20000000,
|
||||
font_woff=327959 | 0x20000000,
|
||||
font_woff2=327960 | 0x20000000,
|
||||
image_bmp=524569,
|
||||
image_cmu_raster=524570,
|
||||
image_fif=524571,
|
||||
image_florian=524572,
|
||||
image_g3fax=524573,
|
||||
image_gif=524574,
|
||||
image_heic=524575,
|
||||
image_ief=524576,
|
||||
image_jpeg=524577,
|
||||
image_jutvision=524578,
|
||||
image_naplps=524579,
|
||||
image_pict=524580,
|
||||
image_png=524581,
|
||||
image_svg=524582 | 0x80000000,
|
||||
image_svg_xml=524583 | 0x80000000,
|
||||
image_tiff=524584,
|
||||
image_vnd_adobe_photoshop=524585 | 0x80000000,
|
||||
image_vnd_djvu=524586 | 0x80000000,
|
||||
image_vnd_fpx=524587,
|
||||
image_vnd_microsoft_icon=524588,
|
||||
image_vnd_rn_realflash=524589,
|
||||
image_vnd_rn_realpix=524590,
|
||||
image_vnd_wap_wbmp=524591,
|
||||
image_vnd_xiff=524592,
|
||||
image_webp=524593,
|
||||
image_wmf=524594,
|
||||
image_x_3ds=524595,
|
||||
image_x_award_bioslogo=524596,
|
||||
image_x_cmu_raster=524597,
|
||||
image_x_cur=524598,
|
||||
image_x_dwg=524599,
|
||||
image_x_eps=524600,
|
||||
image_x_exr=524601,
|
||||
image_x_gem=524602,
|
||||
image_x_icns=524603,
|
||||
image_x_icon=524604 | 0x80000000,
|
||||
image_x_jg=524605,
|
||||
image_x_jps=524606,
|
||||
image_x_ms_bmp=524607,
|
||||
image_x_niff=524608,
|
||||
image_x_pcx=524609,
|
||||
image_x_pict=524610,
|
||||
image_x_portable_bitmap=524611,
|
||||
image_x_portable_graymap=524612,
|
||||
image_x_portable_pixmap=524613,
|
||||
image_x_quicktime=524614,
|
||||
image_x_rgb=524615,
|
||||
image_x_tga=524616,
|
||||
image_x_tiff=524617,
|
||||
image_x_win_bitmap=524618,
|
||||
image_x_xcf=524619 | 0x80000000,
|
||||
image_x_xpixmap=524620 | 0x80000000,
|
||||
image_x_xwindowdump=524621,
|
||||
message_news=196942,
|
||||
message_rfc822=196943,
|
||||
model_vnd_dwf=65872,
|
||||
model_vnd_gdl=65873,
|
||||
model_vnd_gs_gdl=65874,
|
||||
model_vrml=65875,
|
||||
model_x_pov=65876,
|
||||
text_PGP=590165,
|
||||
text_asp=590166,
|
||||
text_css=590167,
|
||||
text_html=590168,
|
||||
text_javascript=590169,
|
||||
text_mcf=590170,
|
||||
text_pascal=590171,
|
||||
text_plain=590172,
|
||||
text_richtext=590173,
|
||||
text_rtf=590174,
|
||||
text_scriplet=590175,
|
||||
text_tab_separated_values=590176,
|
||||
text_troff=590177,
|
||||
text_uri_list=590178,
|
||||
text_vnd_abc=590179,
|
||||
text_vnd_fmi_flexstor=590180,
|
||||
text_vnd_wap_wml=590181,
|
||||
text_vnd_wap_wmlscript=590182,
|
||||
text_webviewhtml=590183,
|
||||
text_x_Algol68=590184,
|
||||
text_x_asm=590185,
|
||||
text_x_audiosoft_intra=590186,
|
||||
text_x_awk=590187,
|
||||
text_x_bcpl=590188,
|
||||
text_x_c=590189,
|
||||
text_x_c__=590190,
|
||||
text_x_component=590191,
|
||||
text_x_diff=590192,
|
||||
text_x_fortran=590193,
|
||||
text_x_java=590194,
|
||||
text_x_la_asf=590195,
|
||||
text_x_lisp=590196,
|
||||
text_x_m=590197,
|
||||
text_x_m4=590198,
|
||||
text_x_makefile=590199,
|
||||
text_x_ms_regedit=590200,
|
||||
text_x_msdos_batch=590201,
|
||||
text_x_objective_c=590202,
|
||||
text_x_pascal=590203,
|
||||
text_x_perl=590204,
|
||||
text_x_php=590205,
|
||||
text_x_po=590206,
|
||||
text_x_python=590207,
|
||||
text_x_ruby=590208,
|
||||
text_x_sass=590209,
|
||||
text_x_scss=590210,
|
||||
text_x_server_parsed_html=590211,
|
||||
text_x_setext=590212,
|
||||
text_x_sgml=590213,
|
||||
text_x_shellscript=590214,
|
||||
text_x_speech=590215,
|
||||
text_x_tcl=590216,
|
||||
text_x_tex=590217,
|
||||
text_x_uil=590218,
|
||||
text_x_uuencode=590219,
|
||||
text_x_vcalendar=590220,
|
||||
text_x_vcard=590221,
|
||||
text_xml=590222,
|
||||
video_MP2T=393615,
|
||||
video_animaflex=393616,
|
||||
video_avi=393617,
|
||||
video_avs_video=393618,
|
||||
video_mp4=393619,
|
||||
video_mpeg=393620,
|
||||
video_quicktime=393621,
|
||||
video_vdo=393622,
|
||||
video_vivo=393623,
|
||||
video_vnd_rn_realvideo=393624,
|
||||
video_vosaic=393625,
|
||||
video_webm=393626,
|
||||
video_x_amt_demorun=393627,
|
||||
video_x_amt_showrun=393628,
|
||||
video_x_atomic3d_feature=393629,
|
||||
video_x_dl=393630,
|
||||
video_x_dv=393631,
|
||||
video_x_fli=393632,
|
||||
video_x_flv=393633,
|
||||
video_x_isvideo=393634,
|
||||
video_x_jng=393635 | 0x80000000,
|
||||
video_x_m4v=393636,
|
||||
video_x_matroska=393637,
|
||||
video_x_mng=393638,
|
||||
video_x_motion_jpeg=393639,
|
||||
video_x_ms_asf=393640,
|
||||
video_x_msvideo=393641,
|
||||
video_x_qtc=393642,
|
||||
video_x_sgi_movie=393643,
|
||||
x_epoc_x_sisx_app=721324,
|
||||
application_x_zstd_dictionary=655597,
|
||||
application_xml=655598,
|
||||
application_zip=655599 | 0x10000000,
|
||||
application_zlib=655600,
|
||||
audio_basic=458993 | 0x80000000,
|
||||
audio_it=458994,
|
||||
audio_make=458995,
|
||||
audio_mid=458996,
|
||||
audio_midi=458997,
|
||||
audio_mp4=458998,
|
||||
audio_mpeg=458999,
|
||||
audio_ogg=459000,
|
||||
audio_s3m=459001,
|
||||
audio_tsp_audio=459002,
|
||||
audio_tsplayer=459003,
|
||||
audio_vnd_qcelp=459004,
|
||||
audio_voxware=459005,
|
||||
audio_x_aiff=459006,
|
||||
audio_x_flac=459007,
|
||||
audio_x_gsm=459008,
|
||||
audio_x_hx_aac_adts=459009,
|
||||
audio_x_jam=459010,
|
||||
audio_x_liveaudio=459011,
|
||||
audio_x_m4a=459012,
|
||||
audio_x_midi=459013,
|
||||
audio_x_mod=459014,
|
||||
audio_x_mp4a_latm=459015,
|
||||
audio_x_mpeg_3=459016,
|
||||
audio_x_mpequrl=459017,
|
||||
audio_x_nspaudio=459018,
|
||||
audio_x_pn_realaudio=459019,
|
||||
audio_x_psid=459020,
|
||||
audio_x_realaudio=459021,
|
||||
audio_x_s3m=459022,
|
||||
audio_x_twinvq=459023,
|
||||
audio_x_twinvq_plugin=459024,
|
||||
audio_x_voc=459025,
|
||||
audio_x_wav=459026,
|
||||
audio_x_xbox_executable=459027 | 0x80000000,
|
||||
audio_x_xbox360_executable=459028 | 0x80000000,
|
||||
audio_xm=459029,
|
||||
font_otf=327958 | 0x20000000,
|
||||
font_sfnt=327959 | 0x20000000,
|
||||
font_woff=327960 | 0x20000000,
|
||||
font_woff2=327961 | 0x20000000,
|
||||
image_bmp=524570,
|
||||
image_cmu_raster=524571,
|
||||
image_fif=524572,
|
||||
image_florian=524573,
|
||||
image_g3fax=524574,
|
||||
image_gif=524575,
|
||||
image_heic=524576,
|
||||
image_ief=524577,
|
||||
image_jpeg=524578,
|
||||
image_jutvision=524579,
|
||||
image_naplps=524580,
|
||||
image_pict=524581,
|
||||
image_png=524582,
|
||||
image_svg=524583 | 0x80000000,
|
||||
image_svg_xml=524584 | 0x80000000,
|
||||
image_tiff=524585,
|
||||
image_vnd_adobe_photoshop=524586 | 0x80000000,
|
||||
image_vnd_djvu=524587 | 0x80000000,
|
||||
image_vnd_fpx=524588,
|
||||
image_vnd_microsoft_icon=524589,
|
||||
image_vnd_rn_realflash=524590,
|
||||
image_vnd_rn_realpix=524591,
|
||||
image_vnd_wap_wbmp=524592,
|
||||
image_vnd_xiff=524593,
|
||||
image_webp=524594,
|
||||
image_wmf=524595,
|
||||
image_x_3ds=524596,
|
||||
image_x_award_bioslogo=524597,
|
||||
image_x_cmu_raster=524598,
|
||||
image_x_cur=524599,
|
||||
image_x_dwg=524600,
|
||||
image_x_eps=524601,
|
||||
image_x_exr=524602,
|
||||
image_x_gem=524603,
|
||||
image_x_icns=524604,
|
||||
image_x_icon=524605 | 0x80000000,
|
||||
image_x_jg=524606,
|
||||
image_x_jps=524607,
|
||||
image_x_ms_bmp=524608,
|
||||
image_x_niff=524609,
|
||||
image_x_pcx=524610,
|
||||
image_x_pict=524611,
|
||||
image_x_portable_bitmap=524612,
|
||||
image_x_portable_graymap=524613,
|
||||
image_x_portable_pixmap=524614,
|
||||
image_x_quicktime=524615,
|
||||
image_x_rgb=524616,
|
||||
image_x_tga=524617,
|
||||
image_x_tiff=524618,
|
||||
image_x_win_bitmap=524619,
|
||||
image_x_xcf=524620 | 0x80000000,
|
||||
image_x_xpixmap=524621 | 0x80000000,
|
||||
image_x_xwindowdump=524622,
|
||||
message_news=196943,
|
||||
message_rfc822=196944,
|
||||
model_vnd_dwf=65873,
|
||||
model_vnd_gdl=65874,
|
||||
model_vnd_gs_gdl=65875,
|
||||
model_vrml=65876,
|
||||
model_x_pov=65877,
|
||||
text_PGP=590166,
|
||||
text_asp=590167,
|
||||
text_css=590168,
|
||||
text_html=590169 | 0x01000000,
|
||||
text_javascript=590170,
|
||||
text_mcf=590171,
|
||||
text_pascal=590172,
|
||||
text_plain=590173,
|
||||
text_richtext=590174,
|
||||
text_rtf=590175,
|
||||
text_scriplet=590176,
|
||||
text_tab_separated_values=590177,
|
||||
text_troff=590178,
|
||||
text_uri_list=590179,
|
||||
text_vnd_abc=590180,
|
||||
text_vnd_fmi_flexstor=590181,
|
||||
text_vnd_wap_wml=590182,
|
||||
text_vnd_wap_wmlscript=590183,
|
||||
text_webviewhtml=590184,
|
||||
text_x_Algol68=590185,
|
||||
text_x_asm=590186,
|
||||
text_x_audiosoft_intra=590187,
|
||||
text_x_awk=590188,
|
||||
text_x_bcpl=590189,
|
||||
text_x_c=590190,
|
||||
text_x_c__=590191,
|
||||
text_x_component=590192,
|
||||
text_x_diff=590193,
|
||||
text_x_fortran=590194,
|
||||
text_x_java=590195,
|
||||
text_x_la_asf=590196,
|
||||
text_x_lisp=590197,
|
||||
text_x_m=590198,
|
||||
text_x_m4=590199,
|
||||
text_x_makefile=590200,
|
||||
text_x_ms_regedit=590201,
|
||||
text_x_msdos_batch=590202,
|
||||
text_x_objective_c=590203,
|
||||
text_x_pascal=590204,
|
||||
text_x_perl=590205,
|
||||
text_x_php=590206,
|
||||
text_x_po=590207,
|
||||
text_x_python=590208,
|
||||
text_x_ruby=590209,
|
||||
text_x_sass=590210,
|
||||
text_x_scss=590211,
|
||||
text_x_server_parsed_html=590212,
|
||||
text_x_setext=590213,
|
||||
text_x_sgml=590214 | 0x01000000,
|
||||
text_x_shellscript=590215,
|
||||
text_x_speech=590216,
|
||||
text_x_tcl=590217,
|
||||
text_x_tex=590218,
|
||||
text_x_uil=590219,
|
||||
text_x_uuencode=590220,
|
||||
text_x_vcalendar=590221,
|
||||
text_x_vcard=590222,
|
||||
text_xml=590223 | 0x01000000,
|
||||
video_MP2T=393616,
|
||||
video_animaflex=393617,
|
||||
video_avi=393618,
|
||||
video_avs_video=393619,
|
||||
video_mp4=393620,
|
||||
video_mpeg=393621,
|
||||
video_quicktime=393622,
|
||||
video_vdo=393623,
|
||||
video_vivo=393624,
|
||||
video_vnd_rn_realvideo=393625,
|
||||
video_vosaic=393626,
|
||||
video_webm=393627,
|
||||
video_x_amt_demorun=393628,
|
||||
video_x_amt_showrun=393629,
|
||||
video_x_atomic3d_feature=393630,
|
||||
video_x_dl=393631,
|
||||
video_x_dv=393632,
|
||||
video_x_fli=393633,
|
||||
video_x_flv=393634,
|
||||
video_x_isvideo=393635,
|
||||
video_x_jng=393636 | 0x80000000,
|
||||
video_x_m4v=393637,
|
||||
video_x_matroska=393638,
|
||||
video_x_mng=393639,
|
||||
video_x_motion_jpeg=393640,
|
||||
video_x_ms_asf=393641,
|
||||
video_x_msvideo=393642,
|
||||
video_x_qtc=393643,
|
||||
video_x_sgi_movie=393644,
|
||||
x_epoc_x_sisx_app=721325,
|
||||
};
|
||||
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
|
||||
case application_arj: return "application/arj";
|
||||
@ -864,6 +865,7 @@ case video_x_msvideo: return "video/x-msvideo";
|
||||
case video_x_qtc: return "video/x-qtc";
|
||||
case video_x_sgi_movie: return "video/x-sgi-movie";
|
||||
case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
|
||||
case application_x_zstd_dictionary: return "application/x-zstd-dictionary";
|
||||
default: return NULL;}}
|
||||
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
|
||||
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
|
||||
@ -1813,5 +1815,6 @@ g_hash_table_insert(mime_table, "video/x-msvideo", (gpointer)video_x_msvideo);
|
||||
g_hash_table_insert(mime_table, "video/x-qtc", (gpointer)video_x_qtc);
|
||||
g_hash_table_insert(mime_table, "video/x-sgi-movie", (gpointer)video_x_sgi_movie);
|
||||
g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
|
||||
g_hash_table_insert(mime_table, "application/x-zstd-dictionary", (gpointer)application_x_zstd_dictionary);
|
||||
return mime_table;}
|
||||
#endif
|
||||
|
@ -124,7 +124,11 @@ void parse(void *arg) {
|
||||
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc);
|
||||
|
||||
} else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
|
||||
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
|
||||
if (IS_MARKUP(doc.mime)) {
|
||||
parse_markup(&ScanCtx.text_ctx, &job->vfile, &doc);
|
||||
} else {
|
||||
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
|
||||
}
|
||||
|
||||
} else if (IS_FONT(doc.mime)) {
|
||||
parse_font(&ScanCtx.font_ctx, &job->vfile, &doc);
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
<nav class="navbar navbar-expand-lg">
|
||||
<a class="navbar-brand" href="/">sist2</a>
|
||||
<span class="badge badge-pill version">2.0.0</span>
|
||||
<span class="badge badge-pill version">2.0.1</span>
|
||||
<span class="tagline">Lightning-fast file system indexer and search tool </span>
|
||||
<button style="margin-left: auto" class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
|
||||
<a id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
|
||||
|
File diff suppressed because one or more lines are too long
2
third-party/libscan
vendored
2
third-party/libscan
vendored
@ -1 +1 @@
|
||||
Subproject commit 5739391cb71fa6bcfe4a77691b2fe02eec773326
|
||||
Subproject commit 22e75650d4ade9f5ff810d28ef96e7d7e427fe65
|
Loading…
x
Reference in New Issue
Block a user