Support for markup files

This commit is contained in:
simon987 2020-04-30 20:21:09 -04:00
parent 718169345e
commit bd9e56829c
10 changed files with 224 additions and 200 deletions

View File

@ -1,6 +1,6 @@
rm ./sist2 sist2_debug
cp ../sist2.gz ../sist2_debug.gz .
gzip -d sist2.gz sist2_debug.gz
cp ../sist2.gz .
gzip -d sist2.gz
strip sist2
version=$(./sist2 --version)

View File

@ -428,3 +428,4 @@ video/x-msvideo, divx
video/x-qtc, qtc
video/x-sgi-movie, movie|mv
x-epoc/x-sisx-app,
application/x-zstd-dictionary,

1 application/arj arj
428 video/x-qtc qtc
429 video/x-sgi-movie movie|mv
430 x-epoc/x-sisx-app
431 application/x-zstd-dictionary

View File

@ -67,6 +67,12 @@ mobi = (
"application/vnd.amazon.mobi8-ebook"
)
# MIME types that are tagged as markup (mime_id() ORs 0x01000000 into their id).
markup = ("text/xml", "text/html", "text/x-sgml")
cnt = 1
@ -89,6 +95,8 @@ def mime_id(mime):
mime_id += " | 0x04000000"
elif mime in mobi:
mime_id += " | 0x02000000"
elif mime in markup:
mime_id += " | 0x01000000"
elif mime == "application/x-empty":
return "1"
return mime_id

View File

@ -19,7 +19,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "2.0.0";
static const char *const Version = "2.1.0";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@ -136,6 +136,11 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.mobi_ctx.log = _log;
ScanCtx.mobi_ctx.logf = _logf;
// TEXT
ScanCtx.text_ctx.content_size = args->content_size;
ScanCtx.text_ctx.log = _log;
ScanCtx.text_ctx.logf = _logf;
ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;

View File

@ -3,7 +3,7 @@
#include "../sist.h"
#define MAJOR_MIME(mime_id) (mime_id & 0x0FFF0000) >> 16
// Extracts the major-type field (bits 16-23) of a mime id; the bits above it carry
// per-type flags such as MOBI_MASK / MARKUP_MASK and are masked off here.
// The argument and the whole expansion are parenthesized so the macro behaves like a
// function call inside larger expressions (e.g. MAJOR_MIME(x) + 1, MAJOR_MIME(a | b)).
#define MAJOR_MIME(mime_id) (((mime_id) & 0x00FF0000) >> 16)
#define MIME_EMPTY 1
@ -28,6 +28,9 @@
// Flag bit OR-ed into the mime id of MOBI e-book types.
#define MOBI_MASK 0x02000000
// Non-zero when mime_id carries the MOBI flag. Fully parenthesized so that
// !IS_MOBI(x) and IS_MOBI(a | b) evaluate as a caller would expect.
#define IS_MOBI(mime_id) (((mime_id) & MOBI_MASK) == MOBI_MASK)
// Flag bit OR-ed into the mime id of markup types (text/html, text/xml, text/x-sgml).
#define MARKUP_MASK 0x01000000
// Non-zero when mime_id carries the markup flag. Fully parenthesized for the same
// reason as IS_MOBI.
#define IS_MARKUP(mime_id) (((mime_id) & MARKUP_MASK) == MARKUP_MASK)
enum major_mime {
MimeInvalid = 0,
MimeModel = 1,

View File

@ -242,198 +242,199 @@ enum mime {
application_x_xz=655594 | 0x08000000,
application_x_zip=655595,
application_x_zstd=655596 | 0x08000000,
application_xml=655597,
application_zip=655598 | 0x10000000,
application_zlib=655599,
audio_basic=458992 | 0x80000000,
audio_it=458993,
audio_make=458994,
audio_mid=458995,
audio_midi=458996,
audio_mp4=458997,
audio_mpeg=458998,
audio_ogg=458999,
audio_s3m=459000,
audio_tsp_audio=459001,
audio_tsplayer=459002,
audio_vnd_qcelp=459003,
audio_voxware=459004,
audio_x_aiff=459005,
audio_x_flac=459006,
audio_x_gsm=459007,
audio_x_hx_aac_adts=459008,
audio_x_jam=459009,
audio_x_liveaudio=459010,
audio_x_m4a=459011,
audio_x_midi=459012,
audio_x_mod=459013,
audio_x_mp4a_latm=459014,
audio_x_mpeg_3=459015,
audio_x_mpequrl=459016,
audio_x_nspaudio=459017,
audio_x_pn_realaudio=459018,
audio_x_psid=459019,
audio_x_realaudio=459020,
audio_x_s3m=459021,
audio_x_twinvq=459022,
audio_x_twinvq_plugin=459023,
audio_x_voc=459024,
audio_x_wav=459025,
audio_x_xbox_executable=459026 | 0x80000000,
audio_x_xbox360_executable=459027 | 0x80000000,
audio_xm=459028,
font_otf=327957 | 0x20000000,
font_sfnt=327958 | 0x20000000,
font_woff=327959 | 0x20000000,
font_woff2=327960 | 0x20000000,
image_bmp=524569,
image_cmu_raster=524570,
image_fif=524571,
image_florian=524572,
image_g3fax=524573,
image_gif=524574,
image_heic=524575,
image_ief=524576,
image_jpeg=524577,
image_jutvision=524578,
image_naplps=524579,
image_pict=524580,
image_png=524581,
image_svg=524582 | 0x80000000,
image_svg_xml=524583 | 0x80000000,
image_tiff=524584,
image_vnd_adobe_photoshop=524585 | 0x80000000,
image_vnd_djvu=524586 | 0x80000000,
image_vnd_fpx=524587,
image_vnd_microsoft_icon=524588,
image_vnd_rn_realflash=524589,
image_vnd_rn_realpix=524590,
image_vnd_wap_wbmp=524591,
image_vnd_xiff=524592,
image_webp=524593,
image_wmf=524594,
image_x_3ds=524595,
image_x_award_bioslogo=524596,
image_x_cmu_raster=524597,
image_x_cur=524598,
image_x_dwg=524599,
image_x_eps=524600,
image_x_exr=524601,
image_x_gem=524602,
image_x_icns=524603,
image_x_icon=524604 | 0x80000000,
image_x_jg=524605,
image_x_jps=524606,
image_x_ms_bmp=524607,
image_x_niff=524608,
image_x_pcx=524609,
image_x_pict=524610,
image_x_portable_bitmap=524611,
image_x_portable_graymap=524612,
image_x_portable_pixmap=524613,
image_x_quicktime=524614,
image_x_rgb=524615,
image_x_tga=524616,
image_x_tiff=524617,
image_x_win_bitmap=524618,
image_x_xcf=524619 | 0x80000000,
image_x_xpixmap=524620 | 0x80000000,
image_x_xwindowdump=524621,
message_news=196942,
message_rfc822=196943,
model_vnd_dwf=65872,
model_vnd_gdl=65873,
model_vnd_gs_gdl=65874,
model_vrml=65875,
model_x_pov=65876,
text_PGP=590165,
text_asp=590166,
text_css=590167,
text_html=590168,
text_javascript=590169,
text_mcf=590170,
text_pascal=590171,
text_plain=590172,
text_richtext=590173,
text_rtf=590174,
text_scriplet=590175,
text_tab_separated_values=590176,
text_troff=590177,
text_uri_list=590178,
text_vnd_abc=590179,
text_vnd_fmi_flexstor=590180,
text_vnd_wap_wml=590181,
text_vnd_wap_wmlscript=590182,
text_webviewhtml=590183,
text_x_Algol68=590184,
text_x_asm=590185,
text_x_audiosoft_intra=590186,
text_x_awk=590187,
text_x_bcpl=590188,
text_x_c=590189,
text_x_c__=590190,
text_x_component=590191,
text_x_diff=590192,
text_x_fortran=590193,
text_x_java=590194,
text_x_la_asf=590195,
text_x_lisp=590196,
text_x_m=590197,
text_x_m4=590198,
text_x_makefile=590199,
text_x_ms_regedit=590200,
text_x_msdos_batch=590201,
text_x_objective_c=590202,
text_x_pascal=590203,
text_x_perl=590204,
text_x_php=590205,
text_x_po=590206,
text_x_python=590207,
text_x_ruby=590208,
text_x_sass=590209,
text_x_scss=590210,
text_x_server_parsed_html=590211,
text_x_setext=590212,
text_x_sgml=590213,
text_x_shellscript=590214,
text_x_speech=590215,
text_x_tcl=590216,
text_x_tex=590217,
text_x_uil=590218,
text_x_uuencode=590219,
text_x_vcalendar=590220,
text_x_vcard=590221,
text_xml=590222,
video_MP2T=393615,
video_animaflex=393616,
video_avi=393617,
video_avs_video=393618,
video_mp4=393619,
video_mpeg=393620,
video_quicktime=393621,
video_vdo=393622,
video_vivo=393623,
video_vnd_rn_realvideo=393624,
video_vosaic=393625,
video_webm=393626,
video_x_amt_demorun=393627,
video_x_amt_showrun=393628,
video_x_atomic3d_feature=393629,
video_x_dl=393630,
video_x_dv=393631,
video_x_fli=393632,
video_x_flv=393633,
video_x_isvideo=393634,
video_x_jng=393635 | 0x80000000,
video_x_m4v=393636,
video_x_matroska=393637,
video_x_mng=393638,
video_x_motion_jpeg=393639,
video_x_ms_asf=393640,
video_x_msvideo=393641,
video_x_qtc=393642,
video_x_sgi_movie=393643,
x_epoc_x_sisx_app=721324,
application_x_zstd_dictionary=655597,
application_xml=655598,
application_zip=655599 | 0x10000000,
application_zlib=655600,
audio_basic=458993 | 0x80000000,
audio_it=458994,
audio_make=458995,
audio_mid=458996,
audio_midi=458997,
audio_mp4=458998,
audio_mpeg=458999,
audio_ogg=459000,
audio_s3m=459001,
audio_tsp_audio=459002,
audio_tsplayer=459003,
audio_vnd_qcelp=459004,
audio_voxware=459005,
audio_x_aiff=459006,
audio_x_flac=459007,
audio_x_gsm=459008,
audio_x_hx_aac_adts=459009,
audio_x_jam=459010,
audio_x_liveaudio=459011,
audio_x_m4a=459012,
audio_x_midi=459013,
audio_x_mod=459014,
audio_x_mp4a_latm=459015,
audio_x_mpeg_3=459016,
audio_x_mpequrl=459017,
audio_x_nspaudio=459018,
audio_x_pn_realaudio=459019,
audio_x_psid=459020,
audio_x_realaudio=459021,
audio_x_s3m=459022,
audio_x_twinvq=459023,
audio_x_twinvq_plugin=459024,
audio_x_voc=459025,
audio_x_wav=459026,
audio_x_xbox_executable=459027 | 0x80000000,
audio_x_xbox360_executable=459028 | 0x80000000,
audio_xm=459029,
font_otf=327958 | 0x20000000,
font_sfnt=327959 | 0x20000000,
font_woff=327960 | 0x20000000,
font_woff2=327961 | 0x20000000,
image_bmp=524570,
image_cmu_raster=524571,
image_fif=524572,
image_florian=524573,
image_g3fax=524574,
image_gif=524575,
image_heic=524576,
image_ief=524577,
image_jpeg=524578,
image_jutvision=524579,
image_naplps=524580,
image_pict=524581,
image_png=524582,
image_svg=524583 | 0x80000000,
image_svg_xml=524584 | 0x80000000,
image_tiff=524585,
image_vnd_adobe_photoshop=524586 | 0x80000000,
image_vnd_djvu=524587 | 0x80000000,
image_vnd_fpx=524588,
image_vnd_microsoft_icon=524589,
image_vnd_rn_realflash=524590,
image_vnd_rn_realpix=524591,
image_vnd_wap_wbmp=524592,
image_vnd_xiff=524593,
image_webp=524594,
image_wmf=524595,
image_x_3ds=524596,
image_x_award_bioslogo=524597,
image_x_cmu_raster=524598,
image_x_cur=524599,
image_x_dwg=524600,
image_x_eps=524601,
image_x_exr=524602,
image_x_gem=524603,
image_x_icns=524604,
image_x_icon=524605 | 0x80000000,
image_x_jg=524606,
image_x_jps=524607,
image_x_ms_bmp=524608,
image_x_niff=524609,
image_x_pcx=524610,
image_x_pict=524611,
image_x_portable_bitmap=524612,
image_x_portable_graymap=524613,
image_x_portable_pixmap=524614,
image_x_quicktime=524615,
image_x_rgb=524616,
image_x_tga=524617,
image_x_tiff=524618,
image_x_win_bitmap=524619,
image_x_xcf=524620 | 0x80000000,
image_x_xpixmap=524621 | 0x80000000,
image_x_xwindowdump=524622,
message_news=196943,
message_rfc822=196944,
model_vnd_dwf=65873,
model_vnd_gdl=65874,
model_vnd_gs_gdl=65875,
model_vrml=65876,
model_x_pov=65877,
text_PGP=590166,
text_asp=590167,
text_css=590168,
text_html=590169 | 0x01000000,
text_javascript=590170,
text_mcf=590171,
text_pascal=590172,
text_plain=590173,
text_richtext=590174,
text_rtf=590175,
text_scriplet=590176,
text_tab_separated_values=590177,
text_troff=590178,
text_uri_list=590179,
text_vnd_abc=590180,
text_vnd_fmi_flexstor=590181,
text_vnd_wap_wml=590182,
text_vnd_wap_wmlscript=590183,
text_webviewhtml=590184,
text_x_Algol68=590185,
text_x_asm=590186,
text_x_audiosoft_intra=590187,
text_x_awk=590188,
text_x_bcpl=590189,
text_x_c=590190,
text_x_c__=590191,
text_x_component=590192,
text_x_diff=590193,
text_x_fortran=590194,
text_x_java=590195,
text_x_la_asf=590196,
text_x_lisp=590197,
text_x_m=590198,
text_x_m4=590199,
text_x_makefile=590200,
text_x_ms_regedit=590201,
text_x_msdos_batch=590202,
text_x_objective_c=590203,
text_x_pascal=590204,
text_x_perl=590205,
text_x_php=590206,
text_x_po=590207,
text_x_python=590208,
text_x_ruby=590209,
text_x_sass=590210,
text_x_scss=590211,
text_x_server_parsed_html=590212,
text_x_setext=590213,
text_x_sgml=590214 | 0x01000000,
text_x_shellscript=590215,
text_x_speech=590216,
text_x_tcl=590217,
text_x_tex=590218,
text_x_uil=590219,
text_x_uuencode=590220,
text_x_vcalendar=590221,
text_x_vcard=590222,
text_xml=590223 | 0x01000000,
video_MP2T=393616,
video_animaflex=393617,
video_avi=393618,
video_avs_video=393619,
video_mp4=393620,
video_mpeg=393621,
video_quicktime=393622,
video_vdo=393623,
video_vivo=393624,
video_vnd_rn_realvideo=393625,
video_vosaic=393626,
video_webm=393627,
video_x_amt_demorun=393628,
video_x_amt_showrun=393629,
video_x_atomic3d_feature=393630,
video_x_dl=393631,
video_x_dv=393632,
video_x_fli=393633,
video_x_flv=393634,
video_x_isvideo=393635,
video_x_jng=393636 | 0x80000000,
video_x_m4v=393637,
video_x_matroska=393638,
video_x_mng=393639,
video_x_motion_jpeg=393640,
video_x_ms_asf=393641,
video_x_msvideo=393642,
video_x_qtc=393643,
video_x_sgi_movie=393644,
x_epoc_x_sisx_app=721325,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj";
@ -864,6 +865,7 @@ case video_x_msvideo: return "video/x-msvideo";
case video_x_qtc: return "video/x-qtc";
case video_x_sgi_movie: return "video/x-sgi-movie";
case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
case application_x_zstd_dictionary: return "application/x-zstd-dictionary";
default: return NULL;}}
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
@ -1813,5 +1815,6 @@ g_hash_table_insert(mime_table, "video/x-msvideo", (gpointer)video_x_msvideo);
g_hash_table_insert(mime_table, "video/x-qtc", (gpointer)video_x_qtc);
g_hash_table_insert(mime_table, "video/x-sgi-movie", (gpointer)video_x_sgi_movie);
g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
g_hash_table_insert(mime_table, "application/x-zstd-dictionary", (gpointer)application_x_zstd_dictionary);
return mime_table;}
#endif

View File

@ -124,7 +124,11 @@ void parse(void *arg) {
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc.mime), &doc);
} else if (mmime == MimeText && ScanCtx.text_ctx.content_size > 0) {
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
if (IS_MARKUP(doc.mime)) {
parse_markup(&ScanCtx.text_ctx, &job->vfile, &doc);
} else {
parse_text(&ScanCtx.text_ctx, &job->vfile, &doc);
}
} else if (IS_FONT(doc.mime)) {
parse_font(&ScanCtx.font_ctx, &job->vfile, &doc);

View File

@ -11,7 +11,7 @@
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">2.0.0</span>
<!-- Keep in sync with the Version string the binary reports via `sist2 --version`. -->
<span class="badge badge-pill version">2.1.0</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span>
<button style="margin-left: auto" class="btn" type="button" data-toggle="modal" data-target="#settings" onclick="loadSettings()">Settings</button>
<a id="theme" class="btn" title="Toggle theme" href="/">Theme</a>

File diff suppressed because one or more lines are too long

2
third-party/libscan vendored

@ -1 +1 @@
Subproject commit 5739391cb71fa6bcfe4a77691b2fe02eec773326
Subproject commit 22e75650d4ade9f5ff810d28ef96e7d7e427fe65