utf8 update + bug fixes

This commit is contained in:
simon 2019-11-03 07:44:54 -05:00
parent f8f1a27180
commit 7962a994e2
28 changed files with 1022 additions and 503 deletions

9
.gitmodules vendored
View File

@ -16,3 +16,12 @@
[submodule "lmdb"]
path = lmdb
url = https://github.com/LMDB/lmdb
[submodule "utf8.h"]
path = utf8.h
url = https://github.com/sheredom/utf8.h
[submodule "lib/openjpeg"]
path = lib/openjpeg
url = https://github.com/uclouvain/openjpeg
[submodule "lib/harfbuzz"]
path = lib/harfbuzz
url = https://github.com/harfbuzz/harfbuzz

View File

@ -37,6 +37,9 @@ if (WITH_SIST2)
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h
# utf8.h
utf8.h/utf8.h
)
endif ()
@ -67,6 +70,9 @@ if (WITH_SIST2_SCAN)
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h
# utf8.h
utf8.h/utf8.h
)
endif ()
@ -116,10 +122,10 @@ if (WITH_SIST2)
target_compile_options(sist2
PRIVATE
# -O3
# -march=native
# -fno-stack-protector
# -fomit-frame-pointer
-Ofast
# -march=native
-fno-stack-protector
-fomit-frame-pointer
)
TARGET_LINK_LIBRARIES(
@ -150,6 +156,9 @@ if (WITH_SIST2)
m
bz2
magic
harfbuzz
openjp2
freetype
)
endif ()
@ -187,7 +196,7 @@ if (WITH_SIST2_SCAN)
)
target_compile_options(sist2_scan
PRIVATE
-O3
-Ofast
# -march=native
-fno-stack-protector
-fomit-frame-pointer
@ -215,6 +224,9 @@ if (WITH_SIST2_SCAN)
pthread
m
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
freetype
)
endif ()

View File

@ -58,7 +58,7 @@ File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,cbz,cbr,fb2,epub | MuPDF | yes | yes, `png` | title |
`audio/*` | libav | - | yes, `jpeg` | ID3 tags |
`video/*` | libav | - | yes, `jpeg` | *planned* |
`video/*` | libav | - | yes, `jpeg` | title, comment |
`image/*` | libav | - | yes, `jpeg` | *planned* |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |

2
cJSON

@ -1 +1 @@
Subproject commit 2de7d04aaf67598e23d06573dcb4e370ebbad410
Subproject commit 533ff8a783be0d5c81581ab17cd2aeba3f0044c1

1
lib/harfbuzz Submodule

@ -0,0 +1 @@
Subproject commit 7cde68f10cdf2c3ff77c1d9077475c0fc034c75c

1
lib/openjpeg Submodule

@ -0,0 +1 @@
Subproject commit 5875a6b44618fb7dfd5cd6d742533eaee2014060

View File

@ -254,6 +254,7 @@ text/mcf, mcf
text/pascal, pas
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
text/richtext, rt|rtf|rtx
text/rtf,
text/scriplet, wsc
text/x-awk, awk
!video/x-jng, jng
@ -263,7 +264,7 @@ image/x-xwindowdump, xwd
!image/vnd.adobe.photoshop, psd
text/tab-separated-values, tsv
text/troff, man|me|ms|roff|t|tr
text/uri-list, uni|unis|uri|uris
text/uri-list, uji|unis|uri|uris
text/vnd.abc, abc
text/vnd.fmi.flexstor, flx
text/vnd.wap.wmlscript, wmls
@ -360,3 +361,53 @@ application/x-wine-extension-ini,
application/x-cbz, cbz
application/x-cbr, cbr
application/x-ms-compress-szdd, fon
application/x-atari-7800-rom, a78
application/x-nes-rom, nes
application/x-font-pfm, pfm
application/x-gettext-translation,
image/wmf,
application/pgp-keys,
image/x-3ds, 3ds
application/x-lz4, lz4
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.oasis.opendocument.presentation, odp
application/x-msaccess, accdb
application/vnd.oasis.opendocument.spreadsheet, ods
audio/x-aiff, aiff|aif
text/x-ms-regedit, reg
application/x-gamecube-rom,
application/x-nintendo-ds-rom,
text/x-objective-c,
application/x-font-gdos,
application/x-apple-diskimage,
application/x-zstd, zst
video/x-m4v, m4v
message/news,
application/vnd.symbian.install,
application/x-lzh-compressed,
application/x-dosdriver,
application/vnd.tcpdump.pcap, pcap
x-epoc/x-sisx-app,
application/x-avira-qua,
video/MP2T,
application/x-snappy-framed,
application/x-lz4+json, jsonlz4
application/x-dmp, dmp
application/zlib, z
application/x-pgp-keyring,
application/x-gdbm,
application/x-font-pf2, pf2
application/x-zip,
application/x-coredump,
application/x-java-jmod, jmod
application/x-terminfo,
application/x-terminfo2,
application/x-arc,
application/vnd.lotus-1-2-3,
image/x-win-bitmap,
application/x-maxis-dbpf,
text/PGP,
audio/x-hx-aac-adts,
application/x-chrome-extension,
image/heic, heic
image/x-gem,
1 application/arj arj
254 text/pascal pas
255 text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
256 text/richtext rt|rtf|rtx
257 text/rtf
258 text/scriplet wsc
259 text/x-awk awk
260 !video/x-jng jng
264 !image/vnd.adobe.photoshop psd
265 text/tab-separated-values tsv
266 text/troff man|me|ms|roff|t|tr
267 text/uri-list uni|unis|uri|uris uji|unis|uri|uris
268 text/vnd.abc abc
269 text/vnd.fmi.flexstor flx
270 text/vnd.wap.wmlscript wmls
361 application/x-cbz cbz
362 application/x-cbr cbr
363 application/x-ms-compress-szdd fon
364 application/x-atari-7800-rom a78
365 application/x-nes-rom nes
366 application/x-font-pfm pfm
367 application/x-gettext-translation
368 image/wmf
369 application/pgp-keys
370 image/x-3ds 3ds
371 application/x-lz4 lz4
372 application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
373 application/vnd.oasis.opendocument.presentation odp
374 application/x-msaccess accdb
375 application/vnd.oasis.opendocument.spreadsheet ods
376 audio/x-aiff aiff|aif
377 text/x-ms-regedit reg
378 application/x-gamecube-rom
379 application/x-nintendo-ds-rom
380 text/x-objective-c
381 application/x-font-gdos
382 application/x-apple-diskimage
383 application/x-zstd zst
384 video/x-m4v m4v
385 message/news
386 application/vnd.symbian.install
387 application/x-lzh-compressed
388 application/x-dosdriver
389 application/vnd.tcpdump.pcap pcap
390 x-epoc/x-sisx-app
391 application/x-avira-qua
392 video/MP2T
393 application/x-snappy-framed
394 application/x-lz4+json jsonlz4
395 application/x-dmp dmp
396 application/zlib z
397 application/x-pgp-keyring
398 application/x-gdbm
399 application/x-font-pf2 pf2
400 application/x-zip
401 application/x-coredump
402 application/x-java-jmod jmod
403 application/x-terminfo
404 application/x-terminfo2
405 application/x-arc
406 application/vnd.lotus-1-2-3
407 image/x-win-bitmap
408 application/x-maxis-dbpf
409 text/PGP
410 audio/x-hx-aac-adts
411 application/x-chrome-extension
412 image/heic heic
413 image/x-gem

View File

@ -2,12 +2,28 @@
cd lib
cd mupdf
HAVE_X11=no HAVE_GLUT=no make -j 4
USE_SYSTEM_HARFBUZZ=yes USE_SYSTEM_OPENJPEG=yes HAVE_X11=no HAVE_GLUT=no make -j 4
cd ..
mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a .
# openjp2
cd openjpeg
#cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -march=native -DNDEBUG"
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3"
make -j 4
cd ..
mv openjpeg/bin/libopenjp2.a .
# harfbuzz
cd harfbuzz
./autogen.sh
./configure --disable-shared --enable-static
make -j 4
cd ..
mv harfbuzz/src/.libs/libharfbuzz.a .
# ffmpeg
cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \

View File

@ -9,6 +9,22 @@ cd ..
mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a .
# openjp2
cd openjpeg
#cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -march=native -DNDEBUG"
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3"
gmake -j 4
cd ..
mv openjpeg/bin/libopenjp2.a .
# harfbuzz
cd harfbuzz
./autogen.sh
./configure --disable-shared --enable-static
gmake -j 4
cd ..
mv harfbuzz/src/.libs/libharfbuzz.a .
# ffmpeg
cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \

View File

@ -12,7 +12,8 @@ major_mime = {
"audio": 7,
"image": 8,
"text": 9,
"application": 10
"application": 10,
"x-epoc": 11,
}
pdf = (

View File

@ -102,7 +102,14 @@ void elastic_flush() {
cJSON *ret_json = cJSON_Parse(r->body);
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
fprintf(stderr, "%s\n", r->body);
cJSON *err;
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
char* str = cJSON_Print(err);
fprintf(stderr, "%s\n", str);
cJSON_free(str);
}
}
}
cJSON_Delete(ret_json);

View File

@ -236,7 +236,7 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
case MetaTitle: {
buf.cur = 0;
while ((c = getc(file)) != 0) {
if (!(SHOULD_IGNORE_CHAR(c)) || c == ' ') {
if (SHOULD_KEEP_CHAR(c) || c == ' ') {
dyn_buffer_write_char(&buf, (char) c);
}
}
@ -244,6 +244,9 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
cJSON_AddStringToObject(document, get_meta_key_text(key), buf.buf);
break;
}
default:
fprintf(stderr, "Invalid meta key (corrupt index): %x", key);
break;
}
key = getc(file);

View File

@ -3,14 +3,13 @@
parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int base) {
int len = (int) strlen(filepath);
parse_job_t *job = malloc(sizeof(parse_job_t) + len);
memcpy(&(job->filepath), filepath, len + 1);
strcpy(job->filepath, filepath);
job->base = base;
char *p = strrchr(filepath + base, '.');
if (p != NULL) {
job->ext = (int)(p - filepath + 1);
job->ext = (int) (p - filepath + 1);
} else {
job->ext = len;
}

View File

@ -10,7 +10,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.0.14";
static const char *const Version = "1.1.0";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@ -52,11 +52,10 @@ void sist2_scan(scan_args_t *args) {
ScanCtx.tn_qscale = args->quality;
ScanCtx.tn_size = args->size;
ScanCtx.content_size = args->content_size;
ScanCtx.pool = tpool_create(args->threads, serializer_cleanup);
ScanCtx.threads = args->threads;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
strcpy(ScanCtx.index.desc.root, args->path);
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
init_dir(ScanCtx.index.path);
@ -93,6 +92,8 @@ void sist2_scan(scan_args_t *args) {
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
}
ScanCtx.pool = tpool_create(args->threads, serializer_cleanup);
tpool_start(ScanCtx.pool);
walk_directory_tree(ScanCtx.index.desc.root);
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);

View File

@ -186,7 +186,11 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) {
continue;
c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) {
continue;
}
}
glyph_t glyph = ft_glyph_to_glyph(face->glyph);

View File

@ -1,6 +1,9 @@
#include "src/sist.h"
#include "src/ctx.h"
#define MIN_SIZE 32
__always_inline
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
@ -22,8 +25,8 @@ AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
return jpeg;
}
__always_inline
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
AVFrame *scaled_frame = av_frame_alloc();
int dstW;
int dstH;
@ -41,16 +44,22 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si
}
}
if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
return NULL;
}
AVFrame *scaled_frame = av_frame_alloc();
struct SwsContext *ctx = sws_getContext(
decoder->width, decoder->height, decoder->pix_fmt,
dstW, dstH, AV_PIX_FMT_YUVJ420P,
SWS_FAST_BILINEAR, 0, 0, 0
);
int dst_buf_len = avpicture_get_size(AV_PIX_FMT_YUVJ420P, dstW, dstH);
int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);
avpicture_fill((AVPicture *) scaled_frame, dst_buf, AV_PIX_FMT_YUVJ420P, dstW, dstH);
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
sws_scale(ctx,
(const uint8_t *const *) frame->data, frame->linesize,
@ -81,7 +90,7 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
if (read_frame_ret != 0) {
if (read_frame_ret != AVERROR_EOF) {
fprintf(stderr, "Error reading frame: %s\n", av_err2str(read_frame_ret));
fprintf(stderr, "Error reading frame: %d\n", read_frame_ret);
}
av_frame_free(&frame);
av_packet_unref(&avPacket);
@ -108,35 +117,40 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
}
#define APPEND_TAG_META(doc, tag, keyname) \
text_buffer_t tex = text_buffer_create(4096); \
text_buffer_append_string(&tex, tag->value); \
text_buffer_t tex = text_buffer_create(-1); \
text_buffer_append_string0(&tex, tag->value); \
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
meta_tag->key = keyname; \
strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
APPEND_META(doc, meta_tag) \
text_buffer_destroy(&tex);
__always_inline
void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
AVDictionaryEntry *tag = NULL;
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
char *key = tag->key;
for (; *key; ++key) *key = (char) tolower(*key);
char key[32];
strncpy(key, tag->key, sizeof(key));
if (strcmp(tag->key, "artist") == 0) {
char *ptr = key;
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
if (strcmp(key, "artist") == 0) {
APPEND_TAG_META(doc, tag, MetaArtist)
} else if (strcmp(tag->key, "genre") == 0) {
} else if (strcmp(key, "genre") == 0) {
APPEND_TAG_META(doc, tag, MetaGenre)
} else if (strcmp(tag->key, "title") == 0) {
} else if (strcmp(key, "title") == 0) {
APPEND_TAG_META(doc, tag, MetaTitle)
} else if (strcmp(tag->key, "album_artist") == 0) {
} else if (strcmp(key, "album_artist") == 0) {
APPEND_TAG_META(doc, tag, MetaAlbumArtist)
} else if (strcmp(tag->key, "album") == 0) {
} else if (strcmp(key, "album") == 0) {
APPEND_TAG_META(doc, tag, MetaAlbum)
}
}
}
__always_inline
void append_video_meta(AVFormatContext *pFormatCtx, document_t *doc, int include_audio_tags) {
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
@ -146,17 +160,20 @@ void append_video_meta(AVFormatContext *pFormatCtx, document_t *doc, int include
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->intval = pFormatCtx->bit_rate;
meta_bitrate->longval = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
AVDictionaryEntry *tag = NULL;
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
char *key = tag->key;
for (; *key; ++key) *key = (char) tolower(*key);
char key[32];
strncpy(key, tag->key, sizeof(key));
if (strcmp(tag->key, "title") == 0 && include_audio_tags) {
char *ptr = key;
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
if (strcmp(key, "title") == 0 && include_audio_tags) {
APPEND_TAG_META(doc, tag, MetaTitle)
} else if (strcmp(tag->key, "comment") == 0) {
} else if (strcmp(key, "comment") == 0) {
APPEND_TAG_META(doc, tag, MetaContent)
}
}
@ -174,7 +191,7 @@ void parse_media(const char *filepath, document_t *doc) {
}
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
if (res < 0) {
printf("ERR%s %s\n", filepath, av_err2str(res));
fprintf(stderr, "media error: %s %s\n", filepath, av_err2str(res));
return;
}
@ -224,7 +241,7 @@ void parse_media(const char *filepath, document_t *doc) {
append_video_meta(pFormatCtx, doc, audio_stream == -1);
}
if (stream->codecpar->width <= 20 || stream->codecpar->height <= 20) {
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
@ -259,6 +276,14 @@ void parse_media(const char *filepath, document_t *doc) {
// Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);
if (scaled_frame == NULL) {
av_frame_free(&frame);
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame);
@ -268,7 +293,8 @@ void parse_media(const char *filepath, document_t *doc) {
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, jpeg_packet.size);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data,
jpeg_packet.size);
av_packet_unref(&jpeg_packet);
av_frame_free(&frame);

View File

@ -5,6 +5,7 @@
#include "src/sist.h"
#define MIN_VIDEO_SIZE 1024 * 64
#define MIN_IMAGE_SIZE 1024 * 2
void parse_media(const char * filepath, document_t *doc);

View File

@ -1,10 +1,12 @@
#include "mime.h"
unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) {
char lower[64];
char lower[8];
char *p = lower;
while ((*ext)) {
int cnt = 0;
while ((*ext) != '\0' && cnt + 1 < sizeof(lower)) {
*p++ = (char)tolower(*ext++);
cnt++;
}
*p = '\0';
return (size_t) g_hash_table_lookup(ext_table, lower);

View File

@ -39,334 +39,385 @@ enum mime {
application_oda=655391,
application_ogg=655392,
application_pdf=655393 | 0x40000000,
application_pgp_signature=655394,
application_pkcs7_signature=655395,
application_pkix_cert=655396,
application_postscript=655397,
application_pro_eng=655398,
application_ringing_tones=655399,
application_smil=655400,
application_solids=655401,
application_sounder=655402,
application_step=655403,
application_streamingmedia=655404,
application_vda=655405,
application_vnd_fdf=655406,
application_vnd_font_fontforge_sfd=655407,
application_vnd_hp_hpgl=655408,
application_vnd_iccprofile=655409,
application_vnd_ms_cab_compressed=655410,
application_vnd_ms_excel=655411,
application_vnd_ms_fontobject=655412,
application_vnd_ms_opentype=655413 | 0x20000000,
application_vnd_ms_pki_certstore=655414,
application_vnd_ms_pki_pko=655415,
application_vnd_ms_pki_seccat=655416,
application_vnd_ms_powerpoint=655417,
application_vnd_ms_project=655418,
application_vnd_oasis_opendocument_base=655419,
application_vnd_oasis_opendocument_formula=655420,
application_vnd_oasis_opendocument_graphics=655421,
application_vnd_oasis_opendocument_text=655422,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655423,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655424,
application_vnd_wap_wmlc=655425,
application_vnd_wap_wmlscriptc=655426,
application_vnd_xara=655427,
application_vocaltec_media_desc=655428,
application_vocaltec_media_file=655429,
application_winhelp=655430,
application_wordperfect=655431,
application_wordperfect6_0=655432,
application_wordperfect6_1=655433,
application_x_123=655434,
application_x_7z_compressed=655435,
application_x_aim=655436,
application_x_archive=655437,
application_x_authorware_bin=655438,
application_x_authorware_map=655439,
application_x_authorware_seg=655440,
application_x_bcpio=655441,
application_x_bittorrent=655442,
application_x_bsh=655443,
application_x_bytecode_python=655444,
application_x_bzip=655445,
application_x_bzip2=655446,
application_x_cbr=655447,
application_x_cbz=655448 | 0x40000000,
application_x_cdlink=655449,
application_x_chat=655450,
application_x_cocoa=655451,
application_x_conference=655452,
application_x_cpio=655453,
application_x_dbf=655454,
application_x_dbt=655455,
application_x_debian_package=655456,
application_x_deepv=655457,
application_x_director=655458,
application_x_dosexec=655459,
application_x_dvi=655460,
application_x_elc=655461,
application_pgp_keys=655394,
application_pgp_signature=655395,
application_pkcs7_signature=655396,
application_pkix_cert=655397,
application_postscript=655398,
application_pro_eng=655399,
application_ringing_tones=655400,
application_smil=655401,
application_solids=655402,
application_sounder=655403,
application_step=655404,
application_streamingmedia=655405,
application_vda=655406,
application_vnd_fdf=655407,
application_vnd_font_fontforge_sfd=655408,
application_vnd_hp_hpgl=655409,
application_vnd_iccprofile=655410,
application_vnd_lotus_1_2_3=655411,
application_vnd_ms_cab_compressed=655412,
application_vnd_ms_excel=655413,
application_vnd_ms_fontobject=655414,
application_vnd_ms_opentype=655415 | 0x20000000,
application_vnd_ms_pki_certstore=655416,
application_vnd_ms_pki_pko=655417,
application_vnd_ms_pki_seccat=655418,
application_vnd_ms_powerpoint=655419,
application_vnd_ms_project=655420,
application_vnd_oasis_opendocument_base=655421,
application_vnd_oasis_opendocument_formula=655422,
application_vnd_oasis_opendocument_graphics=655423,
application_vnd_oasis_opendocument_presentation=655424,
application_vnd_oasis_opendocument_spreadsheet=655425,
application_vnd_oasis_opendocument_text=655426,
application_vnd_openxmlformats_officedocument_presentationml_presentation=655427,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655428,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655429,
application_vnd_symbian_install=655430,
application_vnd_tcpdump_pcap=655431,
application_vnd_wap_wmlc=655432,
application_vnd_wap_wmlscriptc=655433,
application_vnd_xara=655434,
application_vocaltec_media_desc=655435,
application_vocaltec_media_file=655436,
application_winhelp=655437,
application_wordperfect=655438,
application_wordperfect6_0=655439,
application_wordperfect6_1=655440,
application_x_123=655441,
application_x_7z_compressed=655442,
application_x_aim=655443,
application_x_apple_diskimage=655444,
application_x_arc=655445,
application_x_archive=655446,
application_x_atari_7800_rom=655447,
application_x_authorware_bin=655448,
application_x_authorware_map=655449,
application_x_authorware_seg=655450,
application_x_avira_qua=655451,
application_x_bcpio=655452,
application_x_bittorrent=655453,
application_x_bsh=655454,
application_x_bytecode_python=655455,
application_x_bzip=655456,
application_x_bzip2=655457,
application_x_cbr=655458,
application_x_cbz=655459 | 0x40000000,
application_x_cdlink=655460,
application_x_chat=655461,
application_x_chrome_extension=655462,
application_x_cocoa=655463,
application_x_conference=655464,
application_x_coredump=655465,
application_x_cpio=655466,
application_x_dbf=655467,
application_x_dbt=655468,
application_x_debian_package=655469,
application_x_deepv=655470,
application_x_director=655471,
application_x_dmp=655472,
application_x_dosdriver=655473,
application_x_dosexec=655474,
application_x_dvi=655475,
application_x_elc=655476,
application_x_empty=1,
application_x_envoy=655463,
application_x_esrehber=655464,
application_x_excel=655465,
application_x_executable=655466,
application_x_font_sfn=655467,
application_x_font_ttf=655468 | 0x20000000,
application_x_freelance=655469,
application_x_git=655470,
application_x_gsp=655471,
application_x_gss=655472,
application_x_gtar=655473,
application_x_gzip=655474,
application_x_hdf=655475,
application_x_helpfile=655476,
application_x_httpd_imap=655477,
application_x_ima=655478,
application_x_innosetup=655479,
application_x_internett_signup=655480,
application_x_inventor=655481,
application_x_ip2=655482,
application_x_java_applet=655483,
application_x_java_commerce=655484,
application_x_java_image=655485,
application_x_java_keystore=655486,
application_x_kdelnk=655487,
application_x_koan=655488,
application_x_latex=655489,
application_x_livescreen=655490,
application_x_lotus=655491,
application_x_lzh=655492,
application_x_lzx=655493,
application_x_mach_binary=655494,
application_x_mach_executable=655495,
application_x_magic_cap_package_1_0=655496,
application_x_mathcad=655497,
application_x_meme=655498,
application_x_midi=655499,
application_x_mif=655500,
application_x_mix_transfer=655501,
application_x_mobipocket_ebook=655502,
application_x_ms_compress_szdd=655503,
application_x_ms_pdb=655504,
application_x_ms_reader=655505,
application_x_navi_animation=655506,
application_x_navidoc=655507,
application_x_navimap=655508,
application_x_navistyle=655509,
application_x_netcdf=655510,
application_x_newton_compatible_pkg=655511,
application_x_object=655512,
application_x_omc=655513,
application_x_omcdatamaker=655514,
application_x_omcregerator=655515,
application_x_pagemaker=655516,
application_x_pcl=655517,
application_x_pixclscript=655518,
application_x_pkcs7_certreqresp=655519,
application_x_pkcs7_signature=655520,
application_x_project=655521,
application_x_qpro=655522,
application_x_rar=655523,
application_x_rpm=655524,
application_x_sdp=655525,
application_x_sea=655526,
application_x_seelogo=655527,
application_x_setupscript=655528,
application_x_shar=655529,
application_x_sharedlib=655530,
application_x_shockwave_flash=655531,
application_x_sprite=655532,
application_x_sqlite3=655533,
application_x_sv4cpio=655534,
application_x_sv4crc=655535,
application_x_tar=655536,
application_x_tbook=655537,
application_x_tex_tfm=655538,
application_x_texinfo=655539,
application_x_ustar=655540,
application_x_visio=655541,
application_x_vnd_audioexplosion_mzz=655542,
application_x_vnd_ls_xpix=655543,
application_x_vrml=655544,
application_x_wais_source=655545,
application_x_wine_extension_ini=655546,
application_x_wintalk=655547,
application_x_world=655548,
application_x_wri=655549,
application_x_x509_ca_cert=655550,
application_x_xz=655551,
application_xml=655552,
application_zip=655553,
audio_it=458946,
audio_make=458947,
audio_mid=458948,
audio_midi=458949,
audio_mp4=458950,
audio_mpeg=458951,
audio_ogg=458952,
audio_s3m=458953,
audio_tsp_audio=458954,
audio_tsplayer=458955,
audio_vnd_qcelp=458956,
audio_voxware=458957,
audio_x_flac=458958,
audio_x_gsm=458959,
audio_x_jam=458960,
audio_x_liveaudio=458961,
audio_x_m4a=458962,
audio_x_midi=458963,
audio_x_mod=458964,
audio_x_mp4a_latm=458965,
audio_x_mpeg_3=458966,
audio_x_mpequrl=458967,
audio_x_nspaudio=458968,
audio_x_pn_realaudio=458969,
audio_x_psid=458970,
audio_x_realaudio=458971,
audio_x_twinvq=458972,
audio_x_twinvq_plugin=458973,
audio_x_voc=458974,
audio_x_wav=458975,
audio_xm=458976,
font_otf=327905 | 0x20000000,
font_sfnt=327906 | 0x20000000,
font_woff=327907 | 0x20000000,
font_woff2=327908 | 0x20000000,
image_cmu_raster=524517,
image_fif=524518,
image_florian=524519,
image_g3fax=524520,
image_gif=524521,
image_ief=524522,
image_jpeg=524523,
image_jutvision=524524,
image_naplps=524525,
image_pict=524526,
image_png=524527,
image_svg=524528 | 0x80000000,
image_svg_xml=524529 | 0x80000000,
image_tiff=524530,
image_vnd_adobe_photoshop=524531 | 0x80000000,
image_vnd_djvu=524532 | 0x80000000,
image_vnd_fpx=524533,
image_vnd_microsoft_icon=524534,
image_vnd_rn_realflash=524535,
image_vnd_rn_realpix=524536,
image_vnd_wap_wbmp=524537,
image_vnd_xiff=524538,
image_webp=524539,
image_x_cmu_raster=524540,
image_x_cur=524541,
image_x_dwg=524542,
image_x_eps=524543,
image_x_exr=524544,
image_x_icns=524545,
image_x_icon=524546 | 0x80000000,
image_x_jg=524547,
image_x_jps=524548,
image_x_ms_bmp=524549,
image_x_niff=524550,
image_x_pcx=524551,
image_x_pict=524552,
image_x_portable_bitmap=524553,
image_x_portable_graymap=524554,
image_x_portable_pixmap=524555,
image_x_quicktime=524556,
image_x_rgb=524557,
image_x_tga=524558,
image_x_tiff=524559,
image_x_xcf=524560 | 0x80000000,
image_x_xpixmap=524561 | 0x80000000,
image_x_xwindowdump=524562,
message_rfc822=196883,
model_vnd_dwf=65812,
model_vnd_gdl=65813,
model_vnd_gs_gdl=65814,
model_vrml=65815,
model_x_pov=65816,
text_asp=590105,
text_css=590106,
text_html=590107,
text_javascript=590108,
text_mcf=590109,
text_pascal=590110,
text_plain=590111,
text_richtext=590112,
text_scriplet=590113,
text_tab_separated_values=590114,
text_troff=590115,
text_uri_list=590116,
text_vnd_abc=590117,
text_vnd_fmi_flexstor=590118,
text_vnd_wap_wml=590119,
text_vnd_wap_wmlscript=590120,
text_webviewhtml=590121,
text_x_Algol68=590122,
text_x_asm=590123,
text_x_audiosoft_intra=590124,
text_x_awk=590125,
text_x_bcpl=590126,
text_x_c=590127,
text_x_c__=590128,
text_x_component=590129,
text_x_diff=590130,
text_x_fortran=590131,
text_x_java=590132,
text_x_la_asf=590133,
text_x_lisp=590134,
text_x_m=590135,
text_x_m4=590136,
text_x_makefile=590137,
text_x_msdos_batch=590138,
text_x_pascal=590139,
text_x_perl=590140,
text_x_php=590141,
text_x_po=590142,
text_x_python=590143,
text_x_ruby=590144,
text_x_sass=590145,
text_x_scss=590146,
text_x_server_parsed_html=590147,
text_x_setext=590148,
text_x_sgml=590149,
text_x_shellscript=590150,
text_x_speech=590151,
text_x_tcl=590152,
text_x_tex=590153,
text_x_uil=590154,
text_x_uuencode=590155,
text_x_vcalendar=590156,
text_x_vcard=590157,
text_xml=590158,
video_animaflex=393551,
video_avi=393552,
video_avs_video=393553,
video_mp4=393554,
video_mpeg=393555,
video_quicktime=393556,
video_vdo=393557,
video_vivo=393558,
video_vnd_rn_realvideo=393559,
video_vosaic=393560,
video_webm=393561,
video_x_amt_demorun=393562,
video_x_amt_showrun=393563,
video_x_atomic3d_feature=393564,
video_x_dl=393565,
video_x_dv=393566,
video_x_fli=393567,
video_x_flv=393568,
video_x_isvideo=393569,
video_x_jng=393570 | 0x80000000,
video_x_matroska=393571,
video_x_mng=393572,
video_x_motion_jpeg=393573,
video_x_ms_asf=393574,
video_x_msvideo=393575,
video_x_qtc=393576,
video_x_sgi_movie=393577,
application_x_envoy=655478,
application_x_esrehber=655479,
application_x_excel=655480,
application_x_executable=655481,
application_x_font_gdos=655482,
application_x_font_pf2=655483,
application_x_font_pfm=655484,
application_x_font_sfn=655485,
application_x_font_ttf=655486 | 0x20000000,
application_x_freelance=655487,
application_x_gamecube_rom=655488,
application_x_gdbm=655489,
application_x_gettext_translation=655490,
application_x_git=655491,
application_x_gsp=655492,
application_x_gss=655493,
application_x_gtar=655494,
application_x_gzip=655495,
application_x_hdf=655496,
application_x_helpfile=655497,
application_x_httpd_imap=655498,
application_x_ima=655499,
application_x_innosetup=655500,
application_x_internett_signup=655501,
application_x_inventor=655502,
application_x_ip2=655503,
application_x_java_applet=655504,
application_x_java_commerce=655505,
application_x_java_image=655506,
application_x_java_jmod=655507,
application_x_java_keystore=655508,
application_x_kdelnk=655509,
application_x_koan=655510,
application_x_latex=655511,
application_x_livescreen=655512,
application_x_lotus=655513,
application_x_lz4=655514,
application_x_lz4_json=655515,
application_x_lzh=655516,
application_x_lzh_compressed=655517,
application_x_lzx=655518,
application_x_mach_binary=655519,
application_x_mach_executable=655520,
application_x_magic_cap_package_1_0=655521,
application_x_mathcad=655522,
application_x_maxis_dbpf=655523,
application_x_meme=655524,
application_x_midi=655525,
application_x_mif=655526,
application_x_mix_transfer=655527,
application_x_mobipocket_ebook=655528,
application_x_ms_compress_szdd=655529,
application_x_ms_pdb=655530,
application_x_ms_reader=655531,
application_x_msaccess=655532,
application_x_navi_animation=655533,
application_x_navidoc=655534,
application_x_navimap=655535,
application_x_navistyle=655536,
application_x_nes_rom=655537,
application_x_netcdf=655538,
application_x_newton_compatible_pkg=655539,
application_x_nintendo_ds_rom=655540,
application_x_object=655541,
application_x_omc=655542,
application_x_omcdatamaker=655543,
application_x_omcregerator=655544,
application_x_pagemaker=655545,
application_x_pcl=655546,
application_x_pgp_keyring=655547,
application_x_pixclscript=655548,
application_x_pkcs7_certreqresp=655549,
application_x_pkcs7_signature=655550,
application_x_project=655551,
application_x_qpro=655552,
application_x_rar=655553,
application_x_rpm=655554,
application_x_sdp=655555,
application_x_sea=655556,
application_x_seelogo=655557,
application_x_setupscript=655558,
application_x_shar=655559,
application_x_sharedlib=655560,
application_x_shockwave_flash=655561,
application_x_snappy_framed=655562,
application_x_sprite=655563,
application_x_sqlite3=655564,
application_x_sv4cpio=655565,
application_x_sv4crc=655566,
application_x_tar=655567,
application_x_tbook=655568,
application_x_terminfo=655569,
application_x_terminfo2=655570,
application_x_tex_tfm=655571,
application_x_texinfo=655572,
application_x_ustar=655573,
application_x_visio=655574,
application_x_vnd_audioexplosion_mzz=655575,
application_x_vnd_ls_xpix=655576,
application_x_vrml=655577,
application_x_wais_source=655578,
application_x_wine_extension_ini=655579,
application_x_wintalk=655580,
application_x_world=655581,
application_x_wri=655582,
application_x_x509_ca_cert=655583,
application_x_xz=655584,
application_x_zip=655585,
application_x_zstd=655586,
application_xml=655587,
application_zip=655588,
application_zlib=655589,
audio_it=458982,
audio_make=458983,
audio_mid=458984,
audio_midi=458985,
audio_mp4=458986,
audio_mpeg=458987,
audio_ogg=458988,
audio_s3m=458989,
audio_tsp_audio=458990,
audio_tsplayer=458991,
audio_vnd_qcelp=458992,
audio_voxware=458993,
audio_x_aiff=458994,
audio_x_flac=458995,
audio_x_gsm=458996,
audio_x_hx_aac_adts=458997,
audio_x_jam=458998,
audio_x_liveaudio=458999,
audio_x_m4a=459000,
audio_x_midi=459001,
audio_x_mod=459002,
audio_x_mp4a_latm=459003,
audio_x_mpeg_3=459004,
audio_x_mpequrl=459005,
audio_x_nspaudio=459006,
audio_x_pn_realaudio=459007,
audio_x_psid=459008,
audio_x_realaudio=459009,
audio_x_twinvq=459010,
audio_x_twinvq_plugin=459011,
audio_x_voc=459012,
audio_x_wav=459013,
audio_xm=459014,
font_otf=327943 | 0x20000000,
font_sfnt=327944 | 0x20000000,
font_woff=327945 | 0x20000000,
font_woff2=327946 | 0x20000000,
image_cmu_raster=524555,
image_fif=524556,
image_florian=524557,
image_g3fax=524558,
image_gif=524559,
image_heic=524560,
image_ief=524561,
image_jpeg=524562,
image_jutvision=524563,
image_naplps=524564,
image_pict=524565,
image_png=524566,
image_svg=524567 | 0x80000000,
image_svg_xml=524568 | 0x80000000,
image_tiff=524569,
image_vnd_adobe_photoshop=524570 | 0x80000000,
image_vnd_djvu=524571 | 0x80000000,
image_vnd_fpx=524572,
image_vnd_microsoft_icon=524573,
image_vnd_rn_realflash=524574,
image_vnd_rn_realpix=524575,
image_vnd_wap_wbmp=524576,
image_vnd_xiff=524577,
image_webp=524578,
image_wmf=524579,
image_x_3ds=524580,
image_x_cmu_raster=524581,
image_x_cur=524582,
image_x_dwg=524583,
image_x_eps=524584,
image_x_exr=524585,
image_x_gem=524586,
image_x_icns=524587,
image_x_icon=524588 | 0x80000000,
image_x_jg=524589,
image_x_jps=524590,
image_x_ms_bmp=524591,
image_x_niff=524592,
image_x_pcx=524593,
image_x_pict=524594,
image_x_portable_bitmap=524595,
image_x_portable_graymap=524596,
image_x_portable_pixmap=524597,
image_x_quicktime=524598,
image_x_rgb=524599,
image_x_tga=524600,
image_x_tiff=524601,
image_x_win_bitmap=524602,
image_x_xcf=524603 | 0x80000000,
image_x_xpixmap=524604 | 0x80000000,
image_x_xwindowdump=524605,
message_news=196926,
message_rfc822=196927,
model_vnd_dwf=65856,
model_vnd_gdl=65857,
model_vnd_gs_gdl=65858,
model_vrml=65859,
model_x_pov=65860,
text_PGP=590149,
text_asp=590150,
text_css=590151,
text_html=590152,
text_javascript=590153,
text_mcf=590154,
text_pascal=590155,
text_plain=590156,
text_richtext=590157,
text_rtf=590158,
text_scriplet=590159,
text_tab_separated_values=590160,
text_troff=590161,
text_uri_list=590162,
text_vnd_abc=590163,
text_vnd_fmi_flexstor=590164,
text_vnd_wap_wml=590165,
text_vnd_wap_wmlscript=590166,
text_webviewhtml=590167,
text_x_Algol68=590168,
text_x_asm=590169,
text_x_audiosoft_intra=590170,
text_x_awk=590171,
text_x_bcpl=590172,
text_x_c=590173,
text_x_c__=590174,
text_x_component=590175,
text_x_diff=590176,
text_x_fortran=590177,
text_x_java=590178,
text_x_la_asf=590179,
text_x_lisp=590180,
text_x_m=590181,
text_x_m4=590182,
text_x_makefile=590183,
text_x_ms_regedit=590184,
text_x_msdos_batch=590185,
text_x_objective_c=590186,
text_x_pascal=590187,
text_x_perl=590188,
text_x_php=590189,
text_x_po=590190,
text_x_python=590191,
text_x_ruby=590192,
text_x_sass=590193,
text_x_scss=590194,
text_x_server_parsed_html=590195,
text_x_setext=590196,
text_x_sgml=590197,
text_x_shellscript=590198,
text_x_speech=590199,
text_x_tcl=590200,
text_x_tex=590201,
text_x_uil=590202,
text_x_uuencode=590203,
text_x_vcalendar=590204,
text_x_vcard=590205,
text_xml=590206,
video_MP2T=393599,
video_animaflex=393600,
video_avi=393601,
video_avs_video=393602,
video_mp4=393603,
video_mpeg=393604,
video_quicktime=393605,
video_vdo=393606,
video_vivo=393607,
video_vnd_rn_realvideo=393608,
video_vosaic=393609,
video_webm=393610,
video_x_amt_demorun=393611,
video_x_amt_showrun=393612,
video_x_atomic3d_feature=393613,
video_x_dl=393614,
video_x_dv=393615,
video_x_fli=393616,
video_x_flv=393617,
video_x_isvideo=393618,
video_x_jng=393619 | 0x80000000,
video_x_m4v=393620,
video_x_matroska=393621,
video_x_mng=393622,
video_x_motion_jpeg=393623,
video_x_ms_asf=393624,
video_x_msvideo=393625,
video_x_qtc=393626,
video_x_sgi_movie=393627,
x_epoc_x_sisx_app=721308,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj";
@ -625,6 +676,7 @@ case text_mcf: return "text/mcf";
case text_pascal: return "text/pascal";
case text_plain: return "text/plain";
case text_richtext: return "text/richtext";
case text_rtf: return "text/rtf";
case text_scriplet: return "text/scriplet";
case text_x_awk: return "text/x-awk";
case video_x_jng: return "video/x-jng";
@ -730,6 +782,56 @@ case application_x_wine_extension_ini: return "application/x-wine-extension-ini"
case application_x_cbz: return "application/x-cbz";
case application_x_cbr: return "application/x-cbr";
case application_x_ms_compress_szdd: return "application/x-ms-compress-szdd";
case application_x_atari_7800_rom: return "application/x-atari-7800-rom";
case application_x_nes_rom: return "application/x-nes-rom";
case application_x_font_pfm: return "application/x-font-pfm";
case application_x_gettext_translation: return "application/x-gettext-translation";
case image_wmf: return "image/wmf";
case application_pgp_keys: return "application/pgp-keys";
case image_x_3ds: return "image/x-3ds";
case application_x_lz4: return "application/x-lz4";
case application_vnd_openxmlformats_officedocument_presentationml_presentation: return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
case application_vnd_oasis_opendocument_presentation: return "application/vnd.oasis.opendocument.presentation";
case application_x_msaccess: return "application/x-msaccess";
case application_vnd_oasis_opendocument_spreadsheet: return "application/vnd.oasis.opendocument.spreadsheet";
case audio_x_aiff: return "audio/x-aiff";
case text_x_ms_regedit: return "text/x-ms-regedit";
case application_x_gamecube_rom: return "application/x-gamecube-rom";
case application_x_nintendo_ds_rom: return "application/x-nintendo-ds-rom";
case text_x_objective_c: return "text/x-objective-c";
case application_x_font_gdos: return "application/x-font-gdos";
case application_x_apple_diskimage: return "application/x-apple-diskimage";
case application_x_zstd: return "application/x-zstd";
case video_x_m4v: return "video/x-m4v";
case message_news: return "message/news";
case application_vnd_symbian_install: return "application/vnd.symbian.install";
case application_x_lzh_compressed: return "application/x-lzh-compressed";
case application_x_dosdriver: return "application/x-dosdriver";
case application_vnd_tcpdump_pcap: return "application/vnd.tcpdump.pcap";
case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
case application_x_avira_qua: return "application/x-avira-qua";
case video_MP2T: return "video/MP2T";
case application_x_snappy_framed: return "application/x-snappy-framed";
case application_x_lz4_json: return "application/x-lz4+json";
case application_x_dmp: return "application/x-dmp";
case application_zlib: return "application/zlib";
case application_x_pgp_keyring: return "application/x-pgp-keyring";
case application_x_gdbm: return "application/x-gdbm";
case application_x_font_pf2: return "application/x-font-pf2";
case application_x_zip: return "application/x-zip";
case application_x_coredump: return "application/x-coredump";
case application_x_java_jmod: return "application/x-java-jmod";
case application_x_terminfo: return "application/x-terminfo";
case application_x_terminfo2: return "application/x-terminfo2";
case application_x_arc: return "application/x-arc";
case application_vnd_lotus_1_2_3: return "application/vnd.lotus-1-2-3";
case image_x_win_bitmap: return "image/x-win-bitmap";
case application_x_maxis_dbpf: return "application/x-maxis-dbpf";
case text_PGP: return "text/PGP";
case audio_x_hx_aac_adts: return "audio/x-hx-aac-adts";
case application_x_chrome_extension: return "application/x-chrome-extension";
case image_heic: return "image/heic";
case image_x_gem: return "image/x-gem";
default: return NULL;}}
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
@ -1097,7 +1199,7 @@ g_hash_table_insert(ext_table, "ms", (gpointer)text_troff);
g_hash_table_insert(ext_table, "roff", (gpointer)text_troff);
g_hash_table_insert(ext_table, "t", (gpointer)text_troff);
g_hash_table_insert(ext_table, "tr", (gpointer)text_troff);
g_hash_table_insert(ext_table, "uni", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "uji", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "unis", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "uri", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "uris", (gpointer)text_uri_list);
@ -1211,6 +1313,27 @@ g_hash_table_insert(ext_table, "hlp", (gpointer)application_winhelp);
g_hash_table_insert(ext_table, "cbz", (gpointer)application_x_cbz);
g_hash_table_insert(ext_table, "cbr", (gpointer)application_x_cbr);
g_hash_table_insert(ext_table, "fon", (gpointer)application_x_ms_compress_szdd);
g_hash_table_insert(ext_table, "a78", (gpointer)application_x_atari_7800_rom);
g_hash_table_insert(ext_table, "nes", (gpointer)application_x_nes_rom);
g_hash_table_insert(ext_table, "pfm", (gpointer)application_x_font_pfm);
g_hash_table_insert(ext_table, "3ds", (gpointer)image_x_3ds);
g_hash_table_insert(ext_table, "lz4", (gpointer)application_x_lz4);
g_hash_table_insert(ext_table, "pptx", (gpointer)application_vnd_openxmlformats_officedocument_presentationml_presentation);
g_hash_table_insert(ext_table, "odp", (gpointer)application_vnd_oasis_opendocument_presentation);
g_hash_table_insert(ext_table, "accdb", (gpointer)application_x_msaccess);
g_hash_table_insert(ext_table, "ods", (gpointer)application_vnd_oasis_opendocument_spreadsheet);
g_hash_table_insert(ext_table, "aiff", (gpointer)audio_x_aiff);
g_hash_table_insert(ext_table, "aif", (gpointer)audio_x_aiff);
g_hash_table_insert(ext_table, "reg", (gpointer)text_x_ms_regedit);
g_hash_table_insert(ext_table, "zst", (gpointer)application_x_zstd);
g_hash_table_insert(ext_table, "m4v", (gpointer)video_x_m4v);
g_hash_table_insert(ext_table, "pcap", (gpointer)application_vnd_tcpdump_pcap);
g_hash_table_insert(ext_table, "jsonlz4", (gpointer)application_x_lz4_json);
g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp);
g_hash_table_insert(ext_table, "z", (gpointer)application_zlib);
g_hash_table_insert(ext_table, "pf2", (gpointer)application_x_font_pf2);
g_hash_table_insert(ext_table, "jmod", (gpointer)application_x_java_jmod);
g_hash_table_insert(ext_table, "heic", (gpointer)image_heic);
return ext_table;}
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
@ -1469,6 +1592,7 @@ g_hash_table_insert(mime_table, "text/mcf", (gpointer)text_mcf);
g_hash_table_insert(mime_table, "text/pascal", (gpointer)text_pascal);
g_hash_table_insert(mime_table, "text/plain", (gpointer)text_plain);
g_hash_table_insert(mime_table, "text/richtext", (gpointer)text_richtext);
g_hash_table_insert(mime_table, "text/rtf", (gpointer)text_rtf);
g_hash_table_insert(mime_table, "text/scriplet", (gpointer)text_scriplet);
g_hash_table_insert(mime_table, "text/x-awk", (gpointer)text_x_awk);
g_hash_table_insert(mime_table, "video/x-jng", (gpointer)video_x_jng);
@ -1574,5 +1698,55 @@ g_hash_table_insert(mime_table, "application/x-wine-extension-ini", (gpointer)ap
g_hash_table_insert(mime_table, "application/x-cbz", (gpointer)application_x_cbz);
g_hash_table_insert(mime_table, "application/x-cbr", (gpointer)application_x_cbr);
g_hash_table_insert(mime_table, "application/x-ms-compress-szdd", (gpointer)application_x_ms_compress_szdd);
g_hash_table_insert(mime_table, "application/x-atari-7800-rom", (gpointer)application_x_atari_7800_rom);
g_hash_table_insert(mime_table, "application/x-nes-rom", (gpointer)application_x_nes_rom);
g_hash_table_insert(mime_table, "application/x-font-pfm", (gpointer)application_x_font_pfm);
g_hash_table_insert(mime_table, "application/x-gettext-translation", (gpointer)application_x_gettext_translation);
g_hash_table_insert(mime_table, "image/wmf", (gpointer)image_wmf);
g_hash_table_insert(mime_table, "application/pgp-keys", (gpointer)application_pgp_keys);
g_hash_table_insert(mime_table, "image/x-3ds", (gpointer)image_x_3ds);
g_hash_table_insert(mime_table, "application/x-lz4", (gpointer)application_x_lz4);
g_hash_table_insert(mime_table, "application/vnd.openxmlformats-officedocument.presentationml.presentation", (gpointer)application_vnd_openxmlformats_officedocument_presentationml_presentation);
g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.presentation", (gpointer)application_vnd_oasis_opendocument_presentation);
g_hash_table_insert(mime_table, "application/x-msaccess", (gpointer)application_x_msaccess);
g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.spreadsheet", (gpointer)application_vnd_oasis_opendocument_spreadsheet);
g_hash_table_insert(mime_table, "audio/x-aiff", (gpointer)audio_x_aiff);
g_hash_table_insert(mime_table, "text/x-ms-regedit", (gpointer)text_x_ms_regedit);
g_hash_table_insert(mime_table, "application/x-gamecube-rom", (gpointer)application_x_gamecube_rom);
g_hash_table_insert(mime_table, "application/x-nintendo-ds-rom", (gpointer)application_x_nintendo_ds_rom);
g_hash_table_insert(mime_table, "text/x-objective-c", (gpointer)text_x_objective_c);
g_hash_table_insert(mime_table, "application/x-font-gdos", (gpointer)application_x_font_gdos);
g_hash_table_insert(mime_table, "application/x-apple-diskimage", (gpointer)application_x_apple_diskimage);
g_hash_table_insert(mime_table, "application/x-zstd", (gpointer)application_x_zstd);
g_hash_table_insert(mime_table, "video/x-m4v", (gpointer)video_x_m4v);
g_hash_table_insert(mime_table, "message/news", (gpointer)message_news);
g_hash_table_insert(mime_table, "application/vnd.symbian.install", (gpointer)application_vnd_symbian_install);
g_hash_table_insert(mime_table, "application/x-lzh-compressed", (gpointer)application_x_lzh_compressed);
g_hash_table_insert(mime_table, "application/x-dosdriver", (gpointer)application_x_dosdriver);
g_hash_table_insert(mime_table, "application/vnd.tcpdump.pcap", (gpointer)application_vnd_tcpdump_pcap);
g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
g_hash_table_insert(mime_table, "application/x-avira-qua", (gpointer)application_x_avira_qua);
g_hash_table_insert(mime_table, "video/MP2T", (gpointer)video_MP2T);
g_hash_table_insert(mime_table, "application/x-snappy-framed", (gpointer)application_x_snappy_framed);
g_hash_table_insert(mime_table, "application/x-lz4+json", (gpointer)application_x_lz4_json);
g_hash_table_insert(mime_table, "application/x-dmp", (gpointer)application_x_dmp);
g_hash_table_insert(mime_table, "application/zlib", (gpointer)application_zlib);
g_hash_table_insert(mime_table, "application/x-pgp-keyring", (gpointer)application_x_pgp_keyring);
g_hash_table_insert(mime_table, "application/x-gdbm", (gpointer)application_x_gdbm);
g_hash_table_insert(mime_table, "application/x-font-pf2", (gpointer)application_x_font_pf2);
g_hash_table_insert(mime_table, "application/x-zip", (gpointer)application_x_zip);
g_hash_table_insert(mime_table, "application/x-coredump", (gpointer)application_x_coredump);
g_hash_table_insert(mime_table, "application/x-java-jmod", (gpointer)application_x_java_jmod);
g_hash_table_insert(mime_table, "application/x-terminfo", (gpointer)application_x_terminfo);
g_hash_table_insert(mime_table, "application/x-terminfo2", (gpointer)application_x_terminfo2);
g_hash_table_insert(mime_table, "application/x-arc", (gpointer)application_x_arc);
g_hash_table_insert(mime_table, "application/vnd.lotus-1-2-3", (gpointer)application_vnd_lotus_1_2_3);
g_hash_table_insert(mime_table, "image/x-win-bitmap", (gpointer)image_x_win_bitmap);
g_hash_table_insert(mime_table, "application/x-maxis-dbpf", (gpointer)application_x_maxis_dbpf);
g_hash_table_insert(mime_table, "text/PGP", (gpointer)text_PGP);
g_hash_table_insert(mime_table, "audio/x-hx-aac-adts", (gpointer)audio_x_hx_aac_adts);
g_hash_table_insert(mime_table, "application/x-chrome-extension", (gpointer)application_x_chrome_extension);
g_hash_table_insert(mime_table, "image/heic", (gpointer)image_heic);
g_hash_table_insert(mime_table, "image/x-gem", (gpointer)image_x_gem);
return mime_table;}
#endif

View File

@ -1,7 +1,7 @@
#include "src/sist.h"
#include "src/ctx.h"
__thread magic_t Magic;
__thread magic_t Magic = NULL;
void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
@ -62,7 +62,7 @@ void parse(void *arg) {
if (job->info.st_size == 0) {
doc.mime = MIME_EMPTY;
} else if (*(job->filepath + job->ext) != '\0') {
} else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
}
@ -80,11 +80,18 @@ void parse(void *arg) {
bytes_read = read(fd, buf, PARSE_BUF_SIZE);
if (bytes_read == -1) {
perror("read");
close(fd);
free(job);
return;
}
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
if (magic_mime_str != NULL) {
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
if (doc.mime == 0) {
fprintf(stderr, "Couldn't find mime %s, %s!\n", magic_mime_str, job->filepath + job->base);
fprintf(stderr, "Couldn't find mime %s, %s\n", magic_mime_str, job->filepath + job->base);
}
}
}
@ -93,7 +100,8 @@ void parse(void *arg) {
if (!(SHOULD_PARSE(doc.mime))) {
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) || mmime == MimeAudio || mmime == MimeImage) {
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
parse_media(job->filepath, &doc);
} else if (IS_PDF(doc.mime)) {

View File

@ -1,10 +1,22 @@
#include <src/ctx.h>
#include "pdf.h"
#include "src/ctx.h"
fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
fz_page *cover = fz_load_page(ctx, fzdoc, 0);
int err = 0;
fz_page *cover = NULL;
fz_var(cover);
fz_try(ctx)
cover = fz_load_page(ctx, fzdoc, 0);
fz_catch(ctx)
err = 1;
if (err != 0) {
fz_drop_page(ctx, cover);
return NULL;
}
fz_rect bounds = fz_bound_page(ctx, cover);
float scale;
@ -24,24 +36,49 @@ fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
fz_clear_pixmap_with_value(ctx, pixmap, 0xFF);
fz_device *dev = fz_new_draw_device(ctx, m, pixmap);
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_var(err);
fz_try(ctx)
{
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_run_page(ctx, cover, dev, fz_identity, NULL);
}
fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
}
fz_catch(ctx)
fz_rethrow(ctx);
err = ctx->error.errcode;
fz_drop_device(ctx, dev);
if (err != 0) {
fz_drop_page(ctx, cover);
fz_drop_pixmap(ctx, pixmap);
return NULL;
}
fz_buffer *fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
unsigned char *tn_buf;
size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
fz_buffer *fzbuf = NULL;
fz_var(fzbuf);
fz_var(err);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
fz_try(ctx)
fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
fz_catch(ctx)
err = ctx->error.errcode;
if (err == 0) {
unsigned char *tn_buf;
size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
}
fz_drop_pixmap(ctx, pixmap);
fz_drop_buffer(ctx, fzbuf);
fz_drop_pixmap(ctx, pixmap);
if (err != 0) {
fz_drop_page(ctx, cover);
return NULL;
}
return cover;
}
@ -49,6 +86,32 @@ fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
/*
 * Callback that deliberately does nothing: both arguments are ignored.
 * It is installed as the warning/error print hook of a MuPDF context so
 * that those messages are discarded.
 */
void fz_noop_callback(__attribute__((unused)) void *user,
                      __attribute__((unused)) const char *message) {
    /* intentionally empty */
}
/*
 * Apply the settings this file uses on a MuPDF context:
 * turn off ICC handling, register the document handlers, and replace the
 * warning and error print hooks with fz_noop_callback.
 */
void init_ctx(fz_context *ctx) {
fz_disable_icc(ctx);
fz_register_document_handlers(ctx);
/* Silence MuPDF diagnostics: both print hooks point at the no-op callback. */
ctx->warn.print = fz_noop_callback;
ctx->error.print = fz_noop_callback;
}
/*
 * Copy every character of a structured-text block into a text buffer.
 *
 * Blocks whose type is not FZ_STEXT_BLOCK_TEXT are ignored. Characters are
 * appended in order, line by line, through text_buffer_append_char().
 *
 * Returns TEXT_BUF_FULL as soon as text_buffer_append_char() reports it,
 * otherwise 0.
 */
int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
    if (block->type == FZ_STEXT_BLOCK_TEXT) {
        for (fz_stext_line *ln = block->u.t.first_line; ln != NULL; ln = ln->next) {
            for (fz_stext_char *ch = ln->first_char; ch != NULL; ch = ch->next) {
                int status = text_buffer_append_char(tex, ch->c);
                if (status == TEXT_BUF_FULL) {
                    return TEXT_BUF_FULL;
                }
            }
        }
    }

    return 0;
}
void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
static int mu_is_initialized = 0;
@ -57,105 +120,140 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
mu_is_initialized = 1;
}
fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
fz_stream *stream = NULL;
fz_document *fzdoc = NULL;
fz_var(stream);
init_ctx(ctx);
int err = 0;
fz_document *fzdoc = NULL;
fz_stream *stream = NULL;
fz_var(fzdoc);
fz_var(stream);
fz_var(err);
fz_try(ctx)
{
fz_disable_icc(ctx);
fz_register_document_handlers(ctx);
//disable warnings
ctx->warn.print = fz_noop_callback;
ctx->error.print = fz_noop_callback;
stream = fz_open_memory(ctx, buf, buf_len);
fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
char title[4096] = {'\0',};
fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
printf("Title: %s\n", title); //todo rmv
if (strlen(title) > 0) {
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title) + 1);
meta_content->key = MetaTitle;
strcpy(meta_content->strval, title);
APPEND_META(doc, meta_content)
}
int page_count = fz_count_pages(ctx, fzdoc);
fz_page *cover = render_cover(ctx, doc, fzdoc);
fz_stext_options opts;
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page; if (current_page == 0) {
page = cover;
} else {
page = fz_load_page(ctx, fzdoc, current_page);
}
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_try(ctx)
fz_run_page_contents(ctx, page, dev, fz_identity, NULL);
fz_always(ctx)
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
fz_catch(ctx)
fz_rethrow(ctx);
fz_drop_device(ctx, dev);
fz_stext_block *block = stext->first_block;
while (block != NULL) {
if (block->type != FZ_STEXT_BLOCK_TEXT) {
block = block->next;
continue;
}
fz_stext_line *line = block->u.t.first_line;
while (line != NULL) {
fz_stext_char *c = line->first_char;
while (c != NULL) {
if (text_buffer_append_char(&text_buf, c->c) == TEXT_BUF_FULL) {
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
goto write_loop_end;
}
c = c->next;
}
line = line->next;
}
block = block->next;
}
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
}
write_loop_end:;
text_buffer_terminate_string(&text_buf);
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
text_buffer_destroy(&text_buf);
APPEND_META(doc, meta_content)
}
fz_always(ctx)
{
fz_catch(ctx)
err = ctx->error.errcode;
if (err) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
} fz_catch(ctx) {
fprintf(stderr, "Error %s %s\n", doc->filepath, ctx->error.message);
return;
}
char title[4096] = {'\0',};
fz_try(ctx)
fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
fz_catch(ctx)
;
if (strlen(title) > 0) {
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title));
meta_content->key = MetaTitle;
strcpy(meta_content->strval, title);
APPEND_META(doc, meta_content)
}
int page_count = -1;
fz_var(err);
fz_try(ctx)
page_count = fz_count_pages(ctx, fzdoc);
fz_catch(ctx)
err = ctx->error.errcode;
if (err) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_page *cover = render_cover(ctx, doc, fzdoc);
if (cover == NULL) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_stext_options opts = {0};
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page = NULL;
if (current_page == 0) {
page = cover;
} else {
fz_var(err);
fz_try(ctx)
page = fz_load_page(ctx, fzdoc, current_page);
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
}
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
fz_var(err);
fz_try(ctx)
fz_run_page(ctx, page, dev, fz_identity, NULL);
fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_stext_block *block = stext->first_block;
while (block != NULL) {
int ret = read_stext_block(block, &text_buf);
if (ret == TEXT_BUF_FULL) {
break;
}
block = block->next;
}
fz_drop_stext_page(ctx, stext);
fz_drop_page(ctx, page);
if (text_buf.dyn_buffer.cur >= text_buf.dyn_buffer.size) {
break;
}
}
text_buffer_terminate_string(&text_buf);
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
APPEND_META(doc, meta_content)
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
text_buffer_destroy(&text_buf);
}

View File

@ -27,17 +27,14 @@ void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
read(*fd, intermediate_buf + bytes_read, to_read);
}
text_buffer_t tex = text_buffer_create(ScanCtx.content_size);
text_buffer_append_string(&tex, intermediate_buf, intermediate_buf_len);
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int i = 0; i < intermediate_buf_len; i++) {
text_buffer_append_char(&text_buf, *(intermediate_buf + i));
}
text_buffer_terminate_string(&text_buf);
meta_line_t *meta = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
meta->key = MetaContent;
strcpy(meta->strval, text_buf.dyn_buffer.buf);
text_buffer_destroy(&text_buf);
free(intermediate_buf);
strcpy(meta->strval, tex.dyn_buffer.buf);
APPEND_META(doc, meta)
free(intermediate_buf);
text_buffer_destroy(&tex);
}

View File

@ -16,6 +16,7 @@
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <ctype.h>
#include <mupdf/fitz.h>
#include <mupdf/pdf.h>
@ -49,6 +50,7 @@
#include "parsing/media.h"
#include "parsing/font.h"
#include "cli.h"
#include "utf8.h/utf8.h"
#ifndef SIST_SCAN_ONLY
#include "src/index/elastic.h"

View File

@ -25,6 +25,7 @@ typedef struct tpool {
int done_cnt;
int stop;
void (*cleanup_func)();
} tpool_t;
@ -100,7 +101,7 @@ static void *tpool_worker(void *arg) {
tpool_t *pool = arg;
while (1) {
pthread_mutex_lock(&(pool->work_mutex));
pthread_mutex_lock(&pool->work_mutex);
if (pool->stop) {
break;
}
@ -120,7 +121,7 @@ static void *tpool_worker(void *arg) {
pthread_mutex_lock(&(pool->work_mutex));
pool->done_cnt++;
progress_bar_print((double)pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
if (pool->work_head == NULL) {
pthread_cond_signal(&(pool->working_cond));
@ -188,11 +189,11 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
tpool_t *pool = malloc(sizeof(tpool_t));
pool->thread_cnt = thread_cnt;
pool->work_cnt =0;
pool->done_cnt =0;
pool->work_cnt = 0;
pool->done_cnt = 0;
pool->stop = 0;
pool->cleanup_func = cleanup_func;
pool->threads = malloc(sizeof(pthread_t) * thread_cnt);
pool->threads = calloc(sizeof(pthread_t), thread_cnt);
pthread_mutex_init(&(pool->work_mutex), NULL);
@ -202,11 +203,14 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
pool->work_head = NULL;
pool->work_tail = NULL;
for (size_t i = 0; i < thread_cnt; i++) {
return pool;
}
/*
 * Launch the pool's worker threads.
 *
 * Creates pool->thread_cnt threads running tpool_worker with the pool as
 * argument, and detaches every thread that was created successfully.
 * Nothing is returned; a thread that could not be created is skipped.
 */
void tpool_start(tpool_t *pool) {
    for (size_t i = 0; i < pool->thread_cnt; i++) {
        /*
         * NOTE(review): the handle is written to a local copy, so
         * pool->threads[i] is never updated with the real thread id.
         * Kept as-is because code outside this view may rely on the array
         * staying untouched — confirm before storing the handle in the pool.
         */
        pthread_t thread = pool->threads[i];

        if (pthread_create(&thread, NULL, tpool_worker, pool) != 0) {
            /* After a failed pthread_create the handle is undefined: do not detach it. */
            continue;
        }

        pthread_detach(thread);
    }
}

View File

@ -9,6 +9,7 @@ typedef struct tpool tpool_t;
typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(size_t num, void (*cleanup_func)());
void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *tm);
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);

View File

@ -89,10 +89,71 @@ void text_buffer_terminate_string(text_buffer_t *buf) {
dyn_buffer_write_char(&buf->dyn_buffer, '\0');
}
int text_buffer_append_string(text_buffer_t *buf, char * str) {
char * ptr = str;
while (*ptr) {
text_buffer_append_char(buf, *ptr++);
/*
 * Check whether the bytes starting at `s` form one well-formed UTF-8
 * sequence.
 *
 * For a multi-byte sequence the function verifies that:
 *   - every trailing byte has the form 10xxxxxx,
 *   - the byte right after the sequence is NOT of the form 10xxxxxx
 *     (note: this reads one byte past the sequence itself),
 *   - the leading payload bits are not all zero, i.e. the value could not
 *     have been written with a shorter sequence.
 *
 * Returns TRUE for a single-byte (high bit clear) character or a sequence
 * passing all checks, FALSE otherwise, including for a lead byte that
 * matches none of the 1- to 4-byte patterns.
 */
__always_inline
int utf8_validchr(const char* s) {
    const int lead = s[0];

    /* 0xxxxxxx: plain 7-bit character. */
    if ((lead & 0x80) == 0x00) {
        return TRUE;
    }

    /* 11110xxx: four-byte sequence. */
    if ((lead & 0xf8) == 0xf0) {
        if (!((s[1] & 0xc0) == 0x80 && (s[2] & 0xc0) == 0x80 && (s[3] & 0xc0) == 0x80)) {
            return FALSE;
        }
        if ((s[4] & 0xc0) == 0x80) {
            return FALSE;
        }
        if ((s[0] & 0x07) == 0 && (s[1] & 0x30) == 0) {
            return FALSE;
        }
        return TRUE;
    }

    /* 1110xxxx: three-byte sequence. */
    if ((lead & 0xf0) == 0xe0) {
        if (!((s[1] & 0xc0) == 0x80 && (s[2] & 0xc0) == 0x80)) {
            return FALSE;
        }
        if ((s[3] & 0xc0) == 0x80) {
            return FALSE;
        }
        if ((s[0] & 0x0f) == 0 && (s[1] & 0x20) == 0) {
            return FALSE;
        }
        return TRUE;
    }

    /* 110xxxxx: two-byte sequence. */
    if ((lead & 0xe0) == 0xc0) {
        if ((s[1] & 0xc0) != 0x80) {
            return FALSE;
        }
        if ((s[2] & 0xc0) == 0x80) {
            return FALSE;
        }
        if ((s[0] & 0x1e) == 0) {
            return FALSE;
        }
        return TRUE;
    }

    /* Anything else (e.g. a stray continuation byte) is not a valid lead byte. */
    return FALSE;
}
/*
 * Append a length-bounded UTF-8 string to a text buffer, one code point at
 * a time, then NUL-terminate the buffer.
 *
 * Decoding stops at the first NUL code point, or once the read position is
 * within 4 bytes of `len`; code points decoded past that limit are not
 * appended.
 *
 * buf: destination text buffer
 * str: UTF-8 input
 * len: number of readable bytes in `str`
 *
 * Returns 0 when the loop ran to completion, or TEXT_BUF_FULL when
 * text_buffer_append_char() reported that the buffer is full (appending
 * stops at that point). The buffer is terminated in both cases.
 */
int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len) {
    int status = 0;
    utf8_int32_t c;

    for (void *v = utf8codepoint(str, &c);
         c != '\0' && (size_t) ((char *) v - str) + 4 < len;
         v = utf8codepoint(v, &c)) {
        /*
         * NOTE(review): `v` already points past the code point held in `c`,
         * so this validates the sequence that follows it rather than the one
         * being appended — confirm this is intended.
         */
        if (!utf8_validchr(v)) {
            continue;
        }

        if (text_buffer_append_char(buf, c) == TEXT_BUF_FULL) {
            status = TEXT_BUF_FULL;
            break;
        }
    }

    text_buffer_terminate_string(buf);
    return status;
}
/*
 * Append a NUL-terminated UTF-8 string to a text buffer, one code point at
 * a time, then NUL-terminate the buffer.
 *
 * buf: destination text buffer
 * str: NUL-terminated UTF-8 input
 *
 * Returns 0 when the whole string was consumed, or TEXT_BUF_FULL when
 * text_buffer_append_char() reported that the buffer is full (appending
 * stops at that point). The buffer is terminated in both cases.
 */
int text_buffer_append_string0(text_buffer_t *buf, char *str) {
    int status = 0;
    utf8_int32_t c;

    for (void *v = utf8codepoint(str, &c); c != '\0'; v = utf8codepoint(v, &c)) {
        /*
         * NOTE(review): `v` already points past the code point held in `c`,
         * so this validates the sequence that follows it rather than the one
         * being appended — confirm this is intended.
         */
        if (!utf8_validchr(v)) {
            continue;
        }

        if (text_buffer_append_char(buf, c) == TEXT_BUF_FULL) {
            status = TEXT_BUF_FULL;
            break;
        }
    }

    text_buffer_terminate_string(buf);
    return status;
}
@ -104,15 +165,31 @@ int text_buffer_append_char(text_buffer_t *buf, int c) {
dyn_buffer_write_char(&buf->dyn_buffer, ' ');
buf->last_char_was_whitespace = TRUE;
if (buf->dyn_buffer.cur >= buf->max_size) {
if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) {
return TEXT_BUF_FULL;
}
}
} else {
buf->last_char_was_whitespace = FALSE;
dyn_buffer_write_char(&buf->dyn_buffer, (char) c);
grow_buffer_small(&buf->dyn_buffer);
if (buf->dyn_buffer.cur >= buf->max_size) {
if (0 == ((utf8_int32_t) 0xffffff80 & c)) {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = (char) c;
} else if (0 == ((utf8_int32_t) 0xfffff800 & c)) {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xc0 | (char) (c >> 6);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
} else if (0 == ((utf8_int32_t) 0xffff0000 & c)) {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xe0 | (char) (c >> 12);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
} else {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xf0 | (char) (c >> 18);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 12) & 0x3f);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
}
if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) {
return TEXT_BUF_FULL;
}
}
@ -144,7 +221,7 @@ dyn_buffer_t url_escape(char *str) {
dyn_buffer_t text = dyn_buffer_create();
char * ptr = str;
char *ptr = str;
while (*ptr) {
if (*ptr == '#') {
dyn_buffer_write(&text, "%23", 3);
@ -177,7 +254,7 @@ char *expandpath(const char *path) {
wordexp_t w;
wordexp(path, &w, 0);
char * expanded = malloc(strlen(w.we_wordv[0]) + 2);
char *expanded = malloc(strlen(w.we_wordv[0]) + 2);
strcpy(expanded, w.we_wordv[0]);
strcat(expanded, "/");

View File

@ -5,7 +5,10 @@
/* Status code returned by the text_buffer_* functions when the buffer is full. */
#define TEXT_BUF_FULL (-1)
/* 16 KiB. Parenthesized so the macro behaves as a single value in any
 * expression (e.g. `x / INITIAL_BUF_SIZE`). */
#define INITIAL_BUF_SIZE (1024 * 16)
#define SHOULD_IGNORE_CHAR(c) c < '0' || c > 'z'
/* True when `c` must be dropped: the exact negation of SHOULD_KEEP_CHAR. */
#define SHOULD_IGNORE_CHAR(c) (!(SHOULD_KEEP_CHAR(c)))
/* True when `c` is at or above '!' (0x21), i.e. not a space, an ASCII control
 * character or a negative value. The argument is parenthesized so that
 * operator and ternary expressions are compared as a whole. */
#define SHOULD_KEEP_CHAR(c) ((c) >= (int)'!')
typedef struct dyn_buffer {
char *buf;
@ -21,8 +24,10 @@ typedef struct text_buffer {
dyn_buffer_t dyn_buffer;
} text_buffer_t;
char *abspath(const char * path);
char *abspath(const char *path);
char *expandpath(const char *path);
dyn_buffer_t url_escape(char *str);
void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
@ -56,14 +61,16 @@ text_buffer_t text_buffer_create(int max_size);
void text_buffer_terminate_string(text_buffer_t *buf);
int text_buffer_append_string(text_buffer_t *buf, char * str);
/* Decodes the UTF-8 text in `str` (bounded by `len`) code point by code point
 * into `buf` and NUL-terminates `buf`. */
int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len);
/* Same as text_buffer_append_string() for a NUL-terminated `str`. */
int text_buffer_append_string0(text_buffer_t *buf, char *str);
/* Appends the single code point `c`, encoded as UTF-8. Returns TEXT_BUF_FULL
 * when the buffer has a non-zero max_size and has reached it. */
int text_buffer_append_char(text_buffer_t *buf, int c);
/* NOTE(review): presumably records `mtime` for the file identified by
 * `inode_no` in `table` -- implementation not visible here, confirm. */
void incremental_put(GHashTable *table, unsigned long inode_no, int mtime);
/* NOTE(review): presumably looks up the value stored for `inode_no`; the
 * result for a missing key is not visible here -- confirm. */
int incremental_get(GHashTable *table, unsigned long inode_no);
/* NOTE(review): presumably flags `inode_no` in `table` as unchanged so its
 * existing index entry is copied -- confirm against the implementation. */
int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no);
#endif

1
utf8.h Submodule

@ -0,0 +1 @@
Subproject commit 2a7c5bfa952816cd1c674e604d31c6e0268ba770