utf8 update + bug fixes

This commit is contained in:
simon 2019-11-03 07:44:54 -05:00
parent f8f1a27180
commit 7962a994e2
28 changed files with 1022 additions and 503 deletions

9
.gitmodules vendored
View File

@ -16,3 +16,12 @@
[submodule "lmdb"] [submodule "lmdb"]
path = lmdb path = lmdb
url = https://github.com/LMDB/lmdb url = https://github.com/LMDB/lmdb
[submodule "utf8.h"]
path = utf8.h
url = https://github.com/sheredom/utf8.h
[submodule "lib/openjpeg"]
path = lib/openjpeg
url = https://github.com/uclouvain/openjpeg
[submodule "lib/harfbuzz"]
path = lib/harfbuzz
url = https://github.com/harfbuzz/harfbuzz

View File

@ -37,6 +37,9 @@ if (WITH_SIST2)
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h src/cli.c src/cli.h
# utf8.h
utf8.h/utf8.h
) )
endif () endif ()
@ -67,6 +70,9 @@ if (WITH_SIST2_SCAN)
lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c lmdb/libraries/liblmdb/lmdb.h lmdb/libraries/liblmdb/mdb.c
lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c lmdb/libraries/liblmdb/midl.h lmdb/libraries/liblmdb/midl.c
src/cli.c src/cli.h src/cli.c src/cli.h
# utf8.h
utf8.h/utf8.h
) )
endif () endif ()
@ -116,10 +122,10 @@ if (WITH_SIST2)
target_compile_options(sist2 target_compile_options(sist2
PRIVATE PRIVATE
# -O3 -Ofast
# -march=native # -march=native
# -fno-stack-protector -fno-stack-protector
# -fomit-frame-pointer -fomit-frame-pointer
) )
TARGET_LINK_LIBRARIES( TARGET_LINK_LIBRARIES(
@ -150,6 +156,9 @@ if (WITH_SIST2)
m m
bz2 bz2
magic magic
harfbuzz
openjp2
freetype
) )
endif () endif ()
@ -187,7 +196,7 @@ if (WITH_SIST2_SCAN)
) )
target_compile_options(sist2_scan target_compile_options(sist2_scan
PRIVATE PRIVATE
-O3 -Ofast
# -march=native # -march=native
-fno-stack-protector -fno-stack-protector
-fomit-frame-pointer -fomit-frame-pointer
@ -215,6 +224,9 @@ if (WITH_SIST2_SCAN)
pthread pthread
m m
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
freetype
) )
endif () endif ()

View File

@ -58,7 +58,7 @@ File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:--- :---|:---|:---|:---|:---
pdf,xps,cbz,cbr,fb2,epub | MuPDF | yes | yes, `png` | title | pdf,xps,cbz,cbr,fb2,epub | MuPDF | yes | yes, `png` | title |
`audio/*` | libav | - | yes, `jpeg` | ID3 tags | `audio/*` | libav | - | yes, `jpeg` | ID3 tags |
`video/*` | libav | - | yes, `jpeg` | *planned* | `video/*` | libav | - | yes, `jpeg` | title, comment |
`image/*` | libav | - | yes, `jpeg` | *planned* | `image/*` | libav | - | yes, `jpeg` | *planned* |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style | ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - | `text/plain` | *(none)* | yes | no | - |

2
cJSON

@ -1 +1 @@
Subproject commit 2de7d04aaf67598e23d06573dcb4e370ebbad410 Subproject commit 533ff8a783be0d5c81581ab17cd2aeba3f0044c1

1
lib/harfbuzz Submodule

@ -0,0 +1 @@
Subproject commit 7cde68f10cdf2c3ff77c1d9077475c0fc034c75c

1
lib/openjpeg Submodule

@ -0,0 +1 @@
Subproject commit 5875a6b44618fb7dfd5cd6d742533eaee2014060

View File

@ -254,6 +254,7 @@ text/mcf, mcf
text/pascal, pas text/pascal, pas
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
text/richtext, rt|rtf|rtx text/richtext, rt|rtf|rtx
text/rtf,
text/scriplet, wsc text/scriplet, wsc
text/x-awk, awk text/x-awk, awk
!video/x-jng, jng !video/x-jng, jng
@ -263,7 +264,7 @@ image/x-xwindowdump, xwd
!image/vnd.adobe.photoshop, psd !image/vnd.adobe.photoshop, psd
text/tab-separated-values, tsv text/tab-separated-values, tsv
text/troff, man|me|ms|roff|t|tr text/troff, man|me|ms|roff|t|tr
text/uri-list, uni|unis|uri|uris text/uri-list, uji|unis|uri|uris
text/vnd.abc, abc text/vnd.abc, abc
text/vnd.fmi.flexstor, flx text/vnd.fmi.flexstor, flx
text/vnd.wap.wmlscript, wmls text/vnd.wap.wmlscript, wmls
@ -360,3 +361,53 @@ application/x-wine-extension-ini,
application/x-cbz, cbz application/x-cbz, cbz
application/x-cbr, cbr application/x-cbr, cbr
application/x-ms-compress-szdd, fon application/x-ms-compress-szdd, fon
application/x-atari-7800-rom, a78
application/x-nes-rom, nes
application/x-font-pfm, pfm
application/x-gettext-translation,
image/wmf,
application/pgp-keys,
image/x-3ds, 3ds
application/x-lz4, lz4
application/vnd.openxmlformats-officedocument.presentationml.presentation, pptx
application/vnd.oasis.opendocument.presentation, odp
application/x-msaccess, accdb
application/vnd.oasis.opendocument.spreadsheet, ods
audio/x-aiff, aiff|aif
text/x-ms-regedit, reg
application/x-gamecube-rom,
application/x-nintendo-ds-rom,
text/x-objective-c,
application/x-font-gdos,
application/x-apple-diskimage,
application/x-zstd, zst
video/x-m4v, m4v
message/news,
application/vnd.symbian.install,
application/x-lzh-compressed,
application/x-dosdriver,
application/vnd.tcpdump.pcap, pcap
x-epoc/x-sisx-app,
application/x-avira-qua,
video/MP2T,
application/x-snappy-framed,
application/x-lz4+json, jsonlz4
application/x-dmp, dmp
application/zlib, z
application/x-pgp-keyring,
application/x-gdbm,
application/x-font-pf2, pf2
application/x-zip,
application/x-coredump,
application/x-java-jmod, jmod
application/x-terminfo,
application/x-terminfo2,
application/x-arc,
application/vnd.lotus-1-2-3,
image/x-win-bitmap,
application/x-maxis-dbpf,
text/PGP,
audio/x-hx-aac-adts,
application/x-chrome-extension,
image/heic, heic
image/x-gem,
1 application/arj arj
254 text/pascal pas
255 text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
256 text/richtext rt|rtf|rtx
257 text/rtf
258 text/scriplet wsc
259 text/x-awk awk
260 !video/x-jng jng
264 !image/vnd.adobe.photoshop psd
265 text/tab-separated-values tsv
266 text/troff man|me|ms|roff|t|tr
267 text/uri-list uni|unis|uri|uris uji|unis|uri|uris
268 text/vnd.abc abc
269 text/vnd.fmi.flexstor flx
270 text/vnd.wap.wmlscript wmls
361 application/x-cbz cbz
362 application/x-cbr cbr
363 application/x-ms-compress-szdd fon
364 application/x-atari-7800-rom a78
365 application/x-nes-rom nes
366 application/x-font-pfm pfm
367 application/x-gettext-translation
368 image/wmf
369 application/pgp-keys
370 image/x-3ds 3ds
371 application/x-lz4 lz4
372 application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
373 application/vnd.oasis.opendocument.presentation odp
374 application/x-msaccess accdb
375 application/vnd.oasis.opendocument.spreadsheet ods
376 audio/x-aiff aiff|aif
377 text/x-ms-regedit reg
378 application/x-gamecube-rom
379 application/x-nintendo-ds-rom
380 text/x-objective-c
381 application/x-font-gdos
382 application/x-apple-diskimage
383 application/x-zstd zst
384 video/x-m4v m4v
385 message/news
386 application/vnd.symbian.install
387 application/x-lzh-compressed
388 application/x-dosdriver
389 application/vnd.tcpdump.pcap pcap
390 x-epoc/x-sisx-app
391 application/x-avira-qua
392 video/MP2T
393 application/x-snappy-framed
394 application/x-lz4+json jsonlz4
395 application/x-dmp dmp
396 application/zlib z
397 application/x-pgp-keyring
398 application/x-gdbm
399 application/x-font-pf2 pf2
400 application/x-zip
401 application/x-coredump
402 application/x-java-jmod jmod
403 application/x-terminfo
404 application/x-terminfo2
405 application/x-arc
406 application/vnd.lotus-1-2-3
407 image/x-win-bitmap
408 application/x-maxis-dbpf
409 text/PGP
410 audio/x-hx-aac-adts
411 application/x-chrome-extension
412 image/heic heic
413 image/x-gem

View File

@ -2,12 +2,28 @@
cd lib cd lib
cd mupdf cd mupdf
HAVE_X11=no HAVE_GLUT=no make -j 4 USE_SYSTEM_HARFBUZZ=yes USE_SYSTEM_OPENJPEG=yes HAVE_X11=no HAVE_GLUT=no make -j 4
cd .. cd ..
mv mupdf/build/release/libmupdf.a . mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a . mv mupdf/build/release/libmupdf-third.a .
# openjp2
cd openjpeg
#cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -march=native -DNDEBUG"
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3"
make -j 4
cd ..
mv openjpeg/bin/libopenjp2.a .
# harfbuzz
cd harfbuzz
./autogen.sh
./configure --disable-shared --enable-static
make -j 4
cd ..
mv harfbuzz/src/.libs/libharfbuzz.a .
# ffmpeg # ffmpeg
cd ffmpeg cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \ ./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \

View File

@ -9,6 +9,22 @@ cd ..
mv mupdf/build/release/libmupdf.a . mv mupdf/build/release/libmupdf.a .
mv mupdf/build/release/libmupdf-third.a . mv mupdf/build/release/libmupdf-third.a .
# openjp2
cd openjpeg
#cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3 -march=native -DNDEBUG"
cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS="-O3"
gmake -j 4
cd ..
mv openjpeg/bin/libopenjp2.a .
# harfbuzz
cd harfbuzz
./autogen.sh
./configure --disable-shared --enable-static
gmake -j 4
cd ..
mv harfbuzz/src/.libs/libharfbuzz.a .
# ffmpeg # ffmpeg
cd ffmpeg cd ffmpeg
./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \ ./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay \

View File

@ -12,7 +12,8 @@ major_mime = {
"audio": 7, "audio": 7,
"image": 8, "image": 8,
"text": 9, "text": 9,
"application": 10 "application": 10,
"x-epoc": 11,
} }
pdf = ( pdf = (

View File

@ -102,7 +102,14 @@ void elastic_flush() {
cJSON *ret_json = cJSON_Parse(r->body); cJSON *ret_json = cJSON_Parse(r->body);
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) { if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
fprintf(stderr, "%s\n", r->body); cJSON *err;
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
char* str = cJSON_Print(err);
fprintf(stderr, "%s\n", str);
cJSON_free(str);
}
}
} }
cJSON_Delete(ret_json); cJSON_Delete(ret_json);

View File

@ -236,7 +236,7 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
case MetaTitle: { case MetaTitle: {
buf.cur = 0; buf.cur = 0;
while ((c = getc(file)) != 0) { while ((c = getc(file)) != 0) {
if (!(SHOULD_IGNORE_CHAR(c)) || c == ' ') { if (SHOULD_KEEP_CHAR(c) || c == ' ') {
dyn_buffer_write_char(&buf, (char) c); dyn_buffer_write_char(&buf, (char) c);
} }
} }
@ -244,6 +244,9 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
cJSON_AddStringToObject(document, get_meta_key_text(key), buf.buf); cJSON_AddStringToObject(document, get_meta_key_text(key), buf.buf);
break; break;
} }
default:
fprintf(stderr, "Invalid meta key (corrupt index): %x", key);
break;
} }
key = getc(file); key = getc(file);

View File

@ -3,10 +3,9 @@
parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int base) { parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int base) {
int len = (int) strlen(filepath); int len = (int) strlen(filepath);
parse_job_t *job = malloc(sizeof(parse_job_t) + len); parse_job_t *job = malloc(sizeof(parse_job_t) + len);
memcpy(&(job->filepath), filepath, len + 1); strcpy(job->filepath, filepath);
job->base = base; job->base = base;
char *p = strrchr(filepath + base, '.'); char *p = strrchr(filepath + base, '.');
if (p != NULL) { if (p != NULL) {

View File

@ -10,7 +10,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0" #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.0.14"; static const char *const Version = "1.1.0";
static const char *const usage[] = { static const char *const usage[] = {
"sist2 scan [OPTION]... PATH", "sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX", "sist2 index [OPTION]... INDEX",
@ -52,11 +52,10 @@ void sist2_scan(scan_args_t *args) {
ScanCtx.tn_qscale = args->quality; ScanCtx.tn_qscale = args->quality;
ScanCtx.tn_size = args->size; ScanCtx.tn_size = args->size;
ScanCtx.content_size = args->content_size; ScanCtx.content_size = args->content_size;
ScanCtx.pool = tpool_create(args->threads, serializer_cleanup);
ScanCtx.threads = args->threads; ScanCtx.threads = args->threads;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path)); strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name)); strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
strcpy(ScanCtx.index.desc.root, args->path); strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root); ScanCtx.index.desc.root_len = (short) strlen(ScanCtx.index.desc.root);
init_dir(ScanCtx.index.path); init_dir(ScanCtx.index.path);
@ -93,6 +92,8 @@ void sist2_scan(scan_args_t *args) {
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table)); printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
} }
ScanCtx.pool = tpool_create(args->threads, serializer_cleanup);
tpool_start(ScanCtx.pool);
walk_directory_tree(ScanCtx.index.desc.root); walk_directory_tree(ScanCtx.index.desc.root);
tpool_wait(ScanCtx.pool); tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool); tpool_destroy(ScanCtx.pool);

View File

@ -184,10 +184,14 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
for (int i = 0; i < num_chars; i++) { for (int i = 0; i < num_chars; i++) {
char c = font_name[i]; char c = font_name[i];
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) {
c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER); err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) { if (err != 0) {
continue; continue;
} }
}
glyph_t glyph = ft_glyph_to_glyph(face->glyph); glyph_t glyph = ft_glyph_to_glyph(face->glyph);
pen.x += kerning_offset(c, pc, face); pen.x += kerning_offset(c, pc, face);

View File

@ -1,6 +1,9 @@
#include "src/sist.h" #include "src/sist.h"
#include "src/ctx.h" #include "src/ctx.h"
#define MIN_SIZE 32
__always_inline
AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) { AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG); AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
@ -22,8 +25,8 @@ AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
return jpeg; return jpeg;
} }
__always_inline
AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) { AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) {
AVFrame *scaled_frame = av_frame_alloc();
int dstW; int dstW;
int dstH; int dstH;
@ -41,16 +44,22 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si
} }
} }
if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) {
return NULL;
}
AVFrame *scaled_frame = av_frame_alloc();
struct SwsContext *ctx = sws_getContext( struct SwsContext *ctx = sws_getContext(
decoder->width, decoder->height, decoder->pix_fmt, decoder->width, decoder->height, decoder->pix_fmt,
dstW, dstH, AV_PIX_FMT_YUVJ420P, dstW, dstH, AV_PIX_FMT_YUVJ420P,
SWS_FAST_BILINEAR, 0, 0, 0 SWS_FAST_BILINEAR, 0, 0, 0
); );
int dst_buf_len = avpicture_get_size(AV_PIX_FMT_YUVJ420P, dstW, dstH); int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1);
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len); uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len);
avpicture_fill((AVPicture *) scaled_frame, dst_buf, AV_PIX_FMT_YUVJ420P, dstW, dstH); av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
sws_scale(ctx, sws_scale(ctx,
(const uint8_t *const *) frame->data, frame->linesize, (const uint8_t *const *) frame->data, frame->linesize,
@ -81,7 +90,7 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
if (read_frame_ret != 0) { if (read_frame_ret != 0) {
if (read_frame_ret != AVERROR_EOF) { if (read_frame_ret != AVERROR_EOF) {
fprintf(stderr, "Error reading frame: %s\n", av_err2str(read_frame_ret)); fprintf(stderr, "Error reading frame: %d\n", read_frame_ret);
} }
av_frame_free(&frame); av_frame_free(&frame);
av_packet_unref(&avPacket); av_packet_unref(&avPacket);
@ -108,35 +117,40 @@ AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int st
} }
#define APPEND_TAG_META(doc, tag, keyname) \ #define APPEND_TAG_META(doc, tag, keyname) \
text_buffer_t tex = text_buffer_create(4096); \ text_buffer_t tex = text_buffer_create(-1); \
text_buffer_append_string(&tex, tag->value); \ text_buffer_append_string0(&tex, tag->value); \
meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \ meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \
meta_tag->key = keyname; \ meta_tag->key = keyname; \
strcpy(meta_tag->strval, tex.dyn_buffer.buf); \ strcpy(meta_tag->strval, tex.dyn_buffer.buf); \
APPEND_META(doc, meta_tag) \ APPEND_META(doc, meta_tag) \
text_buffer_destroy(&tex); text_buffer_destroy(&tex);
__always_inline
void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) { void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) {
AVDictionaryEntry *tag = NULL; AVDictionaryEntry *tag = NULL;
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
char *key = tag->key; char key[32];
for (; *key; ++key) *key = (char) tolower(*key); strncpy(key, tag->key, sizeof(key));
if (strcmp(tag->key, "artist") == 0) { char *ptr = key;
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
if (strcmp(key, "artist") == 0) {
APPEND_TAG_META(doc, tag, MetaArtist) APPEND_TAG_META(doc, tag, MetaArtist)
} else if (strcmp(tag->key, "genre") == 0) { } else if (strcmp(key, "genre") == 0) {
APPEND_TAG_META(doc, tag, MetaGenre) APPEND_TAG_META(doc, tag, MetaGenre)
} else if (strcmp(tag->key, "title") == 0) { } else if (strcmp(key, "title") == 0) {
APPEND_TAG_META(doc, tag, MetaTitle) APPEND_TAG_META(doc, tag, MetaTitle)
} else if (strcmp(tag->key, "album_artist") == 0) { } else if (strcmp(key, "album_artist") == 0) {
APPEND_TAG_META(doc, tag, MetaAlbumArtist) APPEND_TAG_META(doc, tag, MetaAlbumArtist)
} else if (strcmp(tag->key, "album") == 0) { } else if (strcmp(key, "album") == 0) {
APPEND_TAG_META(doc, tag, MetaAlbum) APPEND_TAG_META(doc, tag, MetaAlbum)
} }
} }
} }
__always_inline
void append_video_meta(AVFormatContext *pFormatCtx, document_t *doc, int include_audio_tags) { void append_video_meta(AVFormatContext *pFormatCtx, document_t *doc, int include_audio_tags) {
meta_line_t *meta_duration = malloc(sizeof(meta_line_t)); meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
@ -146,17 +160,20 @@ void append_video_meta(AVFormatContext *pFormatCtx, document_t *doc, int include
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t)); meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate; meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->intval = pFormatCtx->bit_rate; meta_bitrate->longval = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate) APPEND_META(doc, meta_bitrate)
AVDictionaryEntry *tag = NULL; AVDictionaryEntry *tag = NULL;
while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
char *key = tag->key; char key[32];
for (; *key; ++key) *key = (char) tolower(*key); strncpy(key, tag->key, sizeof(key));
if (strcmp(tag->key, "title") == 0 && include_audio_tags) { char *ptr = key;
for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr);
if (strcmp(key, "title") == 0 && include_audio_tags) {
APPEND_TAG_META(doc, tag, MetaTitle) APPEND_TAG_META(doc, tag, MetaTitle)
} else if (strcmp(tag->key, "comment") == 0) { } else if (strcmp(key, "comment") == 0) {
APPEND_TAG_META(doc, tag, MetaContent) APPEND_TAG_META(doc, tag, MetaContent)
} }
} }
@ -174,7 +191,7 @@ void parse_media(const char *filepath, document_t *doc) {
} }
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL); int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
if (res < 0) { if (res < 0) {
printf("ERR%s %s\n", filepath, av_err2str(res)); fprintf(stderr, "media error: %s %s\n", filepath, av_err2str(res));
return; return;
} }
@ -224,7 +241,7 @@ void parse_media(const char *filepath, document_t *doc) {
append_video_meta(pFormatCtx, doc, audio_stream == -1); append_video_meta(pFormatCtx, doc, audio_stream == -1);
} }
if (stream->codecpar->width <= 20 || stream->codecpar->height <= 20) { if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
avformat_close_input(&pFormatCtx); avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx); avformat_free_context(pFormatCtx);
return; return;
@ -259,6 +276,14 @@ void parse_media(const char *filepath, document_t *doc) {
// Scale frame // Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size); AVFrame *scaled_frame = scale_frame(decoder, frame, ScanCtx.tn_size);
if (scaled_frame == NULL) {
av_frame_free(&frame);
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
// Encode frame to jpeg // Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale); AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ScanCtx.tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame); avcodec_send_frame(jpeg_encoder, scaled_frame);
@ -268,7 +293,8 @@ void parse_media(const char *filepath, document_t *doc) {
avcodec_receive_packet(jpeg_encoder, &jpeg_packet); avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail // Save thumbnail
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, jpeg_packet.size); store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data,
jpeg_packet.size);
av_packet_unref(&jpeg_packet); av_packet_unref(&jpeg_packet);
av_frame_free(&frame); av_frame_free(&frame);

View File

@ -5,6 +5,7 @@
#include "src/sist.h" #include "src/sist.h"
#define MIN_VIDEO_SIZE 1024 * 64 #define MIN_VIDEO_SIZE 1024 * 64
#define MIN_IMAGE_SIZE 1024 * 2
void parse_media(const char * filepath, document_t *doc); void parse_media(const char * filepath, document_t *doc);

View File

@ -1,10 +1,12 @@
#include "mime.h" #include "mime.h"
unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) { unsigned int mime_get_mime_by_ext(GHashTable *ext_table, const char * ext) {
char lower[64]; char lower[8];
char *p = lower; char *p = lower;
while ((*ext)) { int cnt = 0;
while ((*ext) != '\0' && cnt + 1 < sizeof(lower)) {
*p++ = (char)tolower(*ext++); *p++ = (char)tolower(*ext++);
cnt++;
} }
*p = '\0'; *p = '\0';
return (size_t) g_hash_table_lookup(ext_table, lower); return (size_t) g_hash_table_lookup(ext_table, lower);

View File

@ -39,334 +39,385 @@ enum mime {
application_oda=655391, application_oda=655391,
application_ogg=655392, application_ogg=655392,
application_pdf=655393 | 0x40000000, application_pdf=655393 | 0x40000000,
application_pgp_signature=655394, application_pgp_keys=655394,
application_pkcs7_signature=655395, application_pgp_signature=655395,
application_pkix_cert=655396, application_pkcs7_signature=655396,
application_postscript=655397, application_pkix_cert=655397,
application_pro_eng=655398, application_postscript=655398,
application_ringing_tones=655399, application_pro_eng=655399,
application_smil=655400, application_ringing_tones=655400,
application_solids=655401, application_smil=655401,
application_sounder=655402, application_solids=655402,
application_step=655403, application_sounder=655403,
application_streamingmedia=655404, application_step=655404,
application_vda=655405, application_streamingmedia=655405,
application_vnd_fdf=655406, application_vda=655406,
application_vnd_font_fontforge_sfd=655407, application_vnd_fdf=655407,
application_vnd_hp_hpgl=655408, application_vnd_font_fontforge_sfd=655408,
application_vnd_iccprofile=655409, application_vnd_hp_hpgl=655409,
application_vnd_ms_cab_compressed=655410, application_vnd_iccprofile=655410,
application_vnd_ms_excel=655411, application_vnd_lotus_1_2_3=655411,
application_vnd_ms_fontobject=655412, application_vnd_ms_cab_compressed=655412,
application_vnd_ms_opentype=655413 | 0x20000000, application_vnd_ms_excel=655413,
application_vnd_ms_pki_certstore=655414, application_vnd_ms_fontobject=655414,
application_vnd_ms_pki_pko=655415, application_vnd_ms_opentype=655415 | 0x20000000,
application_vnd_ms_pki_seccat=655416, application_vnd_ms_pki_certstore=655416,
application_vnd_ms_powerpoint=655417, application_vnd_ms_pki_pko=655417,
application_vnd_ms_project=655418, application_vnd_ms_pki_seccat=655418,
application_vnd_oasis_opendocument_base=655419, application_vnd_ms_powerpoint=655419,
application_vnd_oasis_opendocument_formula=655420, application_vnd_ms_project=655420,
application_vnd_oasis_opendocument_graphics=655421, application_vnd_oasis_opendocument_base=655421,
application_vnd_oasis_opendocument_text=655422, application_vnd_oasis_opendocument_formula=655422,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655423, application_vnd_oasis_opendocument_graphics=655423,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655424, application_vnd_oasis_opendocument_presentation=655424,
application_vnd_wap_wmlc=655425, application_vnd_oasis_opendocument_spreadsheet=655425,
application_vnd_wap_wmlscriptc=655426, application_vnd_oasis_opendocument_text=655426,
application_vnd_xara=655427, application_vnd_openxmlformats_officedocument_presentationml_presentation=655427,
application_vocaltec_media_desc=655428, application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655428,
application_vocaltec_media_file=655429, application_vnd_openxmlformats_officedocument_wordprocessingml_document=655429,
application_winhelp=655430, application_vnd_symbian_install=655430,
application_wordperfect=655431, application_vnd_tcpdump_pcap=655431,
application_wordperfect6_0=655432, application_vnd_wap_wmlc=655432,
application_wordperfect6_1=655433, application_vnd_wap_wmlscriptc=655433,
application_x_123=655434, application_vnd_xara=655434,
application_x_7z_compressed=655435, application_vocaltec_media_desc=655435,
application_x_aim=655436, application_vocaltec_media_file=655436,
application_x_archive=655437, application_winhelp=655437,
application_x_authorware_bin=655438, application_wordperfect=655438,
application_x_authorware_map=655439, application_wordperfect6_0=655439,
application_x_authorware_seg=655440, application_wordperfect6_1=655440,
application_x_bcpio=655441, application_x_123=655441,
application_x_bittorrent=655442, application_x_7z_compressed=655442,
application_x_bsh=655443, application_x_aim=655443,
application_x_bytecode_python=655444, application_x_apple_diskimage=655444,
application_x_bzip=655445, application_x_arc=655445,
application_x_bzip2=655446, application_x_archive=655446,
application_x_cbr=655447, application_x_atari_7800_rom=655447,
application_x_cbz=655448 | 0x40000000, application_x_authorware_bin=655448,
application_x_cdlink=655449, application_x_authorware_map=655449,
application_x_chat=655450, application_x_authorware_seg=655450,
application_x_cocoa=655451, application_x_avira_qua=655451,
application_x_conference=655452, application_x_bcpio=655452,
application_x_cpio=655453, application_x_bittorrent=655453,
application_x_dbf=655454, application_x_bsh=655454,
application_x_dbt=655455, application_x_bytecode_python=655455,
application_x_debian_package=655456, application_x_bzip=655456,
application_x_deepv=655457, application_x_bzip2=655457,
application_x_director=655458, application_x_cbr=655458,
application_x_dosexec=655459, application_x_cbz=655459 | 0x40000000,
application_x_dvi=655460, application_x_cdlink=655460,
application_x_elc=655461, application_x_chat=655461,
application_x_chrome_extension=655462,
application_x_cocoa=655463,
application_x_conference=655464,
application_x_coredump=655465,
application_x_cpio=655466,
application_x_dbf=655467,
application_x_dbt=655468,
application_x_debian_package=655469,
application_x_deepv=655470,
application_x_director=655471,
application_x_dmp=655472,
application_x_dosdriver=655473,
application_x_dosexec=655474,
application_x_dvi=655475,
application_x_elc=655476,
application_x_empty=1, application_x_empty=1,
application_x_envoy=655463, application_x_envoy=655478,
application_x_esrehber=655464, application_x_esrehber=655479,
application_x_excel=655465, application_x_excel=655480,
application_x_executable=655466, application_x_executable=655481,
application_x_font_sfn=655467, application_x_font_gdos=655482,
application_x_font_ttf=655468 | 0x20000000, application_x_font_pf2=655483,
application_x_freelance=655469, application_x_font_pfm=655484,
application_x_git=655470, application_x_font_sfn=655485,
application_x_gsp=655471, application_x_font_ttf=655486 | 0x20000000,
application_x_gss=655472, application_x_freelance=655487,
application_x_gtar=655473, application_x_gamecube_rom=655488,
application_x_gzip=655474, application_x_gdbm=655489,
application_x_hdf=655475, application_x_gettext_translation=655490,
application_x_helpfile=655476, application_x_git=655491,
application_x_httpd_imap=655477, application_x_gsp=655492,
application_x_ima=655478, application_x_gss=655493,
application_x_innosetup=655479, application_x_gtar=655494,
application_x_internett_signup=655480, application_x_gzip=655495,
application_x_inventor=655481, application_x_hdf=655496,
application_x_ip2=655482, application_x_helpfile=655497,
application_x_java_applet=655483, application_x_httpd_imap=655498,
application_x_java_commerce=655484, application_x_ima=655499,
application_x_java_image=655485, application_x_innosetup=655500,
application_x_java_keystore=655486, application_x_internett_signup=655501,
application_x_kdelnk=655487, application_x_inventor=655502,
application_x_koan=655488, application_x_ip2=655503,
application_x_latex=655489, application_x_java_applet=655504,
application_x_livescreen=655490, application_x_java_commerce=655505,
application_x_lotus=655491, application_x_java_image=655506,
application_x_lzh=655492, application_x_java_jmod=655507,
application_x_lzx=655493, application_x_java_keystore=655508,
application_x_mach_binary=655494, application_x_kdelnk=655509,
application_x_mach_executable=655495, application_x_koan=655510,
application_x_magic_cap_package_1_0=655496, application_x_latex=655511,
application_x_mathcad=655497, application_x_livescreen=655512,
application_x_meme=655498, application_x_lotus=655513,
application_x_midi=655499, application_x_lz4=655514,
application_x_mif=655500, application_x_lz4_json=655515,
application_x_mix_transfer=655501, application_x_lzh=655516,
application_x_mobipocket_ebook=655502, application_x_lzh_compressed=655517,
application_x_ms_compress_szdd=655503, application_x_lzx=655518,
application_x_ms_pdb=655504, application_x_mach_binary=655519,
application_x_ms_reader=655505, application_x_mach_executable=655520,
application_x_navi_animation=655506, application_x_magic_cap_package_1_0=655521,
application_x_navidoc=655507, application_x_mathcad=655522,
application_x_navimap=655508, application_x_maxis_dbpf=655523,
application_x_navistyle=655509, application_x_meme=655524,
application_x_netcdf=655510, application_x_midi=655525,
application_x_newton_compatible_pkg=655511, application_x_mif=655526,
application_x_object=655512, application_x_mix_transfer=655527,
application_x_omc=655513, application_x_mobipocket_ebook=655528,
application_x_omcdatamaker=655514, application_x_ms_compress_szdd=655529,
application_x_omcregerator=655515, application_x_ms_pdb=655530,
application_x_pagemaker=655516, application_x_ms_reader=655531,
application_x_pcl=655517, application_x_msaccess=655532,
application_x_pixclscript=655518, application_x_navi_animation=655533,
application_x_pkcs7_certreqresp=655519, application_x_navidoc=655534,
application_x_pkcs7_signature=655520, application_x_navimap=655535,
application_x_project=655521, application_x_navistyle=655536,
application_x_qpro=655522, application_x_nes_rom=655537,
application_x_rar=655523, application_x_netcdf=655538,
application_x_rpm=655524, application_x_newton_compatible_pkg=655539,
application_x_sdp=655525, application_x_nintendo_ds_rom=655540,
application_x_sea=655526, application_x_object=655541,
application_x_seelogo=655527, application_x_omc=655542,
application_x_setupscript=655528, application_x_omcdatamaker=655543,
application_x_shar=655529, application_x_omcregerator=655544,
application_x_sharedlib=655530, application_x_pagemaker=655545,
application_x_shockwave_flash=655531, application_x_pcl=655546,
application_x_sprite=655532, application_x_pgp_keyring=655547,
application_x_sqlite3=655533, application_x_pixclscript=655548,
application_x_sv4cpio=655534, application_x_pkcs7_certreqresp=655549,
application_x_sv4crc=655535, application_x_pkcs7_signature=655550,
application_x_tar=655536, application_x_project=655551,
application_x_tbook=655537, application_x_qpro=655552,
application_x_tex_tfm=655538, application_x_rar=655553,
application_x_texinfo=655539, application_x_rpm=655554,
application_x_ustar=655540, application_x_sdp=655555,
application_x_visio=655541, application_x_sea=655556,
application_x_vnd_audioexplosion_mzz=655542, application_x_seelogo=655557,
application_x_vnd_ls_xpix=655543, application_x_setupscript=655558,
application_x_vrml=655544, application_x_shar=655559,
application_x_wais_source=655545, application_x_sharedlib=655560,
application_x_wine_extension_ini=655546, application_x_shockwave_flash=655561,
application_x_wintalk=655547, application_x_snappy_framed=655562,
application_x_world=655548, application_x_sprite=655563,
application_x_wri=655549, application_x_sqlite3=655564,
application_x_x509_ca_cert=655550, application_x_sv4cpio=655565,
application_x_xz=655551, application_x_sv4crc=655566,
application_xml=655552, application_x_tar=655567,
application_zip=655553, application_x_tbook=655568,
audio_it=458946, application_x_terminfo=655569,
audio_make=458947, application_x_terminfo2=655570,
audio_mid=458948, application_x_tex_tfm=655571,
audio_midi=458949, application_x_texinfo=655572,
audio_mp4=458950, application_x_ustar=655573,
audio_mpeg=458951, application_x_visio=655574,
audio_ogg=458952, application_x_vnd_audioexplosion_mzz=655575,
audio_s3m=458953, application_x_vnd_ls_xpix=655576,
audio_tsp_audio=458954, application_x_vrml=655577,
audio_tsplayer=458955, application_x_wais_source=655578,
audio_vnd_qcelp=458956, application_x_wine_extension_ini=655579,
audio_voxware=458957, application_x_wintalk=655580,
audio_x_flac=458958, application_x_world=655581,
audio_x_gsm=458959, application_x_wri=655582,
audio_x_jam=458960, application_x_x509_ca_cert=655583,
audio_x_liveaudio=458961, application_x_xz=655584,
audio_x_m4a=458962, application_x_zip=655585,
audio_x_midi=458963, application_x_zstd=655586,
audio_x_mod=458964, application_xml=655587,
audio_x_mp4a_latm=458965, application_zip=655588,
audio_x_mpeg_3=458966, application_zlib=655589,
audio_x_mpequrl=458967, audio_it=458982,
audio_x_nspaudio=458968, audio_make=458983,
audio_x_pn_realaudio=458969, audio_mid=458984,
audio_x_psid=458970, audio_midi=458985,
audio_x_realaudio=458971, audio_mp4=458986,
audio_x_twinvq=458972, audio_mpeg=458987,
audio_x_twinvq_plugin=458973, audio_ogg=458988,
audio_x_voc=458974, audio_s3m=458989,
audio_x_wav=458975, audio_tsp_audio=458990,
audio_xm=458976, audio_tsplayer=458991,
font_otf=327905 | 0x20000000, audio_vnd_qcelp=458992,
font_sfnt=327906 | 0x20000000, audio_voxware=458993,
font_woff=327907 | 0x20000000, audio_x_aiff=458994,
font_woff2=327908 | 0x20000000, audio_x_flac=458995,
image_cmu_raster=524517, audio_x_gsm=458996,
image_fif=524518, audio_x_hx_aac_adts=458997,
image_florian=524519, audio_x_jam=458998,
image_g3fax=524520, audio_x_liveaudio=458999,
image_gif=524521, audio_x_m4a=459000,
image_ief=524522, audio_x_midi=459001,
image_jpeg=524523, audio_x_mod=459002,
image_jutvision=524524, audio_x_mp4a_latm=459003,
image_naplps=524525, audio_x_mpeg_3=459004,
image_pict=524526, audio_x_mpequrl=459005,
image_png=524527, audio_x_nspaudio=459006,
image_svg=524528 | 0x80000000, audio_x_pn_realaudio=459007,
image_svg_xml=524529 | 0x80000000, audio_x_psid=459008,
image_tiff=524530, audio_x_realaudio=459009,
image_vnd_adobe_photoshop=524531 | 0x80000000, audio_x_twinvq=459010,
image_vnd_djvu=524532 | 0x80000000, audio_x_twinvq_plugin=459011,
image_vnd_fpx=524533, audio_x_voc=459012,
image_vnd_microsoft_icon=524534, audio_x_wav=459013,
image_vnd_rn_realflash=524535, audio_xm=459014,
image_vnd_rn_realpix=524536, font_otf=327943 | 0x20000000,
image_vnd_wap_wbmp=524537, font_sfnt=327944 | 0x20000000,
image_vnd_xiff=524538, font_woff=327945 | 0x20000000,
image_webp=524539, font_woff2=327946 | 0x20000000,
image_x_cmu_raster=524540, image_cmu_raster=524555,
image_x_cur=524541, image_fif=524556,
image_x_dwg=524542, image_florian=524557,
image_x_eps=524543, image_g3fax=524558,
image_x_exr=524544, image_gif=524559,
image_x_icns=524545, image_heic=524560,
image_x_icon=524546 | 0x80000000, image_ief=524561,
image_x_jg=524547, image_jpeg=524562,
image_x_jps=524548, image_jutvision=524563,
image_x_ms_bmp=524549, image_naplps=524564,
image_x_niff=524550, image_pict=524565,
image_x_pcx=524551, image_png=524566,
image_x_pict=524552, image_svg=524567 | 0x80000000,
image_x_portable_bitmap=524553, image_svg_xml=524568 | 0x80000000,
image_x_portable_graymap=524554, image_tiff=524569,
image_x_portable_pixmap=524555, image_vnd_adobe_photoshop=524570 | 0x80000000,
image_x_quicktime=524556, image_vnd_djvu=524571 | 0x80000000,
image_x_rgb=524557, image_vnd_fpx=524572,
image_x_tga=524558, image_vnd_microsoft_icon=524573,
image_x_tiff=524559, image_vnd_rn_realflash=524574,
image_x_xcf=524560 | 0x80000000, image_vnd_rn_realpix=524575,
image_x_xpixmap=524561 | 0x80000000, image_vnd_wap_wbmp=524576,
image_x_xwindowdump=524562, image_vnd_xiff=524577,
message_rfc822=196883, image_webp=524578,
model_vnd_dwf=65812, image_wmf=524579,
model_vnd_gdl=65813, image_x_3ds=524580,
model_vnd_gs_gdl=65814, image_x_cmu_raster=524581,
model_vrml=65815, image_x_cur=524582,
model_x_pov=65816, image_x_dwg=524583,
text_asp=590105, image_x_eps=524584,
text_css=590106, image_x_exr=524585,
text_html=590107, image_x_gem=524586,
text_javascript=590108, image_x_icns=524587,
text_mcf=590109, image_x_icon=524588 | 0x80000000,
text_pascal=590110, image_x_jg=524589,
text_plain=590111, image_x_jps=524590,
text_richtext=590112, image_x_ms_bmp=524591,
text_scriplet=590113, image_x_niff=524592,
text_tab_separated_values=590114, image_x_pcx=524593,
text_troff=590115, image_x_pict=524594,
text_uri_list=590116, image_x_portable_bitmap=524595,
text_vnd_abc=590117, image_x_portable_graymap=524596,
text_vnd_fmi_flexstor=590118, image_x_portable_pixmap=524597,
text_vnd_wap_wml=590119, image_x_quicktime=524598,
text_vnd_wap_wmlscript=590120, image_x_rgb=524599,
text_webviewhtml=590121, image_x_tga=524600,
text_x_Algol68=590122, image_x_tiff=524601,
text_x_asm=590123, image_x_win_bitmap=524602,
text_x_audiosoft_intra=590124, image_x_xcf=524603 | 0x80000000,
text_x_awk=590125, image_x_xpixmap=524604 | 0x80000000,
text_x_bcpl=590126, image_x_xwindowdump=524605,
text_x_c=590127, message_news=196926,
text_x_c__=590128, message_rfc822=196927,
text_x_component=590129, model_vnd_dwf=65856,
text_x_diff=590130, model_vnd_gdl=65857,
text_x_fortran=590131, model_vnd_gs_gdl=65858,
text_x_java=590132, model_vrml=65859,
text_x_la_asf=590133, model_x_pov=65860,
text_x_lisp=590134, text_PGP=590149,
text_x_m=590135, text_asp=590150,
text_x_m4=590136, text_css=590151,
text_x_makefile=590137, text_html=590152,
text_x_msdos_batch=590138, text_javascript=590153,
text_x_pascal=590139, text_mcf=590154,
text_x_perl=590140, text_pascal=590155,
text_x_php=590141, text_plain=590156,
text_x_po=590142, text_richtext=590157,
text_x_python=590143, text_rtf=590158,
text_x_ruby=590144, text_scriplet=590159,
text_x_sass=590145, text_tab_separated_values=590160,
text_x_scss=590146, text_troff=590161,
text_x_server_parsed_html=590147, text_uri_list=590162,
text_x_setext=590148, text_vnd_abc=590163,
text_x_sgml=590149, text_vnd_fmi_flexstor=590164,
text_x_shellscript=590150, text_vnd_wap_wml=590165,
text_x_speech=590151, text_vnd_wap_wmlscript=590166,
text_x_tcl=590152, text_webviewhtml=590167,
text_x_tex=590153, text_x_Algol68=590168,
text_x_uil=590154, text_x_asm=590169,
text_x_uuencode=590155, text_x_audiosoft_intra=590170,
text_x_vcalendar=590156, text_x_awk=590171,
text_x_vcard=590157, text_x_bcpl=590172,
text_xml=590158, text_x_c=590173,
video_animaflex=393551, text_x_c__=590174,
video_avi=393552, text_x_component=590175,
video_avs_video=393553, text_x_diff=590176,
video_mp4=393554, text_x_fortran=590177,
video_mpeg=393555, text_x_java=590178,
video_quicktime=393556, text_x_la_asf=590179,
video_vdo=393557, text_x_lisp=590180,
video_vivo=393558, text_x_m=590181,
video_vnd_rn_realvideo=393559, text_x_m4=590182,
video_vosaic=393560, text_x_makefile=590183,
video_webm=393561, text_x_ms_regedit=590184,
video_x_amt_demorun=393562, text_x_msdos_batch=590185,
video_x_amt_showrun=393563, text_x_objective_c=590186,
video_x_atomic3d_feature=393564, text_x_pascal=590187,
video_x_dl=393565, text_x_perl=590188,
video_x_dv=393566, text_x_php=590189,
video_x_fli=393567, text_x_po=590190,
video_x_flv=393568, text_x_python=590191,
video_x_isvideo=393569, text_x_ruby=590192,
video_x_jng=393570 | 0x80000000, text_x_sass=590193,
video_x_matroska=393571, text_x_scss=590194,
video_x_mng=393572, text_x_server_parsed_html=590195,
video_x_motion_jpeg=393573, text_x_setext=590196,
video_x_ms_asf=393574, text_x_sgml=590197,
video_x_msvideo=393575, text_x_shellscript=590198,
video_x_qtc=393576, text_x_speech=590199,
video_x_sgi_movie=393577, text_x_tcl=590200,
text_x_tex=590201,
text_x_uil=590202,
text_x_uuencode=590203,
text_x_vcalendar=590204,
text_x_vcard=590205,
text_xml=590206,
video_MP2T=393599,
video_animaflex=393600,
video_avi=393601,
video_avs_video=393602,
video_mp4=393603,
video_mpeg=393604,
video_quicktime=393605,
video_vdo=393606,
video_vivo=393607,
video_vnd_rn_realvideo=393608,
video_vosaic=393609,
video_webm=393610,
video_x_amt_demorun=393611,
video_x_amt_showrun=393612,
video_x_atomic3d_feature=393613,
video_x_dl=393614,
video_x_dv=393615,
video_x_fli=393616,
video_x_flv=393617,
video_x_isvideo=393618,
video_x_jng=393619 | 0x80000000,
video_x_m4v=393620,
video_x_matroska=393621,
video_x_mng=393622,
video_x_motion_jpeg=393623,
video_x_ms_asf=393624,
video_x_msvideo=393625,
video_x_qtc=393626,
video_x_sgi_movie=393627,
x_epoc_x_sisx_app=721308,
}; };
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) { char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj"; case application_arj: return "application/arj";
@ -625,6 +676,7 @@ case text_mcf: return "text/mcf";
case text_pascal: return "text/pascal"; case text_pascal: return "text/pascal";
case text_plain: return "text/plain"; case text_plain: return "text/plain";
case text_richtext: return "text/richtext"; case text_richtext: return "text/richtext";
case text_rtf: return "text/rtf";
case text_scriplet: return "text/scriplet"; case text_scriplet: return "text/scriplet";
case text_x_awk: return "text/x-awk"; case text_x_awk: return "text/x-awk";
case video_x_jng: return "video/x-jng"; case video_x_jng: return "video/x-jng";
@ -730,6 +782,56 @@ case application_x_wine_extension_ini: return "application/x-wine-extension-ini"
case application_x_cbz: return "application/x-cbz"; case application_x_cbz: return "application/x-cbz";
case application_x_cbr: return "application/x-cbr"; case application_x_cbr: return "application/x-cbr";
case application_x_ms_compress_szdd: return "application/x-ms-compress-szdd"; case application_x_ms_compress_szdd: return "application/x-ms-compress-szdd";
case application_x_atari_7800_rom: return "application/x-atari-7800-rom";
case application_x_nes_rom: return "application/x-nes-rom";
case application_x_font_pfm: return "application/x-font-pfm";
case application_x_gettext_translation: return "application/x-gettext-translation";
case image_wmf: return "image/wmf";
case application_pgp_keys: return "application/pgp-keys";
case image_x_3ds: return "image/x-3ds";
case application_x_lz4: return "application/x-lz4";
case application_vnd_openxmlformats_officedocument_presentationml_presentation: return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
case application_vnd_oasis_opendocument_presentation: return "application/vnd.oasis.opendocument.presentation";
case application_x_msaccess: return "application/x-msaccess";
case application_vnd_oasis_opendocument_spreadsheet: return "application/vnd.oasis.opendocument.spreadsheet";
case audio_x_aiff: return "audio/x-aiff";
case text_x_ms_regedit: return "text/x-ms-regedit";
case application_x_gamecube_rom: return "application/x-gamecube-rom";
case application_x_nintendo_ds_rom: return "application/x-nintendo-ds-rom";
case text_x_objective_c: return "text/x-objective-c";
case application_x_font_gdos: return "application/x-font-gdos";
case application_x_apple_diskimage: return "application/x-apple-diskimage";
case application_x_zstd: return "application/x-zstd";
case video_x_m4v: return "video/x-m4v";
case message_news: return "message/news";
case application_vnd_symbian_install: return "application/vnd.symbian.install";
case application_x_lzh_compressed: return "application/x-lzh-compressed";
case application_x_dosdriver: return "application/x-dosdriver";
case application_vnd_tcpdump_pcap: return "application/vnd.tcpdump.pcap";
case x_epoc_x_sisx_app: return "x-epoc/x-sisx-app";
case application_x_avira_qua: return "application/x-avira-qua";
case video_MP2T: return "video/MP2T";
case application_x_snappy_framed: return "application/x-snappy-framed";
case application_x_lz4_json: return "application/x-lz4+json";
case application_x_dmp: return "application/x-dmp";
case application_zlib: return "application/zlib";
case application_x_pgp_keyring: return "application/x-pgp-keyring";
case application_x_gdbm: return "application/x-gdbm";
case application_x_font_pf2: return "application/x-font-pf2";
case application_x_zip: return "application/x-zip";
case application_x_coredump: return "application/x-coredump";
case application_x_java_jmod: return "application/x-java-jmod";
case application_x_terminfo: return "application/x-terminfo";
case application_x_terminfo2: return "application/x-terminfo2";
case application_x_arc: return "application/x-arc";
case application_vnd_lotus_1_2_3: return "application/vnd.lotus-1-2-3";
case image_x_win_bitmap: return "image/x-win-bitmap";
case application_x_maxis_dbpf: return "application/x-maxis-dbpf";
case text_PGP: return "text/PGP";
case audio_x_hx_aac_adts: return "audio/x-hx-aac-adts";
case application_x_chrome_extension: return "application/x-chrome-extension";
case image_heic: return "image/heic";
case image_x_gem: return "image/x-gem";
default: return NULL;}} default: return NULL;}}
GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal); GHashTable *mime_get_ext_table() {GHashTable *ext_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(ext_table, "arj", (gpointer)application_arj); g_hash_table_insert(ext_table, "arj", (gpointer)application_arj);
@ -1097,7 +1199,7 @@ g_hash_table_insert(ext_table, "ms", (gpointer)text_troff);
g_hash_table_insert(ext_table, "roff", (gpointer)text_troff); g_hash_table_insert(ext_table, "roff", (gpointer)text_troff);
g_hash_table_insert(ext_table, "t", (gpointer)text_troff); g_hash_table_insert(ext_table, "t", (gpointer)text_troff);
g_hash_table_insert(ext_table, "tr", (gpointer)text_troff); g_hash_table_insert(ext_table, "tr", (gpointer)text_troff);
g_hash_table_insert(ext_table, "uni", (gpointer)text_uri_list); g_hash_table_insert(ext_table, "uji", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "unis", (gpointer)text_uri_list); g_hash_table_insert(ext_table, "unis", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "uri", (gpointer)text_uri_list); g_hash_table_insert(ext_table, "uri", (gpointer)text_uri_list);
g_hash_table_insert(ext_table, "uris", (gpointer)text_uri_list); g_hash_table_insert(ext_table, "uris", (gpointer)text_uri_list);
@ -1211,6 +1313,27 @@ g_hash_table_insert(ext_table, "hlp", (gpointer)application_winhelp);
g_hash_table_insert(ext_table, "cbz", (gpointer)application_x_cbz); g_hash_table_insert(ext_table, "cbz", (gpointer)application_x_cbz);
g_hash_table_insert(ext_table, "cbr", (gpointer)application_x_cbr); g_hash_table_insert(ext_table, "cbr", (gpointer)application_x_cbr);
g_hash_table_insert(ext_table, "fon", (gpointer)application_x_ms_compress_szdd); g_hash_table_insert(ext_table, "fon", (gpointer)application_x_ms_compress_szdd);
g_hash_table_insert(ext_table, "a78", (gpointer)application_x_atari_7800_rom);
g_hash_table_insert(ext_table, "nes", (gpointer)application_x_nes_rom);
g_hash_table_insert(ext_table, "pfm", (gpointer)application_x_font_pfm);
g_hash_table_insert(ext_table, "3ds", (gpointer)image_x_3ds);
g_hash_table_insert(ext_table, "lz4", (gpointer)application_x_lz4);
g_hash_table_insert(ext_table, "pptx", (gpointer)application_vnd_openxmlformats_officedocument_presentationml_presentation);
g_hash_table_insert(ext_table, "odp", (gpointer)application_vnd_oasis_opendocument_presentation);
g_hash_table_insert(ext_table, "accdb", (gpointer)application_x_msaccess);
g_hash_table_insert(ext_table, "ods", (gpointer)application_vnd_oasis_opendocument_spreadsheet);
g_hash_table_insert(ext_table, "aiff", (gpointer)audio_x_aiff);
g_hash_table_insert(ext_table, "aif", (gpointer)audio_x_aiff);
g_hash_table_insert(ext_table, "reg", (gpointer)text_x_ms_regedit);
g_hash_table_insert(ext_table, "zst", (gpointer)application_x_zstd);
g_hash_table_insert(ext_table, "m4v", (gpointer)video_x_m4v);
g_hash_table_insert(ext_table, "pcap", (gpointer)application_vnd_tcpdump_pcap);
g_hash_table_insert(ext_table, "jsonlz4", (gpointer)application_x_lz4_json);
g_hash_table_insert(ext_table, "dmp", (gpointer)application_x_dmp);
g_hash_table_insert(ext_table, "z", (gpointer)application_zlib);
g_hash_table_insert(ext_table, "pf2", (gpointer)application_x_font_pf2);
g_hash_table_insert(ext_table, "jmod", (gpointer)application_x_java_jmod);
g_hash_table_insert(ext_table, "heic", (gpointer)image_heic);
return ext_table;} return ext_table;}
GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal); GHashTable *mime_get_mime_table() {GHashTable *mime_table = g_hash_table_new(g_str_hash, g_str_equal);
g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj); g_hash_table_insert(mime_table, "application/arj", (gpointer)application_arj);
@ -1469,6 +1592,7 @@ g_hash_table_insert(mime_table, "text/mcf", (gpointer)text_mcf);
g_hash_table_insert(mime_table, "text/pascal", (gpointer)text_pascal); g_hash_table_insert(mime_table, "text/pascal", (gpointer)text_pascal);
g_hash_table_insert(mime_table, "text/plain", (gpointer)text_plain); g_hash_table_insert(mime_table, "text/plain", (gpointer)text_plain);
g_hash_table_insert(mime_table, "text/richtext", (gpointer)text_richtext); g_hash_table_insert(mime_table, "text/richtext", (gpointer)text_richtext);
g_hash_table_insert(mime_table, "text/rtf", (gpointer)text_rtf);
g_hash_table_insert(mime_table, "text/scriplet", (gpointer)text_scriplet); g_hash_table_insert(mime_table, "text/scriplet", (gpointer)text_scriplet);
g_hash_table_insert(mime_table, "text/x-awk", (gpointer)text_x_awk); g_hash_table_insert(mime_table, "text/x-awk", (gpointer)text_x_awk);
g_hash_table_insert(mime_table, "video/x-jng", (gpointer)video_x_jng); g_hash_table_insert(mime_table, "video/x-jng", (gpointer)video_x_jng);
@ -1574,5 +1698,55 @@ g_hash_table_insert(mime_table, "application/x-wine-extension-ini", (gpointer)ap
g_hash_table_insert(mime_table, "application/x-cbz", (gpointer)application_x_cbz); g_hash_table_insert(mime_table, "application/x-cbz", (gpointer)application_x_cbz);
g_hash_table_insert(mime_table, "application/x-cbr", (gpointer)application_x_cbr); g_hash_table_insert(mime_table, "application/x-cbr", (gpointer)application_x_cbr);
g_hash_table_insert(mime_table, "application/x-ms-compress-szdd", (gpointer)application_x_ms_compress_szdd); g_hash_table_insert(mime_table, "application/x-ms-compress-szdd", (gpointer)application_x_ms_compress_szdd);
g_hash_table_insert(mime_table, "application/x-atari-7800-rom", (gpointer)application_x_atari_7800_rom);
g_hash_table_insert(mime_table, "application/x-nes-rom", (gpointer)application_x_nes_rom);
g_hash_table_insert(mime_table, "application/x-font-pfm", (gpointer)application_x_font_pfm);
g_hash_table_insert(mime_table, "application/x-gettext-translation", (gpointer)application_x_gettext_translation);
g_hash_table_insert(mime_table, "image/wmf", (gpointer)image_wmf);
g_hash_table_insert(mime_table, "application/pgp-keys", (gpointer)application_pgp_keys);
g_hash_table_insert(mime_table, "image/x-3ds", (gpointer)image_x_3ds);
g_hash_table_insert(mime_table, "application/x-lz4", (gpointer)application_x_lz4);
g_hash_table_insert(mime_table, "application/vnd.openxmlformats-officedocument.presentationml.presentation", (gpointer)application_vnd_openxmlformats_officedocument_presentationml_presentation);
g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.presentation", (gpointer)application_vnd_oasis_opendocument_presentation);
g_hash_table_insert(mime_table, "application/x-msaccess", (gpointer)application_x_msaccess);
g_hash_table_insert(mime_table, "application/vnd.oasis.opendocument.spreadsheet", (gpointer)application_vnd_oasis_opendocument_spreadsheet);
g_hash_table_insert(mime_table, "audio/x-aiff", (gpointer)audio_x_aiff);
g_hash_table_insert(mime_table, "text/x-ms-regedit", (gpointer)text_x_ms_regedit);
g_hash_table_insert(mime_table, "application/x-gamecube-rom", (gpointer)application_x_gamecube_rom);
g_hash_table_insert(mime_table, "application/x-nintendo-ds-rom", (gpointer)application_x_nintendo_ds_rom);
g_hash_table_insert(mime_table, "text/x-objective-c", (gpointer)text_x_objective_c);
g_hash_table_insert(mime_table, "application/x-font-gdos", (gpointer)application_x_font_gdos);
g_hash_table_insert(mime_table, "application/x-apple-diskimage", (gpointer)application_x_apple_diskimage);
g_hash_table_insert(mime_table, "application/x-zstd", (gpointer)application_x_zstd);
g_hash_table_insert(mime_table, "video/x-m4v", (gpointer)video_x_m4v);
g_hash_table_insert(mime_table, "message/news", (gpointer)message_news);
g_hash_table_insert(mime_table, "application/vnd.symbian.install", (gpointer)application_vnd_symbian_install);
g_hash_table_insert(mime_table, "application/x-lzh-compressed", (gpointer)application_x_lzh_compressed);
g_hash_table_insert(mime_table, "application/x-dosdriver", (gpointer)application_x_dosdriver);
g_hash_table_insert(mime_table, "application/vnd.tcpdump.pcap", (gpointer)application_vnd_tcpdump_pcap);
g_hash_table_insert(mime_table, "x-epoc/x-sisx-app", (gpointer)x_epoc_x_sisx_app);
g_hash_table_insert(mime_table, "application/x-avira-qua", (gpointer)application_x_avira_qua);
g_hash_table_insert(mime_table, "video/MP2T", (gpointer)video_MP2T);
g_hash_table_insert(mime_table, "application/x-snappy-framed", (gpointer)application_x_snappy_framed);
g_hash_table_insert(mime_table, "application/x-lz4+json", (gpointer)application_x_lz4_json);
g_hash_table_insert(mime_table, "application/x-dmp", (gpointer)application_x_dmp);
g_hash_table_insert(mime_table, "application/zlib", (gpointer)application_zlib);
g_hash_table_insert(mime_table, "application/x-pgp-keyring", (gpointer)application_x_pgp_keyring);
g_hash_table_insert(mime_table, "application/x-gdbm", (gpointer)application_x_gdbm);
g_hash_table_insert(mime_table, "application/x-font-pf2", (gpointer)application_x_font_pf2);
g_hash_table_insert(mime_table, "application/x-zip", (gpointer)application_x_zip);
g_hash_table_insert(mime_table, "application/x-coredump", (gpointer)application_x_coredump);
g_hash_table_insert(mime_table, "application/x-java-jmod", (gpointer)application_x_java_jmod);
g_hash_table_insert(mime_table, "application/x-terminfo", (gpointer)application_x_terminfo);
g_hash_table_insert(mime_table, "application/x-terminfo2", (gpointer)application_x_terminfo2);
g_hash_table_insert(mime_table, "application/x-arc", (gpointer)application_x_arc);
g_hash_table_insert(mime_table, "application/vnd.lotus-1-2-3", (gpointer)application_vnd_lotus_1_2_3);
g_hash_table_insert(mime_table, "image/x-win-bitmap", (gpointer)image_x_win_bitmap);
g_hash_table_insert(mime_table, "application/x-maxis-dbpf", (gpointer)application_x_maxis_dbpf);
g_hash_table_insert(mime_table, "text/PGP", (gpointer)text_PGP);
g_hash_table_insert(mime_table, "audio/x-hx-aac-adts", (gpointer)audio_x_hx_aac_adts);
g_hash_table_insert(mime_table, "application/x-chrome-extension", (gpointer)application_x_chrome_extension);
g_hash_table_insert(mime_table, "image/heic", (gpointer)image_heic);
g_hash_table_insert(mime_table, "image/x-gem", (gpointer)image_x_gem);
return mime_table;} return mime_table;}
#endif #endif

View File

@ -1,7 +1,7 @@
#include "src/sist.h" #include "src/sist.h"
#include "src/ctx.h" #include "src/ctx.h"
__thread magic_t Magic; __thread magic_t Magic = NULL;
void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) { void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) {
@ -62,7 +62,7 @@ void parse(void *arg) {
if (job->info.st_size == 0) { if (job->info.st_size == 0) {
doc.mime = MIME_EMPTY; doc.mime = MIME_EMPTY;
} else if (*(job->filepath + job->ext) != '\0') { } else if (*(job->filepath + job->ext) != '\0' && (job->ext - job->base != 1)) {
doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext); doc.mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
} }
@ -80,11 +80,18 @@ void parse(void *arg) {
bytes_read = read(fd, buf, PARSE_BUF_SIZE); bytes_read = read(fd, buf, PARSE_BUF_SIZE);
if (bytes_read == -1) {
perror("read");
close(fd);
free(job);
return;
}
const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read); const char *magic_mime_str = magic_buffer(Magic, buf, bytes_read);
if (magic_mime_str != NULL) { if (magic_mime_str != NULL) {
doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str); doc.mime = mime_get_mime_by_string(ScanCtx.mime_table, magic_mime_str);
if (doc.mime == 0) { if (doc.mime == 0) {
fprintf(stderr, "Couldn't find mime %s, %s!\n", magic_mime_str, job->filepath + job->base); fprintf(stderr, "Couldn't find mime %s, %s\n", magic_mime_str, job->filepath + job->base);
} }
} }
} }
@ -93,7 +100,8 @@ void parse(void *arg) {
if (!(SHOULD_PARSE(doc.mime))) { if (!(SHOULD_PARSE(doc.mime))) {
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) || mmime == MimeAudio || mmime == MimeImage) { } else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
parse_media(job->filepath, &doc); parse_media(job->filepath, &doc);
} else if (IS_PDF(doc.mime)) { } else if (IS_PDF(doc.mime)) {

View File

@ -1,10 +1,22 @@
#include <src/ctx.h>
#include "pdf.h" #include "pdf.h"
#include "src/ctx.h" #include "src/ctx.h"
fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) { fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
fz_page *cover = fz_load_page(ctx, fzdoc, 0); int err = 0;
fz_page *cover = NULL;
fz_var(cover);
fz_try(ctx)
cover = fz_load_page(ctx, fzdoc, 0);
fz_catch(ctx)
err = 1;
if (err != 0) {
fz_drop_page(ctx, cover);
return NULL;
}
fz_rect bounds = fz_bound_page(ctx, cover); fz_rect bounds = fz_bound_page(ctx, cover);
float scale; float scale;
@ -24,24 +36,49 @@ fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
fz_clear_pixmap_with_value(ctx, pixmap, 0xFF); fz_clear_pixmap_with_value(ctx, pixmap, 0xFF);
fz_device *dev = fz_new_draw_device(ctx, m, pixmap); fz_device *dev = fz_new_draw_device(ctx, m, pixmap);
pthread_mutex_lock(&ScanCtx.mupdf_mu); fz_var(err);
fz_try(ctx) fz_try(ctx)
{
pthread_mutex_lock(&ScanCtx.mupdf_mu);
fz_run_page(ctx, cover, dev, fz_identity, NULL); fz_run_page(ctx, cover, dev, fz_identity, NULL);
}
fz_always(ctx) fz_always(ctx)
pthread_mutex_unlock(&ScanCtx.mupdf_mu); {
fz_catch(ctx) fz_close_device(ctx, dev);
fz_rethrow(ctx);
fz_drop_device(ctx, dev); fz_drop_device(ctx, dev);
pthread_mutex_unlock(&ScanCtx.mupdf_mu);
}
fz_catch(ctx)
err = ctx->error.errcode;
fz_buffer *fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params); if (err != 0) {
fz_drop_page(ctx, cover);
fz_drop_pixmap(ctx, pixmap);
return NULL;
}
fz_buffer *fzbuf = NULL;
fz_var(fzbuf);
fz_var(err);
fz_try(ctx)
fzbuf = fz_new_buffer_from_pixmap_as_png(ctx, pixmap, fz_default_color_params);
fz_catch(ctx)
err = ctx->error.errcode;
if (err == 0) {
unsigned char *tn_buf; unsigned char *tn_buf;
size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf); size_t tn_len = fz_buffer_storage(ctx, fzbuf, &tn_buf);
store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len); store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
}
fz_drop_pixmap(ctx, pixmap);
fz_drop_buffer(ctx, fzbuf); fz_drop_buffer(ctx, fzbuf);
fz_drop_pixmap(ctx, pixmap);
if (err != 0) {
fz_drop_page(ctx, cover);
return NULL;
}
return cover; return cover;
} }
@ -49,6 +86,32 @@ fz_page *render_cover(fz_context *ctx, document_t *doc, fz_document *fzdoc) {
void fz_noop_callback(__attribute__((unused)) void *user, __attribute__((unused)) const char *message) {} void fz_noop_callback(__attribute__((unused)) void *user, __attribute__((unused)) const char *message) {}
/*
 * Apply the per-context MuPDF setup used by this file.
 *
 * ctx: the context to configure; it is dereferenced unconditionally, so the
 *      caller must pass a non-NULL context.
 *
 * Statement order is kept as written: the print callbacks are replaced only
 * after fz_disable_icc() and fz_register_document_handlers() have run.
 */
void init_ctx(fz_context *ctx) {
fz_disable_icc(ctx);
fz_register_document_handlers(ctx);
/* Route warning and error printing to fz_noop_callback, which has an empty body. */
ctx->warn.print = fz_noop_callback;
ctx->error.print = fz_noop_callback;
}
/*
 * Append every character of a structured-text block to a text buffer.
 *
 * block: block to read; blocks whose type is not FZ_STEXT_BLOCK_TEXT are
 *        ignored and nothing is appended.
 * tex:   destination buffer, fed one character at a time through
 *        text_buffer_append_char().
 *
 * Returns TEXT_BUF_FULL as soon as text_buffer_append_char() reports
 * TEXT_BUF_FULL (the remaining characters are not visited), otherwise 0.
 */
int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
    if (block->type == FZ_STEXT_BLOCK_TEXT) {
        /* Walk the block's line list, then each line's character list. */
        for (fz_stext_line *ln = block->u.t.first_line; ln != NULL; ln = ln->next) {
            for (fz_stext_char *ch = ln->first_char; ch != NULL; ch = ch->next) {
                if (text_buffer_append_char(tex, ch->c) == TEXT_BUF_FULL) {
                    return TEXT_BUF_FULL;
                }
            }
        }
    }
    return 0;
}
void parse_pdf(void *buf, size_t buf_len, document_t *doc) { void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
static int mu_is_initialized = 0; static int mu_is_initialized = 0;
@ -57,105 +120,140 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
mu_is_initialized = 1; mu_is_initialized = 1;
} }
fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); fz_context *ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
fz_stream *stream = NULL;
fz_document *fzdoc = NULL;
fz_var(stream); init_ctx(ctx);
int err = 0;
fz_document *fzdoc = NULL;
fz_stream *stream = NULL;
fz_var(fzdoc); fz_var(fzdoc);
fz_var(stream);
fz_var(err);
fz_try(ctx) fz_try(ctx)
{ {
fz_disable_icc(ctx);
fz_register_document_handlers(ctx);
//disable warnings
ctx->warn.print = fz_noop_callback;
ctx->error.print = fz_noop_callback;
stream = fz_open_memory(ctx, buf, buf_len); stream = fz_open_memory(ctx, buf, buf_len);
fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream); fzdoc = fz_open_document_with_stream(ctx, mime_get_mime_text(doc->mime), stream);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
char title[4096] = {'\0',}; char title[4096] = {'\0',};
fz_try(ctx)
fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title)); fz_lookup_metadata(ctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title));
printf("Title: %s\n", title); //todo rmv fz_catch(ctx)
;
if (strlen(title) > 0) { if (strlen(title) > 0) {
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title) + 1); meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title));
meta_content->key = MetaTitle; meta_content->key = MetaTitle;
strcpy(meta_content->strval, title); strcpy(meta_content->strval, title);
APPEND_META(doc, meta_content) APPEND_META(doc, meta_content)
} }
int page_count = fz_count_pages(ctx, fzdoc); int page_count = -1;
fz_var(err);
fz_try(ctx)
page_count = fz_count_pages(ctx, fzdoc);
fz_catch(ctx)
err = ctx->error.errcode;
if (err) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_page *cover = render_cover(ctx, doc, fzdoc); fz_page *cover = render_cover(ctx, doc, fzdoc);
if (cover == NULL) {
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_stext_options opts; fz_stext_options opts = {0};
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size); text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int current_page = 0; current_page < page_count; current_page++) { for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page; if (current_page == 0) { fz_page *page = NULL;
if (current_page == 0) {
page = cover; page = cover;
} else { } else {
fz_var(err);
fz_try(ctx)
page = fz_load_page(ctx, fzdoc, current_page); page = fz_load_page(ctx, fzdoc, current_page);
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
} }
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page)); fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
fz_device *dev = fz_new_stext_device(ctx, stext, &opts); fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
pthread_mutex_lock(&ScanCtx.mupdf_mu); fz_var(err);
fz_try(ctx) fz_try(ctx)
fz_run_page_contents(ctx, page, dev, fz_identity, NULL); fz_run_page(ctx, page, dev, fz_identity, NULL);
fz_always(ctx) fz_always(ctx)
pthread_mutex_unlock(&ScanCtx.mupdf_mu); {
fz_catch(ctx) fz_close_device(ctx, dev);
fz_rethrow(ctx);
fz_drop_device(ctx, dev); fz_drop_device(ctx, dev);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_stext_block *block = stext->first_block; fz_stext_block *block = stext->first_block;
while (block != NULL) { while (block != NULL) {
int ret = read_stext_block(block, &text_buf);
if (block->type != FZ_STEXT_BLOCK_TEXT) { if (ret == TEXT_BUF_FULL) {
block = block->next; break;
continue;
}
fz_stext_line *line = block->u.t.first_line;
while (line != NULL) {
fz_stext_char *c = line->first_char;
while (c != NULL) {
if (text_buffer_append_char(&text_buf, c->c) == TEXT_BUF_FULL) {
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
goto write_loop_end;
}
c = c->next;
}
line = line->next;
} }
block = block->next; block = block->next;
} }
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext); fz_drop_stext_page(ctx, stext);
fz_drop_page(ctx, page);
if (text_buf.dyn_buffer.cur >= text_buf.dyn_buffer.size) {
break;
}
} }
write_loop_end:;
text_buffer_terminate_string(&text_buf); text_buffer_terminate_string(&text_buf);
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur); meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_content->key = MetaContent; meta_content->key = MetaContent;
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur); memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
text_buffer_destroy(&text_buf);
APPEND_META(doc, meta_content) APPEND_META(doc, meta_content)
}
fz_always(ctx)
{
fz_drop_stream(ctx, stream); fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc); fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx); fz_drop_context(ctx);
} fz_catch(ctx) {
fprintf(stderr, "Error %s %s\n", doc->filepath, ctx->error.message); text_buffer_destroy(&text_buf);
}
} }

View File

@ -27,17 +27,14 @@ void parse_text(int bytes_read, int *fd, char *buf, document_t *doc) {
read(*fd, intermediate_buf + bytes_read, to_read); read(*fd, intermediate_buf + bytes_read, to_read);
} }
text_buffer_t tex = text_buffer_create(ScanCtx.content_size);
text_buffer_append_string(&tex, intermediate_buf, intermediate_buf_len);
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size); meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur);
for (int i = 0; i < intermediate_buf_len; i++) {
text_buffer_append_char(&text_buf, *(intermediate_buf + i));
}
text_buffer_terminate_string(&text_buf);
meta_line_t *meta = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta->key = MetaContent; meta->key = MetaContent;
strcpy(meta->strval, text_buf.dyn_buffer.buf); strcpy(meta->strval, tex.dyn_buffer.buf);
text_buffer_destroy(&text_buf);
free(intermediate_buf);
APPEND_META(doc, meta) APPEND_META(doc, meta)
free(intermediate_buf);
text_buffer_destroy(&tex);
} }

View File

@ -16,6 +16,7 @@
#include <libswscale/swscale.h> #include <libswscale/swscale.h>
#include <libswresample/swresample.h> #include <libswresample/swresample.h>
#include <libavcodec/avcodec.h> #include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <ctype.h> #include <ctype.h>
#include <mupdf/fitz.h> #include <mupdf/fitz.h>
#include <mupdf/pdf.h> #include <mupdf/pdf.h>
@ -49,6 +50,7 @@
#include "parsing/media.h" #include "parsing/media.h"
#include "parsing/font.h" #include "parsing/font.h"
#include "cli.h" #include "cli.h"
#include "utf8.h/utf8.h"
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
#include "src/index/elastic.h" #include "src/index/elastic.h"

View File

@ -25,6 +25,7 @@ typedef struct tpool {
int done_cnt; int done_cnt;
int stop; int stop;
void (*cleanup_func)(); void (*cleanup_func)();
} tpool_t; } tpool_t;
@ -100,7 +101,7 @@ static void *tpool_worker(void *arg) {
tpool_t *pool = arg; tpool_t *pool = arg;
while (1) { while (1) {
pthread_mutex_lock(&(pool->work_mutex)); pthread_mutex_lock(&pool->work_mutex);
if (pool->stop) { if (pool->stop) {
break; break;
} }
@ -192,7 +193,7 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
pool->done_cnt = 0; pool->done_cnt = 0;
pool->stop = 0; pool->stop = 0;
pool->cleanup_func = cleanup_func; pool->cleanup_func = cleanup_func;
pool->threads = malloc(sizeof(pthread_t) * thread_cnt); pool->threads = calloc(sizeof(pthread_t), thread_cnt);
pthread_mutex_init(&(pool->work_mutex), NULL); pthread_mutex_init(&(pool->work_mutex), NULL);
@ -202,11 +203,14 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
pool->work_head = NULL; pool->work_head = NULL;
pool->work_tail = NULL; pool->work_tail = NULL;
for (size_t i = 0; i < thread_cnt; i++) { return pool;
}
/**
 * Start the worker threads of a pool.
 *
 * Creates pool->thread_cnt threads that run tpool_worker() with the pool as
 * their argument, then detaches each of them.
 *
 * @param pool Pool whose workers should be started.
 */
void tpool_start(tpool_t *pool) {
    for (size_t i = 0; i < pool->thread_cnt; i++) {
        // Create directly into the pool's array: creating into a local copy
        // would leave pool->threads[i] without the real thread id.
        if (pthread_create(&pool->threads[i], NULL, tpool_worker, pool) != 0) {
            // No thread was created for this slot, so there is nothing to detach.
            continue;
        }
        pthread_detach(pool->threads[i]);
    }
}

View File

@ -9,6 +9,7 @@ typedef struct tpool tpool_t;
typedef void (*thread_func_t)(void *arg); typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(size_t num, void (*cleanup_func)()); tpool_t *tpool_create(size_t num, void (*cleanup_func)());
void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *tm); void tpool_destroy(tpool_t *tm);
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg); int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);

View File

@ -89,10 +89,71 @@ void text_buffer_terminate_string(text_buffer_t *buf) {
dyn_buffer_write_char(&buf->dyn_buffer, '\0'); dyn_buffer_write_char(&buf->dyn_buffer, '\0');
} }
/**
 * Check whether the bytes at `s` begin with one well-formed UTF-8 sequence.
 *
 * The lead byte selects the expected length (1 to 4 bytes). A multi-byte
 * sequence is accepted only when:
 *   - every expected trailing byte is a continuation byte (10xxxxxx),
 *   - the byte right after the sequence is NOT a continuation byte,
 *   - the sequence is not an overlong encoding.
 * A byte that is neither ASCII nor a 2/3/4-byte lead is rejected.
 *
 * The checks short-circuit in byte order, so on a NUL-terminated string no
 * byte after the terminator is read. On a buffer that is not NUL-terminated,
 * up to s[4] may be read.
 *
 * `static inline` gives the helper internal linkage. A plain inline definition
 * emits no external definition, so a call the compiler chose not to inline
 * could fail to link.
 *
 * @param s Pointer to the first byte of the sequence to check.
 * @return Non-zero if the sequence is valid, 0 otherwise.
 */
static inline int utf8_validchr(const char *s) {
    if (0xf0 == (0xf8 & s[0])) {
        // 4-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return (0x80 == (0xc0 & s[1]))
               && (0x80 == (0xc0 & s[2]))
               && (0x80 == (0xc0 & s[3]))
               && (0x80 != (0xc0 & s[4]))
               // overlong: the value would have fit in 3 bytes
               && !((0 == (0x07 & s[0])) && (0 == (0x30 & s[1])));
    }

    if (0xe0 == (0xf0 & s[0])) {
        // 3-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
        return (0x80 == (0xc0 & s[1]))
               && (0x80 == (0xc0 & s[2]))
               && (0x80 != (0xc0 & s[3]))
               // overlong: the value would have fit in 2 bytes
               && !((0 == (0x0f & s[0])) && (0 == (0x20 & s[1])));
    }

    if (0xc0 == (0xe0 & s[0])) {
        // 2-byte sequence: 110xxxxx 10xxxxxx
        return (0x80 == (0xc0 & s[1]))
               && (0x80 != (0xc0 & s[2]))
               // overlong: the value would have fit in 1 byte
               && (0 != (0x1e & s[0]));
    }

    // Remaining cases: ASCII (valid) or a stray continuation/invalid lead byte.
    return 0x00 == (0x80 & s[0]);
}
/**
 * Append the valid UTF-8 code points of a length-bounded string to a buffer.
 *
 * Processing stops at the first NUL byte, when the end of the usable input is
 * reached, or when text_buffer_append_char() reports TEXT_BUF_FULL. The buffer
 * is always terminated with text_buffer_terminate_string() before returning.
 *
 * Invalid bytes are skipped one at a time so that decoding resynchronises on
 * the next valid sequence.
 *
 * utf8_validchr() inspects bytes up to s[4], so a code point is only examined
 * while at least 4 more bytes of input follow its first byte. Bytes inside that
 * trailing margin are not processed.
 *
 * @param buf Destination text buffer.
 * @param str Input bytes; need not be NUL-terminated.
 * @param len Number of readable bytes at `str`.
 * @return 0 on success, TEXT_BUF_FULL if the buffer filled up.
 */
int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len) {
    const size_t lookahead = 4;
    int ret = 0;
    size_t pos = 0;

    // The bound is checked before any byte is read, so a short input is never over-read.
    while (pos + lookahead < len && str[pos] != '\0') {
        char *cur = str + pos;

        // Validate the sequence that is about to be decoded, not the one after it.
        if (!utf8_validchr(cur)) {
            pos++;
            continue;
        }

        utf8_int32_t c;
        char *next = utf8codepoint(cur, &c);
        pos = (size_t) (next - str);

        if (text_buffer_append_char(buf, c) == TEXT_BUF_FULL) {
            ret = TEXT_BUF_FULL;
            break;
        }
    }

    text_buffer_terminate_string(buf);
    return ret;
}
/**
 * Append the valid UTF-8 code points of a NUL-terminated string to a buffer.
 *
 * Processing stops at the terminator or when text_buffer_append_char() reports
 * TEXT_BUF_FULL. The buffer is always terminated with
 * text_buffer_terminate_string() before returning.
 *
 * Each sequence is validated before it is decoded. An invalid byte is skipped
 * on its own, so a truncated multi-byte lead just before the terminator cannot
 * make the cursor jump past the end of the string.
 *
 * @param buf Destination text buffer.
 * @param str NUL-terminated input string.
 * @return 0 on success, TEXT_BUF_FULL if the buffer filled up.
 */
int text_buffer_append_string0(text_buffer_t *buf, char *str) {
    int ret = 0;
    char *cur = str;

    while (*cur != '\0') {
        // Validate the sequence that is about to be decoded, not the one after it.
        if (!utf8_validchr(cur)) {
            cur++;
            continue;
        }

        utf8_int32_t c;
        cur = utf8codepoint(cur, &c);

        if (text_buffer_append_char(buf, c) == TEXT_BUF_FULL) {
            ret = TEXT_BUF_FULL;
            break;
        }
    }

    text_buffer_terminate_string(buf);
    return ret;
}
@ -104,15 +165,31 @@ int text_buffer_append_char(text_buffer_t *buf, int c) {
dyn_buffer_write_char(&buf->dyn_buffer, ' '); dyn_buffer_write_char(&buf->dyn_buffer, ' ');
buf->last_char_was_whitespace = TRUE; buf->last_char_was_whitespace = TRUE;
if (buf->dyn_buffer.cur >= buf->max_size) { if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) {
return TEXT_BUF_FULL; return TEXT_BUF_FULL;
} }
} }
} else { } else {
buf->last_char_was_whitespace = FALSE; buf->last_char_was_whitespace = FALSE;
dyn_buffer_write_char(&buf->dyn_buffer, (char) c); grow_buffer_small(&buf->dyn_buffer);
if (buf->dyn_buffer.cur >= buf->max_size) { if (0 == ((utf8_int32_t) 0xffffff80 & c)) {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = (char) c;
} else if (0 == ((utf8_int32_t) 0xfffff800 & c)) {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xc0 | (char) (c >> 6);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
} else if (0 == ((utf8_int32_t) 0xffff0000 & c)) {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xe0 | (char) (c >> 12);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
} else {
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xf0 | (char) (c >> 18);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 12) & 0x3f);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f);
*(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f);
}
if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) {
return TEXT_BUF_FULL; return TEXT_BUF_FULL;
} }
} }

View File

@ -5,7 +5,10 @@
// Returned by the text_buffer_append_* functions once the buffer has reached its maximum size
#define TEXT_BUF_FULL (-1)
#define INITIAL_BUF_SIZE (1024 * 16)

// A character is kept when its code point is '!' or above; everything below
// is ignored. Arguments and expansions are parenthesized so that the macros
// are safe to use with arbitrary expressions.
#define SHOULD_KEEP_CHAR(c) ((c) >= (int) '!')
#define SHOULD_IGNORE_CHAR(c) (!SHOULD_KEEP_CHAR(c))
typedef struct dyn_buffer { typedef struct dyn_buffer {
char *buf; char *buf;
@ -22,7 +25,9 @@ typedef struct text_buffer {
} text_buffer_t; } text_buffer_t;
char *abspath(const char *path); char *abspath(const char *path);
char *expandpath(const char *path); char *expandpath(const char *path);
dyn_buffer_t url_escape(char *str); dyn_buffer_t url_escape(char *str);
void progress_bar_print(double percentage, size_t tn_size, size_t index_size); void progress_bar_print(double percentage, size_t tn_size, size_t index_size);
@ -56,14 +61,16 @@ text_buffer_t text_buffer_create(int max_size);
void text_buffer_terminate_string(text_buffer_t *buf); void text_buffer_terminate_string(text_buffer_t *buf);
int text_buffer_append_string(text_buffer_t *buf, char * str); int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len);
int text_buffer_append_string0(text_buffer_t *buf, char *str);
int text_buffer_append_char(text_buffer_t *buf, int c); int text_buffer_append_char(text_buffer_t *buf, int c);
void incremental_put(GHashTable *table, unsigned long inode_no, int mtime); void incremental_put(GHashTable *table, unsigned long inode_no, int mtime);
int incremental_get(GHashTable *table, unsigned long inode_no); int incremental_get(GHashTable *table, unsigned long inode_no);
int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no); int incremental_mark_file_for_copy(GHashTable *table, unsigned long inode_no);
#endif #endif

1
utf8.h Submodule

@ -0,0 +1 @@
Subproject commit 2a7c5bfa952816cd1c674e604d31c6e0268ba770