diff --git a/CMakeLists.txt b/CMakeLists.txt index eeaf652..617b96d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,7 +97,7 @@ endif() ExternalProject_Add( ffmpeg GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git - GIT_TAG "n4.3.2" + GIT_TAG "n4.4" UPDATE_COMMAND "" PATCH_COMMAND "" @@ -118,11 +118,31 @@ ExternalProject_Add( SET(FFMPEG_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg) SET(FFMPEG_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg) +#ExternalProject_Add( +# libwpd +# URL http://prdownloads.sourceforge.net/libwpd/libwpd-0.9.9.tar.gz +# +# UPDATE_COMMAND "" +# PATCH_COMMAND "" +# TEST_COMMAND "" +# CONFIGURE_COMMAND ./configure --without-docs --enable-static --disable-shared +# INSTALL_COMMAND "" +# +# PREFIX "third-party/ext_libwpd" +# SOURCE_DIR "third-party/ext_libwpd/src/libwpd" +# BINARY_DIR "third-party/ext_libwpd/src/libwpd" +# +# BUILD_COMMAND ${MAKE_EXE} -j33 +#) +#SET(WPD_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/src/lib/.libs/) +#SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/inc/) + add_dependencies( scan libmobi ffmpeg antiword +# libwpd ) target_link_libraries( @@ -141,10 +161,11 @@ target_link_libraries( ${MOBI_LIB_DIR}/libmobi.a +# ${WPD_LIB_DIR}/libwpd-0.9.a + ${FREETYPE_LIB} ${HARFBUZZ_LIB} ${JBIG2DEC_LIB} - # OpenSSL::SSL OpenSSL::Crypto stdc++ @@ -185,6 +206,7 @@ target_include_directories( ${LIBXML2_INCLUDE_DIR} ${FFMPEG_INCLUDE_DIR} ${MOBI_INCLUDE_DIR} +# ${WPD_INCLUDE_DIR} ) if (BUILD_TESTS) diff --git a/libscan/ebook/ebook.c b/libscan/ebook/ebook.c index 49dd242..4cdef33 100644 --- a/libscan/ebook/ebook.c +++ b/libscan/ebook/ebook.c @@ -131,7 +131,10 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, pixmap->w, pixmap->h, 1); - const uint8_t *in_data[1] = {pixmap->samples}; + unsigned char *samples = calloc(1, 1024 * 1024 * 1024); + memcpy(samples, pixmap->samples, pixmap->stride * pixmap->h); + + const uint8_t *in_data[1] = {samples,}; int in_line_size[1] = {(int) pixmap->stride}; sws_scale(sws_ctx, @@ -147,7 +150,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d sws_freeContext(sws_ctx); // YUV420p -> JPEG - AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(pixmap->w, pixmap->h, 1.0f); + AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(pixmap->w, pixmap->h, ctx->tn_qscale); avcodec_send_frame(jpeg_encoder, scaled_frame); AVPacket jpeg_packet; @@ -157,6 +160,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d APPEND_TN_META(doc, pixmap->w, pixmap->h) ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size); + free(samples); av_packet_unref(&jpeg_packet); av_free(*scaled_frame->data); av_frame_free(&scaled_frame); @@ -185,10 +189,10 @@ void fz_warn_callback(void *user, const char *message) { static void init_fzctx(fz_context *fzctx, document_t *doc) { fz_register_document_handlers(fzctx); - static int mu_is_initialized = 0; + static int mu_is_initialized = FALSE; if (!mu_is_initialized) { pthread_mutex_init(&Mutex, NULL); - mu_is_initialized = 1; + mu_is_initialized = TRUE; } fzctx->warn.print_user = doc; @@ -294,7 +298,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi return; } - APPEND_INT_META(doc, MetaPages, page_count) + APPEND_LONG_META(doc, MetaPages, page_count) if (ctx->tn_size > 0) { if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) { diff --git a/libscan/ebook/ebook.h b/libscan/ebook/ebook.h index fbab41c..66c9999 100644 --- a/libscan/ebook/ebook.h +++ b/libscan/ebook/ebook.h @@ -14,6 +14,7 @@ typedef struct { logf_callback_t logf; store_callback_t store; int fast_epub_parse; + float tn_qscale; } scan_ebook_ctx_t; void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc); diff --git a/libscan/macros.h b/libscan/macros.h index cd6cb5e..fb7dc61 100644 --- a/libscan/macros.h +++ b/libscan/macros.h @@ -26,11 +26,11 @@ strcpy(meta_str->str_val, value); \ APPEND_META(doc, meta_str)} -#define APPEND_INT_META(doc, keyname, value) \ - {meta_line_t *meta_int = malloc(sizeof(meta_line_t)); \ - meta_int->key = keyname; \ - meta_int->int_val = value; \ - APPEND_META(doc, meta_int)} +#define APPEND_LONG_META(doc, keyname, value) \ + {meta_line_t *meta_long = malloc(sizeof(meta_line_t)); \ + meta_long->key = keyname; \ + meta_long->long_val = value; \ + APPEND_META(doc, meta_long)} #define APPEND_TN_META(doc, width, height) \ {meta_line_t *meta_str = malloc(sizeof(meta_line_t) + 4 + 1 + 4); \ diff --git a/libscan/media/media.c b/libscan/media/media.c index 437e5e5..73dc52e 100644 --- a/libscan/media/media.c +++ b/libscan/media/media.c @@ -261,6 +261,9 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f meta_line_t *meta_duration = malloc(sizeof(meta_line_t)); meta_duration->key = MetaMediaDuration; meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE; + if (meta_duration->long_val > INT32_MAX) { + meta_duration->long_val = 0; + } APPEND_META(doc, meta_duration) meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t)); @@ -356,12 +359,12 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, meta_line_t *meta_w = malloc(sizeof(meta_line_t)); meta_w->key = MetaWidth; - meta_w->int_val = stream->codecpar->width; + meta_w->long_val = stream->codecpar->width; APPEND_META(doc, meta_w) meta_line_t *meta_h = malloc(sizeof(meta_line_t)); meta_h->key = MetaHeight; - meta_h->int_val = stream->codecpar->height; + meta_h->long_val = stream->codecpar->height; APPEND_META(doc, meta_h) video_stream = i; diff --git a/libscan/raw/raw.c b/libscan/raw/raw.c index 1ae9df1..8ee19df 100644 --- a/libscan/raw/raw.c +++ b/libscan/raw/raw.c @@ -114,8 +114,8 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) { if (*libraw_lib->idata.software != '\0') { APPEND_STR_META(doc, MetaExifSoftware, libraw_lib->idata.software) } - APPEND_INT_META(doc, MetaWidth, libraw_lib->sizes.width) - APPEND_INT_META(doc, MetaHeight, libraw_lib->sizes.height) + APPEND_LONG_META(doc, MetaWidth, libraw_lib->sizes.width) + APPEND_LONG_META(doc, MetaHeight, libraw_lib->sizes.height) char tmp[1024]; snprintf(tmp, sizeof(tmp), "%g", libraw_lib->other.iso_speed); APPEND_STR_META(doc, MetaExifIsoSpeedRatings, tmp) diff --git a/libscan/scan.h b/libscan/scan.h index 33a8680..fb40a47 100644 --- a/libscan/scan.h +++ b/libscan/scan.h @@ -13,27 +13,15 @@ #define SIST_SWS_ALGO SWS_LANCZOS -#define META_INT_MASK 0x8000 -#define META_STR_MASK 0x4000 -#define META_LONG_MASK 0x2000 - #define UNUSED(x) __attribute__((__unused__)) x -#define META_STR(id) ((unsigned) id) | ((unsigned) META_STR_MASK) -#define META_INT(id) ((unsigned) id) | ((unsigned) META_INT_MASK) -#define META_LONG(id) ((unsigned) id) | ((unsigned) META_LONG_MASK) - -#define IS_META_INT(key) (key & META_INT_MASK) == META_INT_MASK -#define IS_META_LONG(key) (key & META_LONG_MASK) == META_LONG_MASK -#define IS_META_STR(key) (key & META_STR_MASK) == META_STR_MASK - typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len); typedef void (*logf_callback_t)(const char *filepath, int level, char *format, ...); typedef void (*log_callback_t)(const char *filepath, int level, char *str); typedef int scan_code_t; #define SCAN_OK (scan_code_t) 0 -#define SCAN_ERR_READ (scan_code_t) -1 +#define SCAN_ERR_READ (scan_code_t) (-1) #define LEVEL_DEBUG 0 #define LEVEL_INFO 1 @@ -56,41 +44,45 @@ typedef int scan_code_t; #define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1); #define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1); -// This is written to file as a 16-bit int! enum metakey { - MetaContent = META_STR(1), - MetaWidth = META_INT(2), - MetaHeight = META_INT(3), - MetaMediaDuration = META_LONG(4), - MetaMediaAudioCodec = META_STR(5), - MetaMediaVideoCodec = META_STR(6), - MetaMediaBitrate = META_LONG(7), - MetaArtist = META_STR(8), - MetaAlbum = META_STR(9), - MetaAlbumArtist = META_STR(10), - MetaGenre = META_STR(11), - MetaTitle = META_STR(12), - MetaFontName = META_STR(13), - MetaParent = META_STR(14), - MetaExifMake = META_STR(15), - MetaExifSoftware = META_STR(16), - MetaExifExposureTime = META_STR(17), - MetaExifFNumber = META_STR(18), - MetaExifFocalLength = META_STR(19), - MetaExifUserComment = META_STR(20), - MetaExifModel = META_STR(21), - MetaExifIsoSpeedRatings = META_STR(22), - MetaExifDateTime = META_STR(23), - MetaAuthor = META_STR(24), - MetaModifiedBy = META_STR(25), - MetaThumbnail = META_STR(26), - MetaPages = META_INT(27), - MetaExifGpsLongitudeDMS = META_STR(28), - MetaExifGpsLongitudeRef = META_STR(29), - MetaExifGpsLatitudeDMS = META_STR(30), - MetaExifGpsLatitudeRef = META_STR(31), - MetaExifGpsLatitudeDec = META_STR(32), - MetaExifGpsLongitudeDec = META_STR(33), + // String + MetaContent = 1, + MetaMediaAudioCodec, + MetaMediaVideoCodec, + MetaArtist, + MetaAlbum, + MetaAlbumArtist, + MetaGenre, + MetaTitle, + MetaFontName, + MetaParent, + MetaExifMake, + MetaExifSoftware, + MetaExifExposureTime, + MetaExifFNumber, + MetaExifFocalLength, + MetaExifUserComment, + MetaExifModel, + MetaExifIsoSpeedRatings, + MetaExifDateTime, + MetaAuthor, + MetaModifiedBy, + MetaThumbnail, + + // Number + MetaWidth, + MetaHeight, + MetaMediaDuration, + MetaMediaBitrate, + MetaPages, + + // ?? + MetaExifGpsLongitudeDMS, + MetaExifGpsLongitudeRef, + MetaExifGpsLatitudeDMS, + MetaExifGpsLatitudeRef, + MetaExifGpsLatitudeDec, + MetaExifGpsLongitudeDec, }; typedef struct meta_line { @@ -98,8 +90,8 @@ typedef struct meta_line { enum metakey key; union { char str_val[0]; - int int_val; unsigned long long_val; + double double_val; }; } meta_line_t; diff --git a/libscan/util.h b/libscan/util.h index 10578a7..d4a8e3d 100644 --- a/libscan/util.h +++ b/libscan/util.h @@ -255,7 +255,7 @@ static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t } utf8_int32_t c; - char tmp[16]; + char tmp[16] = {0}; do { ptr = (char *) utf8codepoint(ptr, &c); diff --git a/test/main.cpp b/test/main.cpp index 429fabb..af1e170 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -203,7 +203,7 @@ TEST(Ebook, CandlePdf) { ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), 500, 4); ASSERT_NE(get_meta(&doc, MetaContent)->str_val[0], ' '); ASSERT_NE(size_before, store_size); - ASSERT_EQ(get_meta(&doc, MetaPages)->int_val, 16); + ASSERT_EQ(get_meta(&doc, MetaPages)->long_val, 16); cleanup(&doc, &f); } @@ -711,8 +711,8 @@ TEST(RAW, Panasonic) { ASSERT_STREQ(get_meta(&doc, MetaExifDateTime)->str_val, "2020:07:20 10:00:34"); ASSERT_STREQ(get_meta(&doc, MetaExifFocalLength)->str_val, "20.0"); ASSERT_STREQ(get_meta(&doc, MetaExifFNumber)->str_val, "2.0"); - ASSERT_EQ(get_meta(&doc, MetaWidth)->int_val, 5200); - ASSERT_EQ(get_meta(&doc, MetaHeight)->int_val, 3904); + ASSERT_EQ(get_meta(&doc, MetaWidth)->long_val, 5200); + ASSERT_EQ(get_meta(&doc, MetaHeight)->long_val, 3904); ASSERT_NE(size_before, store_size); cleanup(&doc, &f); @@ -747,8 +747,8 @@ TEST(RAW, Nikon) { ASSERT_STREQ(get_meta(&doc, MetaMediaVideoCodec)->str_val, "raw"); ASSERT_STREQ(get_meta(&doc, MetaExifModel)->str_val, "D750"); ASSERT_STREQ(get_meta(&doc, MetaExifMake)->str_val, "Nikon"); - ASSERT_EQ(get_meta(&doc, MetaWidth)->int_val, 6032); - ASSERT_EQ(get_meta(&doc, MetaHeight)->int_val, 4032); + ASSERT_EQ(get_meta(&doc, MetaWidth)->long_val, 6032); + ASSERT_EQ(get_meta(&doc, MetaHeight)->long_val, 4032); ASSERT_NE(size_before, store_size); cleanup(&doc, &f); @@ -766,8 +766,8 @@ TEST(RAW, Sony) { ASSERT_STREQ(get_meta(&doc, MetaMediaVideoCodec)->str_val, "raw"); ASSERT_STREQ(get_meta(&doc, MetaExifModel)->str_val, "ILCE-7RM3"); ASSERT_STREQ(get_meta(&doc, MetaExifMake)->str_val, "Sony"); - ASSERT_EQ(get_meta(&doc, MetaWidth)->int_val, 7968); - ASSERT_EQ(get_meta(&doc, MetaHeight)->int_val, 5320); + ASSERT_EQ(get_meta(&doc, MetaWidth)->long_val, 7968); + ASSERT_EQ(get_meta(&doc, MetaHeight)->long_val, 5320); ASSERT_NE(size_before, store_size); cleanup(&doc, &f); @@ -785,8 +785,8 @@ TEST(RAW, Olympus) { ASSERT_STREQ(get_meta(&doc, MetaMediaVideoCodec)->str_val, "raw"); ASSERT_STREQ(get_meta(&doc, MetaExifModel)->str_val, "E-M5MarkII"); ASSERT_STREQ(get_meta(&doc, MetaExifMake)->str_val, "Olympus"); - ASSERT_EQ(get_meta(&doc, MetaWidth)->int_val, 4640); - ASSERT_EQ(get_meta(&doc, MetaHeight)->int_val, 3472); + ASSERT_EQ(get_meta(&doc, MetaWidth)->long_val, 4640); + ASSERT_EQ(get_meta(&doc, MetaHeight)->long_val, 3472); ASSERT_NE(size_before, store_size); cleanup(&doc, &f); @@ -803,8 +803,8 @@ TEST(RAW, Fuji) { ASSERT_STREQ(get_meta(&doc, MetaMediaVideoCodec)->str_val, "raw"); ASSERT_STREQ(get_meta(&doc, MetaExifModel)->str_val, "X-T2"); ASSERT_STREQ(get_meta(&doc, MetaExifMake)->str_val, "Fujifilm"); - ASSERT_EQ(get_meta(&doc, MetaWidth)->int_val, 6032); - ASSERT_EQ(get_meta(&doc, MetaHeight)->int_val, 4028); + ASSERT_EQ(get_meta(&doc, MetaWidth)->long_val, 6032); + ASSERT_EQ(get_meta(&doc, MetaHeight)->long_val, 4028); ASSERT_NE(size_before, store_size); cleanup(&doc, &f); @@ -823,7 +823,7 @@ TEST(Msdoc, Test1Pdf) { ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "October 2000") != nullptr); ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "INTERNATIONAL ORGANIZATION FOR STANDARDIZATION"); ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "Oliver Morgan"); - ASSERT_EQ(get_meta(&doc, MetaPages)->int_val, 57); + ASSERT_EQ(get_meta(&doc, MetaPages)->long_val, 57); ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4); ASSERT_NE(size_before, store_size); @@ -978,6 +978,7 @@ int main(int argc, char **argv) { ebook_ctx.log = noop_log; ebook_ctx.logf = noop_logf; ebook_ctx.fast_epub_parse = 0; + ebook_ctx.tn_qscale = 1.0; ebook_500_ctx = ebook_ctx; ebook_500_ctx.content_size = 500;