Bug fixes, rework Meta types, scale ebook tn with ctx args

This commit is contained in:
simon987 2021-09-05 09:11:33 -04:00
parent 8a0ac8d0db
commit 722052e4e1
9 changed files with 100 additions and 77 deletions

View File

@ -97,7 +97,7 @@ endif()
ExternalProject_Add(
ffmpeg
GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
GIT_TAG "n4.3.2"
GIT_TAG "n4.4"
UPDATE_COMMAND ""
PATCH_COMMAND ""
@ -118,11 +118,31 @@ ExternalProject_Add(
# Library and include paths for ffmpeg both resolve to the same directory
# under the build tree.
set(FFMPEG_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg)
set(FFMPEG_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg)
#ExternalProject_Add(
# libwpd
# URL http://prdownloads.sourceforge.net/libwpd/libwpd-0.9.9.tar.gz
#
# UPDATE_COMMAND ""
# PATCH_COMMAND ""
# TEST_COMMAND ""
# CONFIGURE_COMMAND ./configure --without-docs --enable-static --disable-shared
# INSTALL_COMMAND ""
#
# PREFIX "third-party/ext_libwpd"
# SOURCE_DIR "third-party/ext_libwpd/src/libwpd"
# BINARY_DIR "third-party/ext_libwpd/src/libwpd"
#
# BUILD_COMMAND ${MAKE_EXE} -j33
#)
#SET(WPD_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/src/lib/.libs/)
#SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwpd/inc/)
# Make sure libmobi, ffmpeg and antiword are built before the scan target.
# libwpd is left out: its ExternalProject block is commented out.
add_dependencies(scan libmobi ffmpeg antiword)
target_link_libraries(
@ -141,10 +161,11 @@ target_link_libraries(
${MOBI_LIB_DIR}/libmobi.a
# ${WPD_LIB_DIR}/libwpd-0.9.a
${FREETYPE_LIB}
${HARFBUZZ_LIB}
${JBIG2DEC_LIB}
# OpenSSL::SSL OpenSSL::Crypto
stdc++
@ -185,6 +206,7 @@ target_include_directories(
${LIBXML2_INCLUDE_DIR}
${FFMPEG_INCLUDE_DIR}
${MOBI_INCLUDE_DIR}
# ${WPD_INCLUDE_DIR}
)
if (BUILD_TESTS)

View File

@ -131,7 +131,10 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, pixmap->w, pixmap->h,
1);
const uint8_t *in_data[1] = {pixmap->samples};
unsigned char *samples = calloc(1, 1024 * 1024 * 1024);
memcpy(samples, pixmap->samples, pixmap->stride * pixmap->h);
const uint8_t *in_data[1] = {samples,};
int in_line_size[1] = {(int) pixmap->stride};
sws_scale(sws_ctx,
@ -147,7 +150,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
sws_freeContext(sws_ctx);
// YUV420p -> JPEG
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(pixmap->w, pixmap->h, 1.0f);
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(pixmap->w, pixmap->h, ctx->tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame);
AVPacket jpeg_packet;
@ -157,6 +160,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
APPEND_TN_META(doc, pixmap->w, pixmap->h)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
free(samples);
av_packet_unref(&jpeg_packet);
av_free(*scaled_frame->data);
av_frame_free(&scaled_frame);
@ -185,10 +189,10 @@ void fz_warn_callback(void *user, const char *message) {
static void init_fzctx(fz_context *fzctx, document_t *doc) {
fz_register_document_handlers(fzctx);
static int mu_is_initialized = 0;
static int mu_is_initialized = FALSE;
if (!mu_is_initialized) {
pthread_mutex_init(&Mutex, NULL);
mu_is_initialized = 1;
mu_is_initialized = TRUE;
}
fzctx->warn.print_user = doc;
@ -294,7 +298,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
return;
}
APPEND_INT_META(doc, MetaPages, page_count)
APPEND_LONG_META(doc, MetaPages, page_count)
if (ctx->tn_size > 0) {
if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) {

View File

@ -14,6 +14,7 @@ typedef struct {
logf_callback_t logf;
store_callback_t store;
int fast_epub_parse;
float tn_qscale;
} scan_ebook_ctx_t;
/*
 * Ebook parsing entry point (declaration only; the definition is not part of
 * this header).
 *   ctx      - scan_ebook_ctx_t with the callbacks and options the parser uses
 *   f        - vfile_t input; NOTE(review): presumably the file to be parsed
 *   mime_str - NOTE(review): presumably the MIME type of f; confirm against
 *              the definition
 *   doc      - document_t; NOTE(review): presumably receives the extracted
 *              metadata; confirm against the definition
 */
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc);

View File

@ -26,11 +26,11 @@
strcpy(meta_str->str_val, value); \
APPEND_META(doc, meta_str)}
/*
 * Allocate a meta_line_t, store `keyname` in its key and `value` in its
 * int_val, then pass it to APPEND_META for `doc`.
 * Expands to a braced block, so call sites may omit the trailing semicolon.
 * The malloc() result is used without a NULL check.
 */
#define APPEND_INT_META(doc, keyname, value) \
    {meta_line_t *int_meta_line = malloc(sizeof(*int_meta_line)); \
    int_meta_line->key = (keyname); \
    int_meta_line->int_val = (value); \
    APPEND_META(doc, int_meta_line)}
/*
 * Allocate a meta_line_t, store `keyname` in its key and `value` in its
 * long_val, then pass it to APPEND_META for `doc`.
 * Expands to a braced block, so call sites may omit the trailing semicolon.
 * The malloc() result is used without a NULL check.
 */
#define APPEND_LONG_META(doc, keyname, value) \
    {meta_line_t *long_meta_line = malloc(sizeof(*long_meta_line)); \
    long_meta_line->key = (keyname); \
    long_meta_line->long_val = (value); \
    APPEND_META(doc, long_meta_line)}
#define APPEND_TN_META(doc, width, height) \
{meta_line_t *meta_str = malloc(sizeof(meta_line_t) + 4 + 1 + 4); \

View File

@ -261,6 +261,9 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
if (meta_duration->long_val > INT32_MAX) {
meta_duration->long_val = 0;
}
APPEND_META(doc, meta_duration)
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
@ -356,12 +359,12 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
meta_line_t *meta_w = malloc(sizeof(meta_line_t));
meta_w->key = MetaWidth;
meta_w->int_val = stream->codecpar->width;
meta_w->long_val = stream->codecpar->width;
APPEND_META(doc, meta_w)
meta_line_t *meta_h = malloc(sizeof(meta_line_t));
meta_h->key = MetaHeight;
meta_h->int_val = stream->codecpar->height;
meta_h->long_val = stream->codecpar->height;
APPEND_META(doc, meta_h)
video_stream = i;

View File

@ -114,8 +114,8 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
if (*libraw_lib->idata.software != '\0') {
APPEND_STR_META(doc, MetaExifSoftware, libraw_lib->idata.software)
}
APPEND_INT_META(doc, MetaWidth, libraw_lib->sizes.width)
APPEND_INT_META(doc, MetaHeight, libraw_lib->sizes.height)
APPEND_LONG_META(doc, MetaWidth, libraw_lib->sizes.width)
APPEND_LONG_META(doc, MetaHeight, libraw_lib->sizes.height)
char tmp[1024];
snprintf(tmp, sizeof(tmp), "%g", libraw_lib->other.iso_speed);
APPEND_STR_META(doc, MetaExifIsoSpeedRatings, tmp)

View File

@ -13,27 +13,15 @@
#define SIST_SWS_ALGO SWS_LANCZOS
/* Type-tag bits OR-ed into a metakey id to record the kind of value it carries. */
#define META_INT_MASK 0x8000
#define META_STR_MASK 0x4000
#define META_LONG_MASK 0x2000

/* Marks a declaration as intentionally unused (GCC/Clang attribute). */
#define UNUSED(x) __attribute__((__unused__)) x

/*
 * Build a tagged key from a plain id.
 * Both the argument and the whole expansion are parenthesized so that the
 * result composes safely with surrounding operators (e.g. META_STR(a + b),
 * META_STR(1) == x).
 */
#define META_STR(id) (((unsigned) (id)) | ((unsigned) META_STR_MASK))
#define META_INT(id) (((unsigned) (id)) | ((unsigned) META_INT_MASK))
#define META_LONG(id) (((unsigned) (id)) | ((unsigned) META_LONG_MASK))

/*
 * Test which type tag a key carries. Each predicate is a single parenthesized
 * expression, so it can be negated or combined (!IS_META_INT(k), a && IS_...).
 */
#define IS_META_INT(key) (((key) & META_INT_MASK) == META_INT_MASK)
#define IS_META_LONG(key) (((key) & META_LONG_MASK) == META_LONG_MASK)
#define IS_META_STR(key) (((key) & META_STR_MASK) == META_STR_MASK)
/*
 * Callback handed a key (key, key_len) and a payload (buf, buf_len).
 * render_cover calls it with the document's path MD5 as key and the encoded
 * JPEG thumbnail as payload.
 */
typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len);

/* Logging callbacks: `level` is one of the LEVEL_* values; the logf variant
 * takes a format string followed by variadic arguments. */
typedef void (*logf_callback_t)(const char *filepath, int level, char *format, ...);

typedef void (*log_callback_t)(const char *filepath, int level, char *str);

/* Status code type; values are the SCAN_* constants below. */
typedef int scan_code_t;

/*
 * SCAN_ERR_READ is defined exactly once (two definitions with different
 * replacement lists are a constraint violation), and both constants are fully
 * parenthesized so they behave as a single operand in any expression.
 */
#define SCAN_OK ((scan_code_t) 0)
#define SCAN_ERR_READ ((scan_code_t) (-1))

#define LEVEL_DEBUG 0
#define LEVEL_INFO 1
@ -56,41 +44,45 @@ typedef int scan_code_t;
/*
 * Log at LEVEL_FATAL through the `ctx` that is in scope at the call site,
 * then terminate the process with exit(-1).
 *
 * The two statements are wrapped in a braced block so that the macro acts as
 * one statement: under an unbraced `if`, both the log call and the exit are
 * guarded. The braced form works with or without a trailing semicolon at the
 * call site.
 */
#define CTX_LOG_FATALF(filepath, fmt, ...) {ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);}
#define CTX_LOG_FATAL(filepath, str) {ctx->log(filepath, LEVEL_FATAL, str); exit(-1);}
// Keys identifying the metadata entries of a document (stored in meta_line_t.key).
// This is written to file as a 16-bit int!
//
// NOTE(review): this listing contains two enumerator sets with the same names:
// first explicit values tagged through META_STR / META_INT / META_LONG, then a
// sequential list starting at MetaContent = 1 and grouped by value type. They
// cannot both live in one enum; they look like the before/after sides of the
// change. Confirm which set is intended. The numeric value of every key
// differs between the two sets, which matters for anything already written
// to file with the other numbering.
enum metakey {
// Explicitly numbered keys, tagged with their value type
MetaContent = META_STR(1),
MetaWidth = META_INT(2),
MetaHeight = META_INT(3),
MetaMediaDuration = META_LONG(4),
MetaMediaAudioCodec = META_STR(5),
MetaMediaVideoCodec = META_STR(6),
MetaMediaBitrate = META_LONG(7),
MetaArtist = META_STR(8),
MetaAlbum = META_STR(9),
MetaAlbumArtist = META_STR(10),
MetaGenre = META_STR(11),
MetaTitle = META_STR(12),
MetaFontName = META_STR(13),
MetaParent = META_STR(14),
MetaExifMake = META_STR(15),
MetaExifSoftware = META_STR(16),
MetaExifExposureTime = META_STR(17),
MetaExifFNumber = META_STR(18),
MetaExifFocalLength = META_STR(19),
MetaExifUserComment = META_STR(20),
MetaExifModel = META_STR(21),
MetaExifIsoSpeedRatings = META_STR(22),
MetaExifDateTime = META_STR(23),
MetaAuthor = META_STR(24),
MetaModifiedBy = META_STR(25),
MetaThumbnail = META_STR(26),
MetaPages = META_INT(27),
MetaExifGpsLongitudeDMS = META_STR(28),
MetaExifGpsLongitudeRef = META_STR(29),
MetaExifGpsLatitudeDMS = META_STR(30),
MetaExifGpsLatitudeRef = META_STR(31),
MetaExifGpsLatitudeDec = META_STR(32),
MetaExifGpsLongitudeDec = META_STR(33),
// String (sequential numbering starts here, at 1)
MetaContent = 1,
MetaMediaAudioCodec,
MetaMediaVideoCodec,
MetaArtist,
MetaAlbum,
MetaAlbumArtist,
MetaGenre,
MetaTitle,
MetaFontName,
MetaParent,
MetaExifMake,
MetaExifSoftware,
MetaExifExposureTime,
MetaExifFNumber,
MetaExifFocalLength,
MetaExifUserComment,
MetaExifModel,
MetaExifIsoSpeedRatings,
MetaExifDateTime,
MetaAuthor,
MetaModifiedBy,
MetaThumbnail,
// Number
MetaWidth,
MetaHeight,
MetaMediaDuration,
MetaMediaBitrate,
MetaPages,
// ?? (value type left open here; these six keys are tagged META_STR in the
// explicitly numbered list above)
MetaExifGpsLongitudeDMS,
MetaExifGpsLongitudeRef,
MetaExifGpsLatitudeDMS,
MetaExifGpsLatitudeRef,
MetaExifGpsLatitudeDec,
MetaExifGpsLongitudeDec,
};
typedef struct meta_line {
@ -98,8 +90,8 @@ typedef struct meta_line {
enum metakey key;
union {
char str_val[0];
int int_val;
unsigned long long_val;
double double_val;
};
} meta_line_t;

View File

@ -255,7 +255,7 @@ static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t
}
utf8_int32_t c;
char tmp[16];
char tmp[16] = {0};
do {
ptr = (char *) utf8codepoint(ptr, &c);

View File

@ -203,7 +203,7 @@ TEST(Ebook, CandlePdf) {
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), 500, 4);
ASSERT_NE(get_meta(&doc, MetaContent)->str_val[0], ' ');
ASSERT_NE(size_before, store_size);
ASSERT_EQ(get_meta(&doc, MetaPages)->int_val, 16);
ASSERT_EQ(get_meta(&doc, MetaPages)->long_val, 16);
cleanup(&doc, &f);
}
@ -711,8 +711,8 @@ TEST(RAW, Panasonic) {
ASSERT_STREQ(get_meta(&doc, MetaExifDateTime)->str_val, "2020:07:20 10:00:34");
ASSERT_STREQ(get_meta(&doc, MetaExifFocalLength)->str_val, "20.0");
ASSERT_STREQ(get_meta(&doc, MetaExifFNumber)->str_val, "2.0");
ASSERT_EQ(get_meta(&doc, MetaWidth)->int_val, 5200);
ASSERT_EQ(get_meta(&doc, MetaHeight)->int_val, 3904);
ASSERT_EQ(get_meta(&doc, MetaWidth)->long_val, 5200);
ASSERT_EQ(get_meta(&doc, MetaHeight)->long_val, 3904);
ASSERT_NE(size_before, store_size);
cleanup(&doc, &f);
@ -747,8 +747,8 @@ TEST(RAW, Nikon) {
ASSERT_STREQ(get_meta(&doc, MetaMediaVideoCodec)->str_val, "raw");
ASSERT_STREQ(get_meta(&doc, MetaExifModel)->str_val, "D750");
ASSERT_STREQ(get_meta(&doc, MetaExifMake)->str_val, "Nikon");
ASSERT_EQ(get_meta(&doc, MetaWidth)->int_val, 6032);
ASSERT_EQ(get_meta(&doc, MetaHeight)->int_val, 4032);
ASSERT_EQ(get_meta(&doc, MetaWidth)->long_val, 6032);
ASSERT_EQ(get_meta(&doc, MetaHeight)->long_val, 4032);
ASSERT_NE(size_before, store_size);
cleanup(&doc, &f);
@ -766,8 +766,8 @@ TEST(RAW, Sony) {
ASSERT_STREQ(get_meta(&doc, MetaMediaVideoCodec)->str_val, "raw");
ASSERT_STREQ(get_meta(&doc, MetaExifModel)->str_val, "ILCE-7RM3");
ASSERT_STREQ(get_meta(&doc, MetaExifMake)->str_val, "Sony");
ASSERT_EQ(get_meta(&doc, MetaWidth)->int_val, 7968);
ASSERT_EQ(get_meta(&doc, MetaHeight)->int_val, 5320);
ASSERT_EQ(get_meta(&doc, MetaWidth)->long_val, 7968);
ASSERT_EQ(get_meta(&doc, MetaHeight)->long_val, 5320);
ASSERT_NE(size_before, store_size);
cleanup(&doc, &f);
@ -785,8 +785,8 @@ TEST(RAW, Olympus) {
ASSERT_STREQ(get_meta(&doc, MetaMediaVideoCodec)->str_val, "raw");
ASSERT_STREQ(get_meta(&doc, MetaExifModel)->str_val, "E-M5MarkII");
ASSERT_STREQ(get_meta(&doc, MetaExifMake)->str_val, "Olympus");
ASSERT_EQ(get_meta(&doc, MetaWidth)->int_val, 4640);
ASSERT_EQ(get_meta(&doc, MetaHeight)->int_val, 3472);
ASSERT_EQ(get_meta(&doc, MetaWidth)->long_val, 4640);
ASSERT_EQ(get_meta(&doc, MetaHeight)->long_val, 3472);
ASSERT_NE(size_before, store_size);
cleanup(&doc, &f);
@ -803,8 +803,8 @@ TEST(RAW, Fuji) {
ASSERT_STREQ(get_meta(&doc, MetaMediaVideoCodec)->str_val, "raw");
ASSERT_STREQ(get_meta(&doc, MetaExifModel)->str_val, "X-T2");
ASSERT_STREQ(get_meta(&doc, MetaExifMake)->str_val, "Fujifilm");
ASSERT_EQ(get_meta(&doc, MetaWidth)->int_val, 6032);
ASSERT_EQ(get_meta(&doc, MetaHeight)->int_val, 4028);
ASSERT_EQ(get_meta(&doc, MetaWidth)->long_val, 6032);
ASSERT_EQ(get_meta(&doc, MetaHeight)->long_val, 4028);
ASSERT_NE(size_before, store_size);
cleanup(&doc, &f);
@ -823,7 +823,7 @@ TEST(Msdoc, Test1Pdf) {
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "October 2000") != nullptr);
ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "INTERNATIONAL ORGANIZATION FOR STANDARDIZATION");
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "Oliver Morgan");
ASSERT_EQ(get_meta(&doc, MetaPages)->int_val, 57);
ASSERT_EQ(get_meta(&doc, MetaPages)->long_val, 57);
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
ASSERT_NE(size_before, store_size);
@ -978,6 +978,7 @@ int main(int argc, char **argv) {
ebook_ctx.log = noop_log;
ebook_ctx.logf = noop_logf;
ebook_ctx.fast_epub_parse = 0;
ebook_ctx.tn_qscale = 1.0;
ebook_500_ctx = ebook_ctx;
ebook_500_ctx.content_size = 500;