From 0e4906bc405ebe112b55b7ca5720b923b4cbd681 Mon Sep 17 00:00:00 2001 From: simon987 Date: Sat, 15 Aug 2020 10:11:03 -0400 Subject: [PATCH] Workaround when first ebook page is blank --- CMakeLists.txt | 4 +-- libscan/ebook/ebook.c | 67 +++++++++++++++++++++++++++++++------------ test/main.cpp | 12 ++++++++ test/test_util.h | 10 +++++++ 4 files changed, 72 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 20083a3..e4514ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -172,9 +172,9 @@ if (BUILD_TESTS) add_executable(scan_ub_test test/main.cpp test/test_util.cpp test/test_util.h) target_compile_options(scan_ub_test PRIVATE -g -fsanitize=undefined -fno-omit-frame-pointer) - target_link_libraries(scan_ub_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=undefined scan) + target_link_libraries(scan_ub_test PRIVATE GTest::gtest GTest::gtest_main uuid -fsanitize=undefined scan) add_executable(scan_a_test test/main.cpp test/test_util.cpp test/test_util.h) target_compile_options(scan_a_test PRIVATE -g -fsanitize=address -fno-omit-frame-pointer) - target_link_libraries(scan_a_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=address scan) + target_link_libraries(scan_a_test PRIVATE GTest::gtest GTest::gtest_main uuid -fsanitize=address scan) endif() diff --git a/libscan/ebook/ebook.c b/libscan/ebook/ebook.c index 9f9edee..9c0f2f0 100644 --- a/libscan/ebook/ebook.c +++ b/libscan/ebook/ebook.c @@ -27,25 +27,35 @@ static void my_fz_unlock(UNUSED(void *user), int lock) { } -int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_document *fzdoc) { +int pixmap_is_blank(const fz_pixmap *pixmap) { + int pixmap_size = pixmap->n * pixmap->w * pixmap->h; + const int pixel0 = pixmap->samples[0]; + for (int i = 0; i < pixmap_size; i++) { + if (pixmap->samples[i] != pixel0) { + return FALSE; + } + } + return TRUE; +} + +fz_pixmap *load_pixmap(scan_ebook_ctx_t *ctx, int page, fz_context *fzctx, fz_document *fzdoc, document_t *doc, fz_page **cover) { int err = 0; - fz_page *cover = NULL; fz_var(cover); fz_var(err); fz_try(fzctx) - cover = fz_load_page(fzctx, fzdoc, 0); + *cover = fz_load_page(fzctx, fzdoc, page); fz_catch(fzctx) err = 1; if (err != 0) { - fz_drop_page(fzctx, cover); + fz_drop_page(fzctx, *cover); CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message) - return FALSE; + return NULL; } - fz_rect bounds = fz_bound_page(fzctx, cover); + fz_rect bounds = fz_bound_page(fzctx, *cover); float scale; float w = bounds.x1 - bounds.x0; @@ -65,12 +75,10 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d fz_device *dev = fz_new_draw_device(fzctx, m, pixmap); fz_var(err); - fz_try(fzctx) - { - fz_run_page(fzctx, cover, dev, fz_identity, NULL); + fz_try(fzctx) { + fz_run_page(fzctx, *cover, dev, fz_identity, NULL); } - fz_always(fzctx) - { + fz_always(fzctx) { fz_close_device(fzctx, dev); fz_drop_device(fzctx, dev); } @@ -79,22 +87,43 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d if (err != 0) { CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message) - fz_drop_page(fzctx, cover); + fz_drop_page(fzctx, *cover); fz_drop_pixmap(fzctx, pixmap); - return FALSE; + return NULL; } if (pixmap->n != 3) { CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n) + fz_drop_page(fzctx, *cover); + fz_drop_pixmap(fzctx, pixmap); + return NULL; + } + + return pixmap; +} + +int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_document *fzdoc) { + + fz_page *cover = NULL; + fz_pixmap *pixmap = load_pixmap(ctx, 0, fzctx, fzdoc, doc, &cover); + if (pixmap == NULL) { + return FALSE; + } + + if (pixmap_is_blank(pixmap)) { fz_drop_page(fzctx, cover); fz_drop_pixmap(fzctx, pixmap); - return FALSE; + CTX_LOG_DEBUG(doc->filepath, "Cover page is blank, using page 1 instead") + pixmap = load_pixmap(ctx, 1, fzctx, fzdoc, doc, &cover); + if (pixmap == NULL) { + return FALSE; + } } // RGB24 -> YUV420p AVFrame *scaled_frame = av_frame_alloc(); - struct SwsContext *sws_ctx= sws_getContext( + struct SwsContext *sws_ctx = sws_getContext( pixmap->w, pixmap->h, AV_PIX_FMT_RGB24, pixmap->w, pixmap->h, AV_PIX_FMT_YUV420P, SIST_SWS_ALGO, 0, 0, 0 @@ -228,7 +257,7 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev), } } -void parse_ebook_mem(scan_ebook_ctx_t *ctx, void* buf, size_t buf_len, const char* mime_str, document_t *doc) { +void parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mime_str, document_t *doc) { fz_context *fzctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); thread_ctx = *ctx; @@ -338,7 +367,7 @@ void parse_ebook_mem(scan_ebook_ctx_t *ctx, void* buf, size_t buf_len, const cha dev->clip_stroke_path = NULL; dev->clip_stroke_text = NULL; - if (ctx->tesseract_lang!= NULL) { + if (ctx->tesseract_lang != NULL) { dev->fill_image = fill_image; } @@ -394,9 +423,9 @@ void parse_ebook_mem(scan_ebook_ctx_t *ctx, void* buf, size_t buf_len, const cha fz_drop_context(fzctx); } -void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, document_t *doc) { +void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc) { size_t buf_len; - void * buf = read_all(f, &buf_len); + void *buf = read_all(f, &buf_len); if (buf == NULL) { CTX_LOG_ERROR(f->filepath, "read_all() failed") return; diff --git a/test/main.cpp b/test/main.cpp index d070607..5607fb2 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -212,6 +212,18 @@ TEST(Ebook, Epub1) { cleanup(&doc, &f); } +TEST(Ebook, EpubBlankFirstPage) { + vfile_t f; + document_t doc; + load_doc_file("libscan-test-files/test_files/ebook/EpubBlankFirstPage.epub", &f, &doc); + + parse_ebook(&ebook_500_ctx, &f, "application/epub+zip", &doc); + + ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "Design Culture"); + ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), 500, 4); + cleanup(&doc, &f); +} + /* Comic */ TEST(Comic, ComicCbz) { vfile_t f; diff --git a/test/test_util.h b/test/test_util.h index 26022be..32bde2a 100644 --- a/test/test_util.h +++ b/test/test_util.h @@ -2,6 +2,9 @@ #define SCAN_TEST_UTIL_H #include "../libscan/scan.h" +#include +#include +#include void load_file(const char *filepath, vfile_t *f); void load_mem(void *mem, size_t size, vfile_t *f); @@ -21,6 +24,13 @@ static size_t store_size = 0; static void counter_store(char* key, size_t key_len, char *value, size_t value_len) { store_size += value_len; +// char id[37]; +// char tmp[PATH_MAX]; +// uuid_unparse(reinterpret_cast(key), id); +// sprintf(tmp, "%s.jpeg", id); +// int fd = open(tmp, O_TRUNC|O_WRONLY|O_CREAT, 0777); +// write(fd, value, value_len); +// close(fd); } meta_line_t *get_meta(document_t *doc, metakey key);