Workaround when first ebook page is blank

This commit is contained in:
simon987 2020-08-15 10:11:03 -04:00
parent e344c7440c
commit 0e4906bc40
4 changed files with 72 additions and 21 deletions

View File

@ -172,9 +172,9 @@ if (BUILD_TESTS)
add_executable(scan_ub_test test/main.cpp test/test_util.cpp test/test_util.h)
target_compile_options(scan_ub_test PRIVATE -g -fsanitize=undefined -fno-omit-frame-pointer)
target_link_libraries(scan_ub_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=undefined scan)
target_link_libraries(scan_ub_test PRIVATE GTest::gtest GTest::gtest_main uuid -fsanitize=undefined scan)
add_executable(scan_a_test test/main.cpp test/test_util.cpp test/test_util.h)
target_compile_options(scan_a_test PRIVATE -g -fsanitize=address -fno-omit-frame-pointer)
target_link_libraries(scan_a_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=address scan)
target_link_libraries(scan_a_test PRIVATE GTest::gtest GTest::gtest_main uuid -fsanitize=address scan)
endif()

View File

@ -27,25 +27,35 @@ static void my_fz_unlock(UNUSED(void *user), int lock) {
}
int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_document *fzdoc) {
/**
 * Report whether every sample byte of a pixmap holds the same value, i.e. the
 * rendered page is a single uniform colour.
 *
 * The scan covers n * w * h bytes starting at pixmap->samples.
 * NOTE(review): this assumes rows are stored back to back with no per-row
 * padding - confirm against the pixmap layout used by the renderer.
 *
 * @param pixmap pixmap to inspect
 * @return TRUE when all samples are identical or when the pixmap holds no
 *         samples at all, FALSE as soon as one differing sample is found
 */
int pixmap_is_blank(const fz_pixmap *pixmap) {
    // An empty pixmap has no first sample to compare against; reading
    // samples[0] would be out of bounds, so treat it as blank.
    if (pixmap->samples == NULL || pixmap->n <= 0 || pixmap->w <= 0 || pixmap->h <= 0) {
        return TRUE;
    }

    // Count in size_t so that large pages cannot overflow a signed int.
    const size_t sample_count = (size_t) pixmap->n * (size_t) pixmap->w * (size_t) pixmap->h;
    const unsigned char *samples = pixmap->samples;
    const unsigned char first = samples[0];

    for (size_t i = 1; i < sample_count; i++) {
        if (samples[i] != first) {
            return FALSE;
        }
    }
    return TRUE;
}
fz_pixmap *load_pixmap(scan_ebook_ctx_t *ctx, int page, fz_context *fzctx, fz_document *fzdoc, document_t *doc, fz_page **cover) {
int err = 0;
fz_page *cover = NULL;
fz_var(cover);
fz_var(err);
fz_try(fzctx)
cover = fz_load_page(fzctx, fzdoc, 0);
*cover = fz_load_page(fzctx, fzdoc, page);
fz_catch(fzctx)
err = 1;
if (err != 0) {
fz_drop_page(fzctx, cover);
fz_drop_page(fzctx, *cover);
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
return FALSE;
return NULL;
}
fz_rect bounds = fz_bound_page(fzctx, cover);
fz_rect bounds = fz_bound_page(fzctx, *cover);
float scale;
float w = bounds.x1 - bounds.x0;
@ -65,12 +75,10 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
fz_device *dev = fz_new_draw_device(fzctx, m, pixmap);
fz_var(err);
fz_try(fzctx)
{
fz_run_page(fzctx, cover, dev, fz_identity, NULL);
fz_try(fzctx) {
fz_run_page(fzctx, *cover, dev, fz_identity, NULL);
}
fz_always(fzctx)
{
fz_always(fzctx) {
fz_close_device(fzctx, dev);
fz_drop_device(fzctx, dev);
}
@ -79,22 +87,43 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
if (err != 0) {
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
fz_drop_page(fzctx, cover);
fz_drop_page(fzctx, *cover);
fz_drop_pixmap(fzctx, pixmap);
return FALSE;
return NULL;
}
if (pixmap->n != 3) {
CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n)
fz_drop_page(fzctx, *cover);
fz_drop_pixmap(fzctx, pixmap);
return NULL;
}
return pixmap;
}
int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_document *fzdoc) {
fz_page *cover = NULL;
fz_pixmap *pixmap = load_pixmap(ctx, 0, fzctx, fzdoc, doc, &cover);
if (pixmap == NULL) {
return FALSE;
}
if (pixmap_is_blank(pixmap)) {
fz_drop_page(fzctx, cover);
fz_drop_pixmap(fzctx, pixmap);
return FALSE;
CTX_LOG_DEBUG(doc->filepath, "Cover page is blank, using page 1 instead")
pixmap = load_pixmap(ctx, 1, fzctx, fzdoc, doc, &cover);
if (pixmap == NULL) {
return FALSE;
}
}
// RGB24 -> YUV420p
AVFrame *scaled_frame = av_frame_alloc();
struct SwsContext *sws_ctx= sws_getContext(
struct SwsContext *sws_ctx = sws_getContext(
pixmap->w, pixmap->h, AV_PIX_FMT_RGB24,
pixmap->w, pixmap->h, AV_PIX_FMT_YUV420P,
SIST_SWS_ALGO, 0, 0, 0
@ -228,7 +257,7 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
}
}
void parse_ebook_mem(scan_ebook_ctx_t *ctx, void* buf, size_t buf_len, const char* mime_str, document_t *doc) {
void parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mime_str, document_t *doc) {
fz_context *fzctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
thread_ctx = *ctx;
@ -338,7 +367,7 @@ void parse_ebook_mem(scan_ebook_ctx_t *ctx, void* buf, size_t buf_len, const cha
dev->clip_stroke_path = NULL;
dev->clip_stroke_text = NULL;
if (ctx->tesseract_lang!= NULL) {
if (ctx->tesseract_lang != NULL) {
dev->fill_image = fill_image;
}
@ -394,9 +423,9 @@ void parse_ebook_mem(scan_ebook_ctx_t *ctx, void* buf, size_t buf_len, const cha
fz_drop_context(fzctx);
}
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, document_t *doc) {
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc) {
size_t buf_len;
void * buf = read_all(f, &buf_len);
void *buf = read_all(f, &buf_len);
if (buf == NULL) {
CTX_LOG_ERROR(f->filepath, "read_all() failed")
return;

View File

@ -212,6 +212,18 @@ TEST(Ebook, Epub1) {
cleanup(&doc, &f);
}
// Parses an EPUB fixture whose first page is blank and checks that the title
// and the extracted text content are still produced.
TEST(Ebook, EpubBlankFirstPage) {
    vfile_t f;
    document_t doc;
    load_doc_file("libscan-test-files/test_files/ebook/EpubBlankFirstPage.epub", &f, &doc);

    parse_ebook(&ebook_500_ctx, &f, "application/epub+zip", &doc);

    // Check the lookups before dereferencing them so that missing metadata is
    // reported as a test failure rather than crashing the test binary.
    meta_line_t *title = get_meta(&doc, MetaTitle);
    ASSERT_TRUE(title != nullptr);
    ASSERT_STREQ(title->str_val, "Design Culture");

    meta_line_t *content = get_meta(&doc, MetaContent);
    ASSERT_TRUE(content != nullptr);
    // Content length is expected to be 500, with a tolerance of 4.
    ASSERT_NEAR(strlen(content->str_val), 500, 4);

    cleanup(&doc, &f);
}
/* Comic */
TEST(Comic, ComicCbz) {
vfile_t f;

View File

@ -2,6 +2,9 @@
#define SCAN_TEST_UTIL_H
#include "../libscan/scan.h"
#include <fcntl.h>
#include <unistd.h>
#include <uuid/uuid.h>
void load_file(const char *filepath, vfile_t *f);
void load_mem(void *mem, size_t size, vfile_t *f);
@ -21,6 +24,13 @@ static size_t store_size = 0;
// Store callback used by the tests: it keeps nothing and only adds the size
// of each stored value to the running total in store_size.
static void counter_store(char *key, size_t key_len, char *value, size_t value_len) {
    // Only the value length is consumed; the other arguments are ignored.
    (void) key;
    (void) key_len;
    (void) value;

    store_size = store_size + value_len;

    // Debug aid, disabled by default: uncomment to write every stored value to
    // "<id>.jpeg" in the working directory (id is obtained by passing the key
    // to uuid_unparse).
    //
    // char id[37];
    // char tmp[PATH_MAX];
    // uuid_unparse(reinterpret_cast<const unsigned char *>(key), id);
    // sprintf(tmp, "%s.jpeg", id);
    // int fd = open(tmp, O_TRUNC|O_WRONLY|O_CREAT, 0777);
    // write(fd, value, value_len);
    // close(fd);
}
meta_line_t *get_meta(document_t *doc, metakey key);