mirror of
https://github.com/simon987/libscan.git
synced 2025-04-05 12:23:00 +00:00
Workaround when first ebook page is blank
This commit is contained in:
parent
e344c7440c
commit
0e4906bc40
@ -172,9 +172,9 @@ if (BUILD_TESTS)
|
|||||||
|
|
||||||
add_executable(scan_ub_test test/main.cpp test/test_util.cpp test/test_util.h)
|
add_executable(scan_ub_test test/main.cpp test/test_util.cpp test/test_util.h)
|
||||||
target_compile_options(scan_ub_test PRIVATE -g -fsanitize=undefined -fno-omit-frame-pointer)
|
target_compile_options(scan_ub_test PRIVATE -g -fsanitize=undefined -fno-omit-frame-pointer)
|
||||||
target_link_libraries(scan_ub_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=undefined scan)
|
target_link_libraries(scan_ub_test PRIVATE GTest::gtest GTest::gtest_main uuid -fsanitize=undefined scan)
|
||||||
|
|
||||||
add_executable(scan_a_test test/main.cpp test/test_util.cpp test/test_util.h)
|
add_executable(scan_a_test test/main.cpp test/test_util.cpp test/test_util.h)
|
||||||
target_compile_options(scan_a_test PRIVATE -g -fsanitize=address -fno-omit-frame-pointer)
|
target_compile_options(scan_a_test PRIVATE -g -fsanitize=address -fno-omit-frame-pointer)
|
||||||
target_link_libraries(scan_a_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=address scan)
|
target_link_libraries(scan_a_test PRIVATE GTest::gtest GTest::gtest_main uuid -fsanitize=address scan)
|
||||||
endif()
|
endif()
|
||||||
|
@ -27,25 +27,35 @@ static void my_fz_unlock(UNUSED(void *user), int lock) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_document *fzdoc) {
|
int pixmap_is_blank(const fz_pixmap *pixmap) {
|
||||||
|
int pixmap_size = pixmap->n * pixmap->w * pixmap->h;
|
||||||
|
const int pixel0 = pixmap->samples[0];
|
||||||
|
for (int i = 0; i < pixmap_size; i++) {
|
||||||
|
if (pixmap->samples[i] != pixel0) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
fz_pixmap *load_pixmap(scan_ebook_ctx_t *ctx, int page, fz_context *fzctx, fz_document *fzdoc, document_t *doc, fz_page **cover) {
|
||||||
|
|
||||||
int err = 0;
|
int err = 0;
|
||||||
fz_page *cover = NULL;
|
|
||||||
|
|
||||||
fz_var(cover);
|
fz_var(cover);
|
||||||
fz_var(err);
|
fz_var(err);
|
||||||
fz_try(fzctx)
|
fz_try(fzctx)
|
||||||
cover = fz_load_page(fzctx, fzdoc, 0);
|
*cover = fz_load_page(fzctx, fzdoc, page);
|
||||||
fz_catch(fzctx)
|
fz_catch(fzctx)
|
||||||
err = 1;
|
err = 1;
|
||||||
|
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
fz_drop_page(fzctx, cover);
|
fz_drop_page(fzctx, *cover);
|
||||||
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
|
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||||
return FALSE;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
fz_rect bounds = fz_bound_page(fzctx, cover);
|
fz_rect bounds = fz_bound_page(fzctx, *cover);
|
||||||
|
|
||||||
float scale;
|
float scale;
|
||||||
float w = bounds.x1 - bounds.x0;
|
float w = bounds.x1 - bounds.x0;
|
||||||
@ -65,12 +75,10 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
|
|||||||
fz_device *dev = fz_new_draw_device(fzctx, m, pixmap);
|
fz_device *dev = fz_new_draw_device(fzctx, m, pixmap);
|
||||||
|
|
||||||
fz_var(err);
|
fz_var(err);
|
||||||
fz_try(fzctx)
|
fz_try(fzctx) {
|
||||||
{
|
fz_run_page(fzctx, *cover, dev, fz_identity, NULL);
|
||||||
fz_run_page(fzctx, cover, dev, fz_identity, NULL);
|
|
||||||
}
|
}
|
||||||
fz_always(fzctx)
|
fz_always(fzctx) {
|
||||||
{
|
|
||||||
fz_close_device(fzctx, dev);
|
fz_close_device(fzctx, dev);
|
||||||
fz_drop_device(fzctx, dev);
|
fz_drop_device(fzctx, dev);
|
||||||
}
|
}
|
||||||
@ -79,22 +87,43 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
|
|||||||
|
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
|
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
|
||||||
fz_drop_page(fzctx, cover);
|
fz_drop_page(fzctx, *cover);
|
||||||
fz_drop_pixmap(fzctx, pixmap);
|
fz_drop_pixmap(fzctx, pixmap);
|
||||||
return FALSE;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pixmap->n != 3) {
|
if (pixmap->n != 3) {
|
||||||
CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n)
|
CTX_LOG_ERRORF(doc->filepath, "Got unexpected pixmap depth: %d", pixmap->n)
|
||||||
|
fz_drop_page(fzctx, *cover);
|
||||||
|
fz_drop_pixmap(fzctx, pixmap);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return pixmap;
|
||||||
|
}
|
||||||
|
|
||||||
|
int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_document *fzdoc) {
|
||||||
|
|
||||||
|
fz_page *cover = NULL;
|
||||||
|
fz_pixmap *pixmap = load_pixmap(ctx, 0, fzctx, fzdoc, doc, &cover);
|
||||||
|
if (pixmap == NULL) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pixmap_is_blank(pixmap)) {
|
||||||
fz_drop_page(fzctx, cover);
|
fz_drop_page(fzctx, cover);
|
||||||
fz_drop_pixmap(fzctx, pixmap);
|
fz_drop_pixmap(fzctx, pixmap);
|
||||||
return FALSE;
|
CTX_LOG_DEBUG(doc->filepath, "Cover page is blank, using page 1 instead")
|
||||||
|
pixmap = load_pixmap(ctx, 1, fzctx, fzdoc, doc, &cover);
|
||||||
|
if (pixmap == NULL) {
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// RGB24 -> YUV420p
|
// RGB24 -> YUV420p
|
||||||
AVFrame *scaled_frame = av_frame_alloc();
|
AVFrame *scaled_frame = av_frame_alloc();
|
||||||
|
|
||||||
struct SwsContext *sws_ctx= sws_getContext(
|
struct SwsContext *sws_ctx = sws_getContext(
|
||||||
pixmap->w, pixmap->h, AV_PIX_FMT_RGB24,
|
pixmap->w, pixmap->h, AV_PIX_FMT_RGB24,
|
||||||
pixmap->w, pixmap->h, AV_PIX_FMT_YUV420P,
|
pixmap->w, pixmap->h, AV_PIX_FMT_YUV420P,
|
||||||
SIST_SWS_ALGO, 0, 0, 0
|
SIST_SWS_ALGO, 0, 0, 0
|
||||||
@ -228,7 +257,7 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_ebook_mem(scan_ebook_ctx_t *ctx, void* buf, size_t buf_len, const char* mime_str, document_t *doc) {
|
void parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mime_str, document_t *doc) {
|
||||||
|
|
||||||
fz_context *fzctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
|
fz_context *fzctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
|
||||||
thread_ctx = *ctx;
|
thread_ctx = *ctx;
|
||||||
@ -338,7 +367,7 @@ void parse_ebook_mem(scan_ebook_ctx_t *ctx, void* buf, size_t buf_len, const cha
|
|||||||
dev->clip_stroke_path = NULL;
|
dev->clip_stroke_path = NULL;
|
||||||
dev->clip_stroke_text = NULL;
|
dev->clip_stroke_text = NULL;
|
||||||
|
|
||||||
if (ctx->tesseract_lang!= NULL) {
|
if (ctx->tesseract_lang != NULL) {
|
||||||
dev->fill_image = fill_image;
|
dev->fill_image = fill_image;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -394,9 +423,9 @@ void parse_ebook_mem(scan_ebook_ctx_t *ctx, void* buf, size_t buf_len, const cha
|
|||||||
fz_drop_context(fzctx);
|
fz_drop_context(fzctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, document_t *doc) {
|
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc) {
|
||||||
size_t buf_len;
|
size_t buf_len;
|
||||||
void * buf = read_all(f, &buf_len);
|
void *buf = read_all(f, &buf_len);
|
||||||
if (buf == NULL) {
|
if (buf == NULL) {
|
||||||
CTX_LOG_ERROR(f->filepath, "read_all() failed")
|
CTX_LOG_ERROR(f->filepath, "read_all() failed")
|
||||||
return;
|
return;
|
||||||
|
@ -212,6 +212,18 @@ TEST(Ebook, Epub1) {
|
|||||||
cleanup(&doc, &f);
|
cleanup(&doc, &f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parses an EPUB whose first page is blank and checks that the title and
// roughly 500 characters of content are still extracted.
TEST(Ebook, EpubBlankFirstPage) {
    vfile_t f;
    document_t doc;
    load_doc_file("libscan-test-files/test_files/ebook/EpubBlankFirstPage.epub", &f, &doc);

    parse_ebook(&ebook_500_ctx, &f, "application/epub+zip", &doc);

    // Guard the lookups so a missing entry fails the test instead of
    // dereferencing a null pointer (assumes get_meta yields NULL when the
    // key is absent -- TODO confirm against test_util.cpp).
    meta_line_t *title = get_meta(&doc, MetaTitle);
    ASSERT_NE(title, nullptr);
    ASSERT_STREQ(title->str_val, "Design Culture");

    meta_line_t *content = get_meta(&doc, MetaContent);
    ASSERT_NE(content, nullptr);
    ASSERT_NEAR(strlen(content->str_val), 500, 4);

    cleanup(&doc, &f);
}
|
||||||
|
|
||||||
/* Comic */
|
/* Comic */
|
||||||
TEST(Comic, ComicCbz) {
|
TEST(Comic, ComicCbz) {
|
||||||
vfile_t f;
|
vfile_t f;
|
||||||
|
@ -2,6 +2,9 @@
|
|||||||
#define SCAN_TEST_UTIL_H
|
#define SCAN_TEST_UTIL_H
|
||||||
|
|
||||||
#include "../libscan/scan.h"
|
#include "../libscan/scan.h"
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <uuid/uuid.h>
|
||||||
|
|
||||||
void load_file(const char *filepath, vfile_t *f);
|
void load_file(const char *filepath, vfile_t *f);
|
||||||
void load_mem(void *mem, size_t size, vfile_t *f);
|
void load_mem(void *mem, size_t size, vfile_t *f);
|
||||||
@ -21,6 +24,13 @@ static size_t store_size = 0;
|
|||||||
|
|
||||||
static void counter_store(char* key, size_t key_len, char *value, size_t value_len) {
|
static void counter_store(char* key, size_t key_len, char *value, size_t value_len) {
|
||||||
store_size += value_len;
|
store_size += value_len;
|
||||||
|
// char id[37];
|
||||||
|
// char tmp[PATH_MAX];
|
||||||
|
// uuid_unparse(reinterpret_cast<const unsigned char *>(key), id);
|
||||||
|
// sprintf(tmp, "%s.jpeg", id);
|
||||||
|
// int fd = open(tmp, O_TRUNC|O_WRONLY|O_CREAT, 0777);
|
||||||
|
// write(fd, value, value_len);
|
||||||
|
// close(fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
meta_line_t *get_meta(document_t *doc, metakey key);
|
meta_line_t *get_meta(document_t *doc, metakey key);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user