Add thumbnail-count option

This commit is contained in:
2022-02-19 13:43:33 -05:00
parent a0db49e7d8
commit 3d4331b27d
37 changed files with 651 additions and 23780 deletions

View File

@@ -12,7 +12,7 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
struct archive_entry *entry = NULL;
arc_data_t arc_data;
if (ctx->tn_size <= 0) {
if (!ctx->enable_tn) {
return;
}
@@ -44,7 +44,20 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
break;
}
ret = store_image_thumbnail((scan_media_ctx_t *) ctx, buf, entry_size, doc, file_path);
scan_media_ctx_t media_ctx = {
.tn_count = ctx->enable_tn ? 1 : 0,
.tn_size = ctx->tn_size,
.tn_qscale = ctx->tn_qscale,
.tesseract_lang = NULL,
.tesseract_path = NULL,
.read_subtitles = FALSE,
.max_media_buffer = 0,
.log = ctx->log,
.logf = ctx->logf,
.store = ctx->store,
};
ret = store_image_thumbnail(&media_ctx, buf, entry_size, doc, file_path);
free(buf);
if (ret == TRUE) {

View File

@@ -9,6 +9,7 @@ typedef struct {
logf_callback_t logf;
store_callback_t store;
int enable_tn;
int tn_size;
float tn_qscale;

View File

@@ -155,7 +155,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_TN_META(doc, pixmap->w, pixmap->h)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
free(samples);
@@ -283,7 +283,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
APPEND_LONG_META(doc, MetaPages, page_count)
if (ctx->tn_size > 0) {
if (ctx->enable_tn) {
if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) {
fz_drop_stream(fzctx, stream);
fz_drop_document(fzctx, fzdoc);
@@ -404,7 +404,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {
text_buffer_t content_buffer = text_buffer_create(ctx->content_size);
if (ctx->tn_size <= 0) {
if (!ctx->enable_tn) {
return;
}

View File

@@ -6,6 +6,7 @@
typedef struct {
long content_size;
int tn_size;
int enable_tn;
const char *tesseract_lang;
const char *tesseract_path;
pthread_mutex_t mupdf_mutex;

View File

@@ -176,7 +176,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
strcpy(meta_name->str_val, font_name);
APPEND_META(doc, meta_name)
if (ctx->enable_tn == TRUE) {
if (!ctx->enable_tn) {
FT_Done_Face(face);
free(buf);
return;
@@ -231,7 +231,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
dyn_buffer_t bmp_data = dyn_buffer_create();
bmp_format(&bmp_data, dimensions, bitmap);
APPEND_TN_META(doc, dimensions.width, dimensions.height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data);

View File

@@ -35,11 +35,6 @@
meta_long->long_val = value; \
APPEND_META(doc, meta_long)}
#define APPEND_TN_META(doc, width, height) \
{meta_line_t *meta_str = malloc(sizeof(meta_line_t) + 4 + 1 + 4); \
meta_str->key = MetaThumbnail; \
sprintf(meta_str->str_val, "%04d,%04d", width, height); \
APPEND_META(doc, meta_str)}
#define APPEND_META(doc, meta) \
meta->next = NULL;\

View File

@@ -6,7 +6,6 @@
#define AVIO_BUF_SIZE 8192
#define IS_VIDEO(fmt) ((fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0)
#define STREAM_IS_IMAGE (stream->nb_frames <= 1)
#define STORE_AS_IS ((void*)-1)
@@ -398,6 +397,110 @@ void ocr_image(scan_media_ctx_t *ctx, document_t *doc, const AVCodecContext *dec
av_frame_free(&rgb_frame);
}
/* Status codes returned by decode_frame_and_save_thumbnail(). */
/* A thumbnail was handed to the store callback. */
#define SAVE_THUMBNAIL_OK 0
/* The frame was decoded but intentionally not stored. */
#define SAVE_THUMBNAIL_SKIPPED 1
/* Seeking, reading or scaling the frame failed; nothing was stored. */
#define SAVE_THUMBNAIL_FAILED 2
/**
 * Seek into the media, decode one frame and store it as a thumbnail.
 *
 * For inputs that IS_VIDEO() reports as video (and that are not GIFs) the
 * function first seeks to `seek_ratio * pFormatCtx->duration`, allowing one
 * second of slack on either side. A failed seek is tolerated only for the
 * first thumbnail, which then uses whatever frame read_frame() returns.
 *
 * @param ctx             scan context; tn_size, tn_qscale, tesseract_lang and
 *                        the store callback are read from it
 * @param pFormatCtx      opened input format context
 * @param decoder         decoder context for the selected video stream
 * @param stream          the selected video stream
 * @param video_stream    index of that stream, forwarded to read_frame()
 * @param doc             document being scanned; doc->path_md5 is the store key
 * @param seek_ratio      fraction of the total duration to seek to
 * @param thumbnail_index 0 for the primary thumbnail (stored under path_md5);
 *                        1 is decoded but skipped; values > 1 are stored under
 *                        path_md5 followed by the raw bytes of (index - 1)
 * @return SAVE_THUMBNAIL_OK, SAVE_THUMBNAIL_SKIPPED or SAVE_THUMBNAIL_FAILED
 */
int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *decoder,
                                    AVStream *stream, int video_stream, document_t *doc, double seek_ratio,
                                    int thumbnail_index) {

    if (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
        int seek_ok = FALSE;

        // Timestamps are 64-bit in FFmpeg; `long` would truncate on LLP64 targets.
        double target_timestamp = (double) pFormatCtx->duration * seek_ratio;
        int64_t ts = (int64_t) target_timestamp;

        int seek_ret = avformat_seek_file(
                // Allow +- 1s
                pFormatCtx, -1, ts - AV_TIME_BASE, ts, ts + AV_TIME_BASE,
                0
        );

        if (seek_ret == 0) {
            seek_ok = TRUE;
        } else {
            CTX_LOG_DEBUGF(
                    doc->filepath,
                    "(media.c) Could not seek media file: %s", av_err2str(seek_ret)
            )
        }

        // Without a working seek every extra thumbnail would show the same
        // frame, so only the first one is allowed to proceed.
        if (seek_ok == FALSE && thumbnail_index != 0) {
            CTX_LOG_WARNING(doc->filepath, "(media.c) Could not seek media file. Can't generate additional thumbnails.")
            return SAVE_THUMBNAIL_FAILED;
        }
    }

    frame_and_packet_t *frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
    if (frame_and_packet == NULL) {
        return SAVE_THUMBNAIL_FAILED;
    }

    // NOTE(review): the code this function replaced gated OCR on
    // STREAM_IS_IMAGE; gating on IS_VIDEO looks inverted - confirm the intent.
    if (ctx->tesseract_lang != NULL && IS_VIDEO(pFormatCtx)) {
        ocr_image(ctx, doc, decoder, frame_and_packet->frame);
    }

    // NOTE: OCR'd content takes precedence over exif image description
    if (thumbnail_index == 0) {
        append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));
    }

    // Scale frame
    AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);

    if (scaled_frame == NULL) {
        frame_and_packet_free(frame_and_packet);
        return SAVE_THUMBNAIL_FAILED;
    }

    int return_value;

    if (scaled_frame == STORE_AS_IS) {
        // The original packet is stored untouched.
        return_value = SAVE_THUMBNAIL_OK;
        ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
                   frame_and_packet->packet->size);
    } else {
        if (thumbnail_index != 0 && thumbnail_index <= 1) {
            // TO FIX: the 2nd rendered frame is always broken, just skip it until
            //  I figure out a better fix. Nothing is stored, so don't encode it.
            return_value = SAVE_THUMBNAIL_SKIPPED;
        } else {
            // Encode frame to jpeg
            AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
                                                              ctx->tn_qscale);

            AVPacket jpeg_packet;
            av_init_packet(&jpeg_packet);
            // av_init_packet() leaves data/size untouched; give them defined values.
            jpeg_packet.data = NULL;
            jpeg_packet.size = 0;

            if (jpeg_encoder == NULL
                || avcodec_send_frame(jpeg_encoder, scaled_frame) < 0
                || avcodec_receive_packet(jpeg_encoder, &jpeg_packet) < 0) {
                // Never hand an empty or invalid packet to the store callback.
                CTX_LOG_WARNING(doc->filepath, "(media.c) Could not encode thumbnail")
                return_value = SAVE_THUMBNAIL_FAILED;
            } else if (thumbnail_index == 0) {
                // Save thumbnail
                ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data,
                           jpeg_packet.size);
                return_value = SAVE_THUMBNAIL_OK;
            } else {
                // Index 1 is skipped, so stored extra thumbnails are numbered
                // from 1: key = path_md5 followed by the raw int (index - 1).
                int stored_index = thumbnail_index - 1;

                char tn_key[sizeof(doc->path_md5) + sizeof(int)];
                memcpy(tn_key, doc->path_md5, sizeof(doc->path_md5));
                memcpy(tn_key + sizeof(doc->path_md5), &stored_index, sizeof(stored_index));

                ctx->store((char *) tn_key, sizeof(tn_key), (char *) jpeg_packet.data, jpeg_packet.size);
                return_value = SAVE_THUMBNAIL_OK;
            }

            avcodec_free_context(&jpeg_encoder);
            av_packet_unref(&jpeg_packet);
        }

        av_free(*scaled_frame->data);
        av_frame_free(&scaled_frame);
    }

    frame_and_packet_free(frame_and_packet);

    return return_value;
}
void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) {
int video_stream = -1;
@@ -458,7 +561,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
append_audio_meta(pFormatCtx, doc);
}
if (video_stream != -1 && ctx->tn_size > 0) {
if (video_stream != -1 && ctx->tn_count > 0) {
AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
@@ -473,69 +576,38 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
avcodec_parameters_to_context(decoder, stream->codecpar);
avcodec_open2(decoder, video_codec, NULL);
//Seek
if (!STREAM_IS_IMAGE && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
int seek_ret;
for (int i = 20; i >= 0; i--) {
seek_ret = av_seek_frame(pFormatCtx, video_stream,
(long) ((double) stream->duration * 0.10), 0);
if (seek_ret == 0) {
break;
}
int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE);
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF && video_duration_in_seconds >= 15)
// Limit to ~1 thumbnail every 5s
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 5 + 1), 1) + 1
: 1;
const double seek_increment = thumbnails_to_generate == 1
? 0.10
: 1.0 / (thumbnails_to_generate + 1);
int number_of_thumbnails_generated = 0;
int save_thumbnail_ret;
for (int i = 0; i < thumbnails_to_generate; i++) {
double seek_ratio = seek_increment * i + seek_increment * 0.9;
save_thumbnail_ret = decode_frame_and_save_thumbnail(ctx, pFormatCtx, decoder, stream, video_stream, doc,
seek_ratio, i);
if (save_thumbnail_ret == SAVE_THUMBNAIL_FAILED) {
break;
}
if (save_thumbnail_ret == SAVE_THUMBNAIL_OK) {
number_of_thumbnails_generated += 1;
}
}
frame_and_packet_t *frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
if (frame_and_packet == NULL) {
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
if (number_of_thumbnails_generated > 0) {
APPEND_LONG_META(doc, MetaThumbnail, number_of_thumbnails_generated)
}
if (ctx->tesseract_lang != NULL && STREAM_IS_IMAGE) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
}
// NOTE: OCR'd content takes precedence over exif image description
append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));
// Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);
if (scaled_frame == NULL) {
frame_and_packet_free(frame_and_packet);
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
if (scaled_frame == STORE_AS_IS) {
APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
ctx->tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame);
AVPacket jpeg_packet;
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
avcodec_free_context(&jpeg_encoder);
av_packet_unref(&jpeg_packet);
av_free(*scaled_frame->data);
av_frame_free(&scaled_frame);
}
frame_and_packet_free(frame_and_packet);
avcodec_free_context(&decoder);
}
@@ -772,7 +844,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
}
if (scaled_frame == STORE_AS_IS) {
APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
@@ -786,7 +858,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);

View File

@@ -17,6 +17,9 @@ typedef struct {
int tn_size;
float tn_qscale;
/** Number of thumbnails to generate for videos */
int tn_count;
long max_media_buffer;
int read_subtitles;

View File

@@ -76,6 +76,7 @@ void parse_msdoc_pdf(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file, void *b
scan_ebook_ctx_t ebook_ctx = {
.content_size = ctx->content_size,
.tn_size = ctx->tn_size,
.enable_tn = TRUE,
.log = ctx->log,
.logf = ctx->logf,
.store = ctx->store,
@@ -137,7 +138,7 @@ void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc) {
return;
}
if (ctx->tn_size > 0) {
if (ctx->enable_tn) {
char *buf_pdf = malloc(buf_len);
memcpy(buf_pdf, buf, buf_len);
parse_msdoc_pdf(ctx, doc, file, buf_pdf, buf_len);

View File

@@ -5,6 +5,7 @@
typedef struct {
long content_size;
int enable_tn;
int tn_size;
log_callback_t log;
logf_callback_t logf;

View File

@@ -190,7 +190,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
char *buf = malloc(entry_size);
archive_read_data(a, buf, entry_size);
APPEND_TN_META(doc, 1, 1) // Size unknown
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, entry_size);
free(buf);
}
@@ -238,7 +238,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
if (read_doc_props(ctx, a, doc) != 0) {
break;
}
} else if (strcmp(path, "docProps/thumbnail.jpeg") == 0) {
} else if (ctx->enable_tn && strcmp(path, "docProps/thumbnail.jpeg") == 0) {
read_thumbnail(ctx, doc, a, entry);
}
}

View File

@@ -5,6 +5,7 @@
#include "../scan.h"
typedef struct {
int enable_tn;
long content_size;
log_callback_t log;
logf_callback_t logf;

View File

@@ -69,7 +69,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
@@ -157,7 +157,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
APPEND_STR_META(doc, MetaMediaVideoCodec, "raw")
if (ctx->tn_size <= 0) {
if (!ctx->enable_tn) {
free(buf);
libraw_close(libraw_lib);
return;

View File

@@ -8,6 +8,7 @@ typedef struct {
logf_callback_t logf;
store_callback_t store;
int enable_tn;
int tn_size;
float tn_qscale;
} scan_raw_ctx_t;

View File

@@ -350,9 +350,13 @@ TEST(Comic, ComicIssue160) {
load_doc_file("libscan-test-files/test_files/ebook/comic-segfault-issue-160.cbr", &f, &doc);
int tn_size_saved = comic_ctx.tn_size;
comic_ctx.tn_size = 0;
size_t size_before = store_size;
comic_ctx.enable_tn = FALSE;
parse_comic(&comic_ctx, &f, &doc);
comic_ctx.tn_size = tn_size_saved;
comic_ctx.enable_tn = tn_size_saved;
ASSERT_EQ(store_size, size_before);
cleanup(&doc, &f);
}
@@ -669,8 +673,6 @@ TEST(Ooxml, Docx2Archive) {
ASSERT_EQ(get_meta(&LastSubDoc, MetaPages)->long_val, 1);
ASSERT_EQ(strlen(get_meta(&LastSubDoc, MetaContent)->str_val), 2780);
fprintf(stderr, "%s\n", get_meta(&LastSubDoc, MetaContent)->str_val);
ooxml_500_ctx.content_size = 500;
cleanup(&doc, &f);
@@ -1111,6 +1113,7 @@ int main(int argc, char **argv) {
ebook_ctx.tesseract_lang = "eng";
ebook_ctx.tesseract_path = "./tessdata";
ebook_ctx.tn_size = 500;
ebook_ctx.enable_tn = TRUE;
ebook_ctx.log = noop_log;
ebook_ctx.logf = noop_logf;
ebook_ctx.fast_epub_parse = 0;
@@ -1124,12 +1127,14 @@ int main(int argc, char **argv) {
comic_ctx.tn_qscale = 1.0;
comic_ctx.tn_size = 500;
comic_ctx.enable_tn = TRUE;
comic_ctx.log = noop_log;
comic_ctx.logf = noop_logf;
comic_ctx.store = counter_store;
comic_big_ctx.tn_qscale = 1.0;
comic_big_ctx.tn_size = 5000;
comic_big_ctx.enable_tn = TRUE;
comic_big_ctx.log = noop_log;
comic_big_ctx.logf = noop_logf;
comic_big_ctx.store = counter_store;
@@ -1138,10 +1143,12 @@ int main(int argc, char **argv) {
media_ctx.logf = noop_logf;
media_ctx.store = counter_store;
media_ctx.tn_size = 500;
media_ctx.tn_count = 1;
media_ctx.tn_qscale = 1.0;
media_ctx.max_media_buffer = (long) 2000 * (long) 1024 * (long) 1024;
ooxml_500_ctx.content_size = 500;
ooxml_500_ctx.enable_tn = TRUE;
ooxml_500_ctx.log = noop_log;
ooxml_500_ctx.logf = noop_logf;
ooxml_500_ctx.store = counter_store;
@@ -1154,6 +1161,7 @@ int main(int argc, char **argv) {
raw_ctx.logf = noop_logf;
raw_ctx.store = counter_store;
raw_ctx.tn_size = 500;
raw_ctx.enable_tn = TRUE;
raw_ctx.tn_qscale = 5.0;
msdoc_ctx.log = noop_log;
@@ -1161,12 +1169,14 @@ int main(int argc, char **argv) {
msdoc_ctx.store = counter_store;
msdoc_ctx.content_size = 500;
msdoc_ctx.tn_size = 500;
msdoc_ctx.enable_tn = TRUE;
msdoc_text_ctx.log = noop_log;
msdoc_text_ctx.logf = noop_logf;
msdoc_text_ctx.store = counter_store;
msdoc_text_ctx.content_size = 500;
msdoc_text_ctx.tn_size = 0;
msdoc_text_ctx.enable_tn = FALSE;
wpd_ctx.log = noop_log;
wpd_ctx.logf = noop_logf;