From 6504f5ef3a8c18f513a9f5af9d11df7eb3ea912c Mon Sep 17 00:00:00 2001 From: simon987 Date: Thu, 9 Apr 2020 09:23:47 -0400 Subject: [PATCH] media tests (wip), fix ebook bugs, fix invalid webm duration --- libscan/ebook/ebook.c | 1 + libscan/media/media.c | 5 +- libscan/media/media.h | 1 - test/main.cpp | 124 +++++++++++++++++++++++++++++++++++++++++- 4 files changed, 127 insertions(+), 4 deletions(-) diff --git a/libscan/ebook/ebook.c b/libscan/ebook/ebook.c index 690b996..7929e85 100644 --- a/libscan/ebook/ebook.c +++ b/libscan/ebook/ebook.c @@ -175,6 +175,7 @@ void parse_ebook_mem(scan_ebook_ctx_t *ctx, void* buf, size_t buf_len, const cha mu_is_initialized = 1; } fz_context *fzctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); + thread_ctx = *ctx; init_fzctx(fzctx, doc); diff --git a/libscan/media/media.c b/libscan/media/media.c index 4ee633f..03924ee 100644 --- a/libscan/media/media.c +++ b/libscan/media/media.c @@ -228,6 +228,8 @@ append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, } } +#define IS_VIDEO(fmt) (fmt->iformat->name && strcmp(fmt->iformat->name, "image2") != 0) + void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) { int video_stream = -1; @@ -257,6 +259,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, if (video_stream == -1) { const AVCodecDescriptor *desc = avcodec_descriptor_get(stream->codecpar->codec_id); + if (desc != NULL) { meta_line_t *meta_vid = malloc(sizeof(meta_line_t)); meta_vid->key = MetaMediaVideoCodec; @@ -314,7 +317,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, return; } - append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1); + append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, IS_VIDEO(pFormatCtx)); // Scale frame AVFrame *scaled_frame = scale_frame(decoder, frame, ctx->tn_size); diff --git a/libscan/media/media.h b/libscan/media/media.h 
index 236c623..adfac2d 100644 --- a/libscan/media/media.h +++ b/libscan/media/media.h @@ -5,7 +5,6 @@ #include "../scan.h" typedef struct { - long content_size; int tn_size; float tn_qscale; diff --git a/test/main.cpp b/test/main.cpp index 49eda6c..6a10775 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -1,10 +1,12 @@ -#include "gtest/gtest.h" +#include <gtest/gtest.h> #include "test_util.h" extern "C" { #include "../libscan/arc/arc.h" #include "../libscan/text/text.h" #include "../libscan/ebook/ebook.h" +#include "../libscan/media/media.h" +#include <libavutil/avutil.h> } static scan_arc_ctx_t arc_recurse_ctx; @@ -15,6 +17,8 @@ static scan_text_ctx_t text_500_ctx; static scan_ebook_ctx_t ebook_ctx; static scan_ebook_ctx_t ebook_500_ctx; +static scan_media_ctx_t media_ctx; + /* Text */ @@ -117,6 +121,116 @@ TEST(Ebook, Utf8Pdf) { cleanup(&doc, &f); } +TEST(Ebook, Epub1) { + vfile_t f; + document_t doc; + load_doc_file("libscan-test-files/test_files/ebook/epub1.epub", &f, &doc); + + parse_ebook(&ebook_500_ctx, &f, "application/epub+zip", &doc); + + ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "Rabies"); + ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), 500, 1); + cleanup(&doc, &f); +} + +TEST(Ebook, ComicCbz) { + vfile_t f; + document_t doc; + load_doc_file("libscan-test-files/test_files/ebook/lost_treasure.cbz", &f, &doc); + + parse_ebook(&ebook_500_ctx, &f, "application/vnd.comicbook+zip", &doc); + + //TODO: Check that thumbnail was generated correctly + cleanup(&doc, &f); +} + +TEST(Ebook, ComicCbr) { + vfile_t f; + document_t doc; + load_doc_file("libscan-test-files/test_files/ebook/laugh.cbr", &f, &doc); + + parse_ebook(&ebook_500_ctx, &f, "application/vnd.comicbook-rar", &doc); + + //TODO: Check that thumbnail was generated correctly + cleanup(&doc, &f); +} + +/* Media (image) */ + +TEST(MediaImage, Exif1) { + vfile_t f; + document_t doc; + load_doc_file("libscan-test-files/test_files/media/exiftest1.jpg", &f, &doc); + + parse_media(&media_ctx, &f, &doc); + + 
ASSERT_STREQ(get_meta(&doc, MetaContent)->str_val, "I don't know if it's a thing mostly done for high end " + "hotels or what, but I've seen it in a few places in Thailand: " + "There's a tradition of flower folding, doing a sort of light " + "origami with the petals of lotus and other flowers, to make " + "cute little ornaments."); + ASSERT_STREQ(get_meta(&doc, MetaExifMake)->str_val, "NIKON CORPORATION"); + ASSERT_STREQ(get_meta(&doc, MetaExifModel)->str_val, "NIKON D7000"); + ASSERT_STREQ(get_meta(&doc, MetaExifDateTime)->str_val, "2019:11:08 14:37:59"); + ASSERT_STREQ(get_meta(&doc, MetaExifExposureTime)->str_val, "1:160"); + ASSERT_STREQ(get_meta(&doc, MetaArtist)->str_val, "FinalDoom"); + ASSERT_STREQ(get_meta(&doc, MetaExifSoftware)->str_val, "Adobe Photoshop Lightroom 5.7 (Windows)"); + ASSERT_STREQ(get_meta(&doc, MetaExifFNumber)->str_val, "53:10"); + ASSERT_STREQ(get_meta(&doc, MetaExifFocalLength)->str_val, "900:10"); + ASSERT_STREQ(get_meta(&doc, MetaExifIsoSpeedRatings)->str_val, "400"); + ASSERT_STREQ(get_meta(&doc, MetaExifExposureTime)->str_val, "1:160"); + + //TODO: Check that thumbnail was generated correctly + cleanup(&doc, &f); +} + +TEST(MediaVideo, Vid3Mp4) { + vfile_t f; + document_t doc; + load_doc_file("libscan-test-files/test_files/media/vid3.mp4", &f, &doc); + + parse_media(&media_ctx, &f, &doc); + + ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "Helicopter (((Accident))) - " + "https://archive.org/details/Virginia_Helicopter_Crash"); + ASSERT_STREQ(get_meta(&doc, MetaMediaVideoCodec)->str_val, "h264"); + ASSERT_EQ(get_meta(&doc, MetaMediaBitrate)->long_val, 825169); + ASSERT_EQ(get_meta(&doc, MetaMediaDuration)->long_val, 10); + + //TODO: Check that thumbnail was generated correctly + cleanup(&doc, &f); +} + +TEST(MediaVideo, Vid3Ogv) { + vfile_t f; + document_t doc; + load_doc_file("libscan-test-files/test_files/media/vid3.ogv", &f, &doc); + + parse_media(&media_ctx, &f, &doc); + + ASSERT_STREQ(get_meta(&doc, 
MetaMediaVideoCodec)->str_val, "theora"); + ASSERT_EQ(get_meta(&doc, MetaMediaBitrate)->long_val, 590261); + ASSERT_EQ(get_meta(&doc, MetaMediaDuration)->long_val, 10); + + //TODO: Check that thumbnail was generated correctly + cleanup(&doc, &f); +} + +TEST(MediaVideo, Vid3Webm) { + vfile_t f; + document_t doc; + load_doc_file("libscan-test-files/test_files/media/vid3.webm", &f, &doc); + + parse_media(&media_ctx, &f, &doc); + + ASSERT_STREQ(get_meta(&doc, MetaMediaVideoCodec)->str_val, "vp8"); + ASSERT_EQ(get_meta(&doc, MetaMediaBitrate)->long_val, 343153); + ASSERT_EQ(get_meta(&doc, MetaMediaDuration)->long_val, 10); + + //TODO: Check that thumbnail was generated correctly + cleanup(&doc, &f); +} + int main(int argc, char **argv) { arc_recurse_ctx.log = noop_log; @@ -139,13 +253,19 @@ int main(int argc, char **argv) { ebook_ctx.tesseract_lang = "eng"; ebook_ctx.tesseract_path = "./tessdata"; ebook_ctx.tn_size = 500; - pthread_mutex_init(&ebook_ctx.mupdf_mutex, nullptr); ebook_ctx.log = noop_log; ebook_ctx.logf = noop_logf; ebook_500_ctx = ebook_ctx; ebook_500_ctx.content_size = 500; + media_ctx.log = noop_log; + media_ctx.logf = noop_logf; + media_ctx.store = noop_store; + media_ctx.tn_size = 500; + media_ctx.tn_qscale = 1.0; + + av_log_set_level(AV_LOG_QUIET); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } \ No newline at end of file