diff --git a/libscan/media/media.c b/libscan/media/media.c index 59a303e..1de979b 100644 --- a/libscan/media/media.c +++ b/libscan/media/media.c @@ -145,7 +145,7 @@ void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDic while (meta != NULL) { if (meta->key == key) { CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'", - key, meta->str_val, key, tag->value) + key, meta->str_val, key, tag->value) return; } meta = meta->next; @@ -400,7 +400,6 @@ void parse_media_filename(scan_media_ctx_t *ctx, const char *filepath, document_ parse_media_format_ctx(ctx, pFormatCtx, doc); } - int vfile_read(void *ptr, uint8_t *buf, int buf_size) { struct vfile *f = ptr; @@ -412,6 +411,48 @@ int vfile_read(void *ptr, uint8_t *buf, int buf_size) { return ret; } +typedef struct { + struct stat info; + FILE *file; + void *buf; +} memfile_t; + +int memfile_read(void *ptr, uint8_t *buf, int buf_size) { + memfile_t *mem = ptr; + return (int) fread(buf, 1, buf_size, mem->file); +} + +long memfile_seek(void *ptr, long offset, int whence) { + memfile_t *mem = ptr; + + if (whence == 0x10000) { + return mem->info.st_size; + } + + return fseek(mem->file, offset, whence); +} + +int memfile_open(vfile_t *f, memfile_t *mem) { + mem->info = f->info; + + mem->buf = malloc(mem->info.st_size); + if (mem->buf == NULL) { + return -1; + } + + int ret = f->read(f, mem->buf, mem->info.st_size); + mem->file = fmemopen(mem->buf, mem->info.st_size, "rb"); + + return ret == mem->info.st_size ? 0 : -1; +} + +void memfile_close(memfile_t *mem) { + if (mem->buf != NULL) { + free(mem->buf); + fclose(mem->file); + } +} + void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc) { AVFormatContext *pFormatCtx = avformat_alloc_context(); @@ -421,15 +462,29 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc) } unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE); - AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL); + AVIOContext *io_ctx = NULL; + memfile_t memfile = {{}, 0, 0}; + + if (f->info.st_size <= ctx->max_media_buffer) { + int ret = memfile_open(f, &memfile); + if (ret == 0) { + CTX_LOG_DEBUGF(f->filepath, "Loading media file in memory (%ldB)", f->info.st_size) + io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek); + } + } + + if (io_ctx == NULL) { + CTX_LOG_DEBUGF(f->filepath, "Reading media file without seek support", f->info.st_size) + io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL); + } pFormatCtx->pb = io_ctx; - pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO; int res = avformat_open_input(&pFormatCtx, "", NULL, NULL); if (res == -5) { // Tried to parse media that requires seek av_free(io_ctx->buffer); + memfile_close(&memfile); avio_context_free(&io_ctx); avformat_close_input(&pFormatCtx); avformat_free_context(pFormatCtx); @@ -437,6 +492,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc) } else if (res < 0) { CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)) av_free(io_ctx->buffer); + memfile_close(&memfile); avio_context_free(&io_ctx); avformat_close_input(&pFormatCtx); avformat_free_context(pFormatCtx); @@ -446,6 +502,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc) parse_media_format_ctx(ctx, pFormatCtx, doc); av_free(io_ctx->buffer); avio_context_free(&io_ctx); + memfile_close(&memfile); } void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc) { diff --git a/libscan/media/media.h b/libscan/media/media.h index adfac2d..fe3ee3b 100644 --- a/libscan/media/media.h +++ b/libscan/media/media.h @@ -11,6 +11,7 @@ typedef struct { log_callback_t log; logf_callback_t logf; store_callback_t store; + long max_media_buffer; } scan_media_ctx_t; void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc); diff --git a/libscan/scan.h b/libscan/scan.h index 12968eb..4251004 100644 --- a/libscan/scan.h +++ b/libscan/scan.h @@ -106,6 +106,9 @@ typedef struct vfile vfile_t; __attribute__((warn_unused_result)) typedef int (*read_func_t)(struct vfile *, void *buf, size_t size); +__attribute__((warn_unused_result)) +typedef long (*seek_func_t)(struct vfile *, long offset, int whence); + typedef void (*close_func_t)(struct vfile *); typedef void (*reset_func_t)(struct vfile *); @@ -122,6 +125,7 @@ typedef struct vfile { struct stat info; read_func_t read; + seek_func_t seek; close_func_t close; reset_func_t reset; } vfile_t; diff --git a/test/main.cpp b/test/main.cpp index e56fcb7..4eee5a3 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -11,7 +11,7 @@ extern "C" { #include } -static scan_arc_ctx_t arc_recurse_ctx; +static scan_arc_ctx_t arc_recurse_media_ctx; static scan_arc_ctx_t arc_list_ctx; static scan_text_ctx_t text_500_ctx; @@ -26,6 +26,13 @@ static scan_ooxml_ctx_t ooxml_500_ctx; static scan_mobi_ctx_t mobi_500_ctx; +document_t LastSubDoc; + +void _parse_media(parse_job_t *job) { + parse_media(&media_ctx, &job->vfile, &LastSubDoc); +} + + /* Text */ TEST(Text, BookCsvContentLen) { @@ -291,6 +298,21 @@ TEST(MediaVideo, Vid3Webm) { cleanup(&doc, &f); } +TEST(MediaVideoVfile, Vid3Ogv) { + vfile_t f; + document_t doc; + load_doc_file("libscan-test-files/test_files/arc/vid3.tar", &f, &doc); + + parse_archive(&arc_recurse_media_ctx, &f, &doc); + + ASSERT_STREQ(get_meta(&LastSubDoc, MetaMediaVideoCodec)->str_val, "theora"); + ASSERT_EQ(get_meta(&LastSubDoc, MetaMediaBitrate)->long_val, 590261); + ASSERT_EQ(get_meta(&LastSubDoc, MetaMediaDuration)->long_val, 10); + + //TODO: Check that thumbnail was generated correctly + cleanup(&doc, &f); +} + TEST(MediaVideo, VidDuplicateTags) { vfile_t f; document_t doc; @@ -437,11 +459,11 @@ TEST(Arc, Utf8) { int main(int argc, char **argv) { setlocale(LC_ALL, ""); - arc_recurse_ctx.log = noop_log; - arc_recurse_ctx.logf = noop_logf; - arc_recurse_ctx.store = noop_store; - arc_recurse_ctx.mode = ARC_MODE_RECURSE; - arc_recurse_ctx.parse = nullptr; //TODO + arc_recurse_media_ctx.log = noop_log; + arc_recurse_media_ctx.logf = noop_logf; + arc_recurse_media_ctx.store = noop_store; + arc_recurse_media_ctx.mode = ARC_MODE_RECURSE; + arc_recurse_media_ctx.parse = _parse_media; arc_list_ctx.log = noop_log; arc_list_ctx.logf = noop_logf; @@ -468,6 +490,7 @@ int main(int argc, char **argv) { media_ctx.store = noop_store; media_ctx.tn_size = 500; media_ctx.tn_qscale = 1.0; + media_ctx.max_media_buffer = (long)2000 * 1024 * 1024; ooxml_500_ctx.content_size = 500; ooxml_500_ctx.log = noop_log;