Better support for media files inside archives

This commit is contained in:
simon987 2020-05-24 14:10:23 -04:00
parent db1f7b24d3
commit be2dabe466
4 changed files with 95 additions and 10 deletions

View File

@ -145,7 +145,7 @@ void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDic
while (meta != NULL) {
if (meta->key == key) {
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'",
key, meta->str_val, key, tag->value)
key, meta->str_val, key, tag->value)
return;
}
meta = meta->next;
@ -400,7 +400,6 @@ void parse_media_filename(scan_media_ctx_t *ctx, const char *filepath, document_
parse_media_format_ctx(ctx, pFormatCtx, doc);
}
int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
struct vfile *f = ptr;
@ -412,6 +411,48 @@ int vfile_read(void *ptr, uint8_t *buf, int buf_size) {
return ret;
}
typedef struct {
struct stat info;
FILE *file;
void *buf;
} memfile_t;
int memfile_read(void *ptr, uint8_t *buf, int buf_size) {
memfile_t *mem = ptr;
return (int) fread(buf, 1, buf_size, mem->file);
}
long memfile_seek(void *ptr, long offset, int whence) {
memfile_t *mem = ptr;
if (whence == 0x10000) {
return mem->info.st_size;
}
return fseek(mem->file, offset, whence);
}
int memfile_open(vfile_t *f, memfile_t *mem) {
mem->info = f->info;
mem->buf = malloc(mem->info.st_size);
if (mem->buf == NULL) {
return -1;
}
int ret = f->read(f, mem->buf, mem->info.st_size);
mem->file = fmemopen(mem->buf, mem->info.st_size, "rb");
return ret == mem->info.st_size ? 0 : -1;
}
void memfile_close(memfile_t *mem) {
if (mem->buf != NULL) {
free(mem->buf);
fclose(mem->file);
}
}
void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc) {
AVFormatContext *pFormatCtx = avformat_alloc_context();
@ -421,15 +462,29 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc)
}
unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE);
AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
AVIOContext *io_ctx = NULL;
memfile_t memfile = {{}, 0, 0};
if (f->info.st_size <= ctx->max_media_buffer) {
int ret = memfile_open(f, &memfile);
if (ret == 0) {
CTX_LOG_DEBUGF(f->filepath, "Loading media file in memory (%ldB)", f->info.st_size)
io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek);
}
}
if (io_ctx == NULL) {
CTX_LOG_DEBUGF(f->filepath, "Reading media file without seek support", f->info.st_size)
io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL);
}
pFormatCtx->pb = io_ctx;
pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO;
int res = avformat_open_input(&pFormatCtx, "", NULL, NULL);
if (res == -5) {
// Tried to parse media that requires seek
av_free(io_ctx->buffer);
memfile_close(&memfile);
avio_context_free(&io_ctx);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
@ -437,6 +492,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc)
} else if (res < 0) {
CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
av_free(io_ctx->buffer);
memfile_close(&memfile);
avio_context_free(&io_ctx);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
@ -446,6 +502,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc)
parse_media_format_ctx(ctx, pFormatCtx, doc);
av_free(io_ctx->buffer);
avio_context_free(&io_ctx);
memfile_close(&memfile);
}
void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc) {

View File

@ -11,6 +11,7 @@ typedef struct {
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
long max_media_buffer;
} scan_media_ctx_t;
void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc);

View File

@ -106,6 +106,9 @@ typedef struct vfile vfile_t;
__attribute__((warn_unused_result))
typedef int (*read_func_t)(struct vfile *, void *buf, size_t size);
__attribute__((warn_unused_result))
typedef long (*seek_func_t)(struct vfile *, long offset, int whence);
typedef void (*close_func_t)(struct vfile *);
typedef void (*reset_func_t)(struct vfile *);
@ -122,6 +125,7 @@ typedef struct vfile {
struct stat info;
read_func_t read;
seek_func_t seek;
close_func_t close;
reset_func_t reset;
} vfile_t;

View File

@ -11,7 +11,7 @@ extern "C" {
#include <libavutil/avutil.h>
}
static scan_arc_ctx_t arc_recurse_ctx;
static scan_arc_ctx_t arc_recurse_media_ctx;
static scan_arc_ctx_t arc_list_ctx;
static scan_text_ctx_t text_500_ctx;
@ -26,6 +26,13 @@ static scan_ooxml_ctx_t ooxml_500_ctx;
static scan_mobi_ctx_t mobi_500_ctx;
document_t LastSubDoc;
void _parse_media(parse_job_t *job) {
parse_media(&media_ctx, &job->vfile, &LastSubDoc);
}
/* Text */
TEST(Text, BookCsvContentLen) {
@ -291,6 +298,21 @@ TEST(MediaVideo, Vid3Webm) {
cleanup(&doc, &f);
}
TEST(MediaVideoVfile, Vid3Ogv) {
vfile_t f;
document_t doc;
load_doc_file("libscan-test-files/test_files/arc/vid3.tar", &f, &doc);
parse_archive(&arc_recurse_media_ctx, &f, &doc);
ASSERT_STREQ(get_meta(&LastSubDoc, MetaMediaVideoCodec)->str_val, "theora");
ASSERT_EQ(get_meta(&LastSubDoc, MetaMediaBitrate)->long_val, 590261);
ASSERT_EQ(get_meta(&LastSubDoc, MetaMediaDuration)->long_val, 10);
//TODO: Check that thumbnail was generated correctly
cleanup(&doc, &f);
}
TEST(MediaVideo, VidDuplicateTags) {
vfile_t f;
document_t doc;
@ -437,11 +459,11 @@ TEST(Arc, Utf8) {
int main(int argc, char **argv) {
setlocale(LC_ALL, "");
arc_recurse_ctx.log = noop_log;
arc_recurse_ctx.logf = noop_logf;
arc_recurse_ctx.store = noop_store;
arc_recurse_ctx.mode = ARC_MODE_RECURSE;
arc_recurse_ctx.parse = nullptr; //TODO
arc_recurse_media_ctx.log = noop_log;
arc_recurse_media_ctx.logf = noop_logf;
arc_recurse_media_ctx.store = noop_store;
arc_recurse_media_ctx.mode = ARC_MODE_RECURSE;
arc_recurse_media_ctx.parse = _parse_media;
arc_list_ctx.log = noop_log;
arc_list_ctx.logf = noop_logf;
@ -468,6 +490,7 @@ int main(int argc, char **argv) {
media_ctx.store = noop_store;
media_ctx.tn_size = 500;
media_ctx.tn_qscale = 1.0;
media_ctx.max_media_buffer = (long)2000 * 1024 * 1024;
ooxml_500_ctx.content_size = 500;
ooxml_500_ctx.log = noop_log;