diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d983f3..967e2e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ add_library( libscan/text/text.c libscan/text/text.h libscan/arc/arc.c libscan/arc/arc.h libscan/ebook/ebook.c libscan/ebook/ebook.h - libscan/cbr/cbr.c libscan/cbr/cbr.h + libscan/comic/comic.c libscan/comic/comic.h libscan/ooxml/ooxml.c libscan/ooxml/ooxml.h libscan/media/media.c libscan/media/media.h libscan/font/font.c libscan/font/font.h diff --git a/libscan/arc/arc.c b/libscan/arc/arc.c index bf2c482..2a134ee 100644 --- a/libscan/arc/arc.c +++ b/libscan/arc/arc.c @@ -1,15 +1,11 @@ #include "arc.h" -#include "../scan.h" -#include "../util.h" - #include #include #include #include - int should_parse_filtered_file(const char *filepath, int ext) { char tmp[PATH_MAX * 2]; @@ -41,67 +37,41 @@ int arc_read(struct vfile *f, void *buf, size_t size) { return archive_read_data(f->arc, buf, size); } -typedef struct arc_data { - vfile_t *f; - char buf[ARC_BUF_SIZE]; -} arc_data_f; +int arc_open(vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse) { + arc_data->f = f; -int vfile_open_callback(struct archive *a, void *user_data) { - arc_data_f *data = user_data; + if (f->is_fs_file) { + *a = archive_read_new(); + archive_read_support_filter_all(*a); + archive_read_support_format_all(*a); - if (data->f->is_fs_file && data->f->fd == -1) { - data->f->fd = open(data->f->filepath, O_RDONLY); - } + return archive_read_open_filename(*a, f->filepath, ARC_BUF_SIZE); + } else if (allow_recurse) { + *a = archive_read_new(); + archive_read_support_filter_all(*a); + archive_read_support_format_all(*a); - return ARCHIVE_OK; -} - -long vfile_read_callback(struct archive *a, void *user_data, const void **buf) { - arc_data_f *data = user_data; - - *buf = data->buf; - return data->f->read(data->f, data->buf, ARC_BUF_SIZE); -} - -int vfile_close_callback(struct archive *a, void *user_data) { - arc_data_f *data = user_data; - - if
(data->f->close != NULL) { - data->f->close(data->f); - } - - return ARCHIVE_OK; -} - -scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) { - - struct archive *a; - struct archive_entry *entry; - - arc_data_f data; - data.f = f; - - int ret = 0; - if (data.f->is_fs_file) { - - a = archive_read_new(); - archive_read_support_filter_all(a); - archive_read_support_format_all(a); - - ret = archive_read_open_filename(a, f->filepath, ARC_BUF_SIZE); - } else if (ctx->mode == ARC_MODE_RECURSE) { - - a = archive_read_new(); - archive_read_support_filter_all(a); - archive_read_support_format_all(a); - - ret = archive_read_open( - a, &data, + return archive_read_open( + *a, arc_data, vfile_open_callback, vfile_read_callback, vfile_close_callback ); } else { + return ARC_SKIPPED; + } +} + +scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) { + + struct archive *a = NULL; + struct archive_entry *entry = NULL; + + arc_data_t arc_data; + arc_data.f = f; + + int ret = arc_open(f, &a, &arc_data, ctx->mode == ARC_MODE_RECURSE); + if (ret == ARC_SKIPPED) { return SCAN_OK; } @@ -112,15 +82,14 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) { } if (ctx->mode == ARC_MODE_LIST) { - dyn_buffer_t buf = dyn_buffer_create(); while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { if (S_ISREG(archive_entry_stat(entry)->st_mode)) { + const char* utf8_name = archive_entry_pathname_utf8(entry); + const char* file_path = utf8_name == NULL ?
archive_entry_pathname(entry) : utf8_name; - char *path = (char *) archive_entry_pathname_utf8(entry); - - dyn_buffer_append_string(&buf, path); + dyn_buffer_append_string(&buf, file_path); dyn_buffer_write_char(&buf, ' '); } } diff --git a/libscan/arc/arc.h b/libscan/arc/arc.h index fd6fb35..5f7333b 100644 --- a/libscan/arc/arc.h +++ b/libscan/arc/arc.h @@ -3,8 +3,10 @@ #include #include +#include #include "../scan.h" +#define ARC_SKIPPED (-1) #define ARC_MODE_SKIP 0 #define ARC_MODE_LIST 1 #define ARC_MODE_SHALLOW 2 @@ -22,6 +24,40 @@ typedef struct { #define ARC_BUF_SIZE 8192 +typedef struct { + vfile_t *f; + char buf[ARC_BUF_SIZE]; +} arc_data_t; + +static int vfile_open_callback(struct archive *a, void *user_data) { + arc_data_t *data = (arc_data_t*)user_data; + + if (data->f->is_fs_file && data->f->fd == -1) { + data->f->fd = open(data->f->filepath, O_RDONLY); + } + + return ARCHIVE_OK; +} + +static long vfile_read_callback(struct archive *a, void *user_data, const void **buf) { + arc_data_t *data = (arc_data_t*)user_data; + + *buf = data->buf; + return data->f->read(data->f, data->buf, ARC_BUF_SIZE); +} + +static int vfile_close_callback(struct archive *a, void *user_data) { + arc_data_t *data = (arc_data_t*)user_data; + + if (data->f->close != NULL) { + data->f->close(data->f); + } + + return ARCHIVE_OK; +} + +int arc_open(vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse); /* Opens f for reading into *a; returns the libarchive open status, or ARC_SKIPPED (leaving *a untouched) when f is not a filesystem file and allow_recurse is zero. */ + int should_parse_filtered_file(const char *filepath, int ext); scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc); diff --git a/libscan/cbr/cbr.c b/libscan/cbr/cbr.c deleted file mode 100644 index 17bef6e..0000000 --- a/libscan/cbr/cbr.c +++ /dev/null @@ -1,50 +0,0 @@ -#include "cbr.h" -#include "../scan.h" -#include "../util.h" -#include "../arc/arc.h" - -#include -#include - - -void parse_cbr(scan_cbr_ctx_t *ctx, vfile_t *f, document_t *doc) { - - size_t buf_len; - void *buf = read_all(f, &buf_len); - - char *out_buf = malloc(buf_len *
2); // TODO: we probably only need 1.2x or 1.5x, even better would be a dynamic buffer - size_t out_buf_used = 0; - - struct archive *rar_in = archive_read_new(); - archive_read_support_filter_none(rar_in); - archive_read_support_format_rar(rar_in); - - archive_read_open_memory(rar_in, buf, buf_len); - - struct archive *zip_out = archive_write_new(); - archive_write_set_format_zip(zip_out); - archive_write_open_memory(zip_out, out_buf, buf_len * 2, &out_buf_used); - - struct archive_entry *entry; - while (archive_read_next_header(rar_in, &entry) == ARCHIVE_OK) { - archive_write_header(zip_out, entry); - - char arc_buf[ARC_BUF_SIZE]; - int len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE); - while (len > 0) { - archive_write_data(zip_out, arc_buf, len); - len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE); - } - } - - archive_write_close(zip_out); - archive_write_free(zip_out); - - archive_read_close(rar_in); - archive_read_free(rar_in); - - parse_ebook_mem(&ctx->ebook_ctx, out_buf, out_buf_used, "application/x-cbz", doc); - doc->mime = ctx->cbr_mime; - free(out_buf); - free(buf); -} diff --git a/libscan/cbr/cbr.h b/libscan/cbr/cbr.h deleted file mode 100644 index 40c454f..0000000 --- a/libscan/cbr/cbr.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef SCAN_CBR_H -#define SCAN_CBR_H - -#include -#include "../ebook/ebook.h" - -typedef struct { - scan_ebook_ctx_t ebook_ctx; - unsigned int cbr_mime; - log_callback_t log; - logf_callback_t logf; - store_callback_t store; -} scan_cbr_ctx_t; - -__always_inline -static int is_cbr(scan_cbr_ctx_t *ctx, unsigned int mime) { - return mime == ctx->cbr_mime; -} - -void parse_cbr(scan_cbr_ctx_t *ctx, vfile_t *f, document_t *doc); - -#endif diff --git a/libscan/comic/comic.c b/libscan/comic/comic.c new file mode 100644 index 0000000..4cb5ed0 --- /dev/null +++ b/libscan/comic/comic.c @@ -0,0 +1,44 @@ +#include "comic.h" +#include "../media/media.h" +#include "../arc/arc.h" + +#include +#include + +/* Walks the comic archive f and stores a thumbnail for doc from the first .png/.jpg/.jpeg entry that store_image_thumbnail() accepts. */ +void
parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) { + struct archive *a = NULL; + struct archive_entry *entry = NULL; + arc_data_t arc_data; + + int ret = arc_open(f, &a, &arc_data, TRUE); + if (ret != ARCHIVE_OK) { + CTX_LOG_ERRORF(f->filepath, "(comic.c) [%d] %s", ret, archive_error_string(a)) + archive_read_free(a); + return; + } + + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + struct stat info = *archive_entry_stat(entry); + if (S_ISREG(info.st_mode)) { + const char* utf8_name = archive_entry_pathname_utf8(entry); + const char* file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name; + + char *p = strrchr(file_path, '.'); + if (p != NULL && (strcmp(p, ".png") == 0 || strcmp(p, ".jpg") == 0 || strcmp(p, ".jpeg") == 0)) { /* the extension checks are grouped so p is never passed to strcmp() when the name has no '.' */ + size_t entry_size = archive_entry_size(entry); + void* buf = malloc(entry_size); + archive_read_data(a, buf, entry_size); + + ret = store_image_thumbnail((scan_media_ctx_t*)ctx, buf, entry_size, doc); + free(buf); + + if (ret == TRUE) { + break; + } + } + } + } + + archive_read_free(a); +} diff --git a/libscan/comic/comic.h b/libscan/comic/comic.h new file mode 100644 index 0000000..144a247 --- /dev/null +++ b/libscan/comic/comic.h @@ -0,0 +1,31 @@ +#ifndef SCAN_CBR_H +#define SCAN_CBR_H + +#include +#include "../ebook/ebook.h" + +typedef struct { + log_callback_t log; + logf_callback_t logf; + store_callback_t store; + + int tn_size; + float tn_qscale; + + unsigned int cbr_mime; + unsigned int cbz_mime; +} scan_comic_ctx_t; + +__always_inline +static int is_cbr(scan_comic_ctx_t *ctx, unsigned int mime) { + return mime == ctx->cbr_mime; +} + +__always_inline +static int is_cbz(scan_comic_ctx_t *ctx, unsigned int mime) { + return mime == ctx->cbz_mime; +} + +void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc); + +#endif diff --git a/libscan/media/media.c b/libscan/media/media.c index dcc3ef9..242eb2d 100644 --- a/libscan/media/media.c +++ b/libscan/media/media.c @@ -40,6 +40,10 @@
static AVCodecContext *alloc_jpeg_encoder(scan_media_ctx_t *ctx, int dstW, int d __always_inline AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) { + if (frame->pict_type == AV_PICTURE_TYPE_NONE) { + return NULL; + } + int dstW; int dstH; if (frame->width <= size && frame->height <= size) { @@ -443,7 +447,16 @@ int memfile_open(vfile_t *f, memfile_t *mem) { int ret = f->read(f, mem->buf, mem->info.st_size); mem->file = fmemopen(mem->buf, mem->info.st_size, "rb"); - return ret == mem->info.st_size ? 0 : -1; + return (ret == mem->info.st_size && mem->file != NULL) ? 0 : -1; +} + +int memfile_open_buf(void *buf, size_t buf_len, memfile_t *mem) { + mem->info.st_size = buf_len; + + mem->buf = buf; + mem->file = fmemopen(mem->buf, mem->info.st_size, "rb"); + + return mem->file != NULL ? 0 : -1; } void memfile_close(memfile_t *mem) { @@ -517,3 +530,102 @@ void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc) { void init_media() { av_log_set_level(AV_LOG_QUIET); } + +int store_image_thumbnail(scan_media_ctx_t *ctx, void* buf, size_t buf_len, document_t *doc) { /* Reads one frame from the image in buf, scales it to ctx->tn_size, encodes it as JPEG and hands it to ctx->store; returns TRUE once stored, FALSE when allocation, opening, frame reading or scaling fails. */ + memfile_t memfile; + AVIOContext *io_ctx = NULL; + + AVFormatContext *pFormatCtx = avformat_alloc_context(); + if (pFormatCtx == NULL) { + CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") + return FALSE; + } + + unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE); + + int ret = memfile_open_buf(buf, buf_len, &memfile); + if (ret == 0) { + CTX_LOG_DEBUGF(doc->filepath, "Loading media file in memory (%ldB)", buf_len) + io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, &memfile, memfile_read, NULL, memfile_seek); + } else { + /* fmemopen() failed: io_ctx is still NULL and memfile.file was never */ + /* opened, so neither may be dereferenced or closed on this path. */ + /* Release only what has actually been allocated so far. */ + av_free(buffer); + avformat_free_context(pFormatCtx); + return FALSE; + } + + pFormatCtx->pb = io_ctx; + + int res = avformat_open_input(&pFormatCtx, "", NULL, NULL); + if (res !=
0) { + av_free(io_ctx->buffer); + avformat_close_input(&pFormatCtx); + avformat_free_context(pFormatCtx); + avio_context_free(&io_ctx); + fclose(memfile.file); + return FALSE; + } + + AVStream *stream = pFormatCtx->streams[0]; + + // Decoder + AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id); + AVCodecContext *decoder = avcodec_alloc_context3(video_codec); + avcodec_parameters_to_context(decoder, stream->codecpar); + avcodec_open2(decoder, video_codec, NULL); + + AVFrame *frame = read_frame(ctx, pFormatCtx, decoder, 0, doc); + if (frame == NULL) { + avcodec_free_context(&decoder); + avformat_close_input(&pFormatCtx); + avformat_free_context(pFormatCtx); + av_free(io_ctx->buffer); + avio_context_free(&io_ctx); + fclose(memfile.file); + return FALSE; + } + + // Scale frame + AVFrame *scaled_frame = scale_frame(decoder, frame, ctx->tn_size); + + if (scaled_frame == NULL) { + av_frame_free(&frame); + avcodec_free_context(&decoder); + avformat_close_input(&pFormatCtx); + avformat_free_context(pFormatCtx); + av_free(io_ctx->buffer); + avio_context_free(&io_ctx); + fclose(memfile.file); + return FALSE; + } + + // Encode frame to jpeg + AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(ctx, scaled_frame->width, scaled_frame->height, ctx->tn_qscale); + avcodec_send_frame(jpeg_encoder, scaled_frame); + + AVPacket jpeg_packet; + av_init_packet(&jpeg_packet); + avcodec_receive_packet(jpeg_encoder, &jpeg_packet); + + // Save thumbnail + APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height) + ctx->store((char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, jpeg_packet.size); + + av_packet_unref(&jpeg_packet); + av_frame_free(&frame); + av_free(*scaled_frame->data); + av_frame_free(&scaled_frame); + avcodec_free_context(&jpeg_encoder); + avcodec_free_context(&decoder); + + avformat_close_input(&pFormatCtx); + avformat_free_context(pFormatCtx); + + av_free(io_ctx->buffer); + avio_context_free(&io_ctx); + fclose(memfile.file); + + return
TRUE; +} diff --git a/libscan/media/media.h b/libscan/media/media.h index fe3ee3b..26091a0 100644 --- a/libscan/media/media.h +++ b/libscan/media/media.h @@ -5,16 +5,18 @@ #include "../scan.h" typedef struct { - int tn_size; - float tn_qscale; - log_callback_t log; logf_callback_t logf; store_callback_t store; + + int tn_size; + float tn_qscale; long max_media_buffer; } scan_media_ctx_t; void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc); void init_media(); +int store_image_thumbnail(scan_media_ctx_t *ctx, void* buf, size_t buf_len, document_t *doc); + #endif diff --git a/libscan/util.h b/libscan/util.h index d6234c5..8d694df 100644 --- a/libscan/util.h +++ b/libscan/util.h @@ -117,12 +117,12 @@ static void dyn_buffer_write_char(dyn_buffer_t *buf, char c) { buf->cur += sizeof(c); } -static void dyn_buffer_write_str(dyn_buffer_t *buf, char *str) { +static void dyn_buffer_write_str(dyn_buffer_t *buf, const char *str) { dyn_buffer_write(buf, str, strlen(str)); dyn_buffer_write_char(buf, '\0'); } -static void dyn_buffer_append_string(dyn_buffer_t *buf, char *str) { +static void dyn_buffer_append_string(dyn_buffer_t *buf, const char *str) { dyn_buffer_write(buf, str, strlen(str)); } diff --git a/test/main.cpp b/test/main.cpp index 95ca84d..732e5f5 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -5,6 +5,7 @@ extern "C" { #include "../libscan/arc/arc.h" #include "../libscan/text/text.h" #include "../libscan/ebook/ebook.h" +#include "../libscan/comic/comic.h" #include "../libscan/media/media.h" #include "../libscan/ooxml/ooxml.h" #include "../libscan/mobi/scan_mobi.h" @@ -20,6 +21,8 @@ static scan_text_ctx_t text_500_ctx; static scan_ebook_ctx_t ebook_ctx; static scan_ebook_ctx_t ebook_500_ctx; +static scan_comic_ctx_t comic_ctx; + static scan_media_ctx_t media_ctx; static scan_ooxml_ctx_t ooxml_500_ctx; @@ -203,25 +206,32 @@ TEST(Ebook, Epub1) { cleanup(&doc, &f); } -TEST(Ebook, ComicCbz) { +/* Comic */ +TEST(Comic, ComicCbz) { vfile_t f; document_t
doc; load_doc_file("libscan-test-files/test_files/ebook/lost_treasure.cbz", &f, &doc); - parse_ebook(&ebook_500_ctx, &f, "application/vnd.comicbook+zip", &doc); + size_t size_before = store_size; + + parse_comic(&comic_ctx, &f, &doc); + + ASSERT_NE(size_before, store_size); - //TODO: Check that thumbnail was generated correctly cleanup(&doc, &f); } -TEST(Ebook, ComicCbr) { +TEST(Comic, ComicCbr) { vfile_t f; document_t doc; load_doc_file("libscan-test-files/test_files/ebook/laugh.cbr", &f, &doc); - parse_ebook(&ebook_500_ctx, &f, "application/vnd.comicbook-rar", &doc); + size_t size_before = store_size; + + parse_comic(&comic_ctx, &f, &doc); + + ASSERT_NE(size_before, store_size); - //TODO: Check that thumbnail was generated correctly cleanup(&doc, &f); } @@ -589,6 +599,12 @@ int main(int argc, char **argv) { ebook_500_ctx = ebook_ctx; ebook_500_ctx.content_size = 500; + comic_ctx.tn_qscale = 1.0; + comic_ctx.tn_size = 500; + comic_ctx.log = noop_log; + comic_ctx.logf = noop_logf; + comic_ctx.store = counter_store; + media_ctx.log = noop_log; media_ctx.logf = noop_logf; media_ctx.store = counter_store;