From c48717b0a66b646dd54163c57309b03087d00208 Mon Sep 17 00:00:00 2001
From: simon987
Date: Thu, 26 Mar 2020 09:40:04 -0400
Subject: [PATCH] cbr component

---
 CMakeLists.txt        |   1 +
 libscan/cbr/cbr.c     | 119 ++++++++++++++++++++++++++++++++++++++++++
 libscan/cbr/cbr.h     |  17 ++++++
 libscan/ebook/ebook.c |   2 +-
 libscan/ebook/ebook.h |   4 +-
 5 files changed, 140 insertions(+), 3 deletions(-)
 create mode 100644 libscan/cbr/cbr.c
 create mode 100644 libscan/cbr/cbr.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f7d4856..3ef7dd8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -40,6 +40,7 @@ add_library(
         libscan/text/text.c libscan/text/text.h
         libscan/arc/arc.c libscan/arc/arc.h
         libscan/ebook/ebook.c libscan/ebook/ebook.h
+        libscan/cbr/cbr.c libscan/cbr/cbr.h
 
         third-party/utf8.h
         )
diff --git a/libscan/cbr/cbr.c b/libscan/cbr/cbr.c
new file mode 100644
index 0000000..970b0a5
--- /dev/null
+++ b/libscan/cbr/cbr.c
@@ -0,0 +1,119 @@
+#include "cbr.h"
+#include "../scan.h"
+#include "../util.h"
+#include "../arc/arc.h"
+#include "../ebook/ebook.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <archive.h>
+
+// Mime ids of the cbr (RAR) and cbz (ZIP) containers. Both are 0 until the
+// lookup in cbr_init() is implemented.
+unsigned int cbr_mime;
+unsigned int cbz_mime;
+
+/**
+ * Resolve cbr_mime and cbz_mime. Currently a no-op, the lookup is still TODO.
+ */
+void cbr_init(void) {
+    //TODO: get mime str
+//    cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
+//    cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
+}
+
+/**
+ * @return non-zero when mime equals cbr_mime, 0 otherwise
+ */
+int is_cbr(unsigned int mime) {
+    return mime == cbr_mime;
+}
+
+/**
+ * Read f entirely, decode it as a RAR archive and repack every entry into an
+ * in-memory ZIP archive, the input expected by the (still disabled)
+ * parse_ebook() call. doc->mime is always cbr_mime on return.
+ *
+ * Conversion is best effort: when a buffer cannot be allocated or one of the
+ * archives cannot be opened, the function releases what it owns and returns.
+ *
+ * @param ctx unused for now
+ * @param f   file to read
+ * @param doc document whose mime is updated
+ */
+void parse_cbr(scan_cbr_ctx_t *ctx, vfile_t *f, document_t *doc) {
+    (void) ctx;
+
+    size_t buf_len = 0;
+    size_t out_buf_len = 0;
+    size_t out_buf_used = 0;
+    char *out_buf = NULL;
+    struct archive *rar_in = NULL;
+    struct archive *zip_out = NULL;
+    struct archive_entry *entry;
+    char arc_buf[ARC_BUF_SIZE];
+
+    // NOTE(review): read_all() is assumed to return a heap buffer owned by the
+    // caller, which the original code never released - confirm against util.h
+    void *buf = read_all(f, &buf_len);
+    if (buf == NULL || buf_len == 0 || buf_len > SIZE_MAX / 2) {
+        goto cleanup;
+    }
+
+    // TODO: we probably only need 1.2x or 1.5x, even better would be a dynamic buffer
+    out_buf_len = buf_len * 2;
+    out_buf = malloc(out_buf_len);
+    if (out_buf == NULL) {
+        goto cleanup;
+    }
+
+    rar_in = archive_read_new();
+    zip_out = archive_write_new();
+    if (rar_in == NULL || zip_out == NULL) {
+        goto cleanup;
+    }
+
+    archive_read_support_filter_none(rar_in);
+    archive_read_support_format_rar(rar_in);
+    if (archive_read_open_memory(rar_in, buf, buf_len) != ARCHIVE_OK) {
+        goto cleanup;
+    }
+
+    archive_write_set_format_zip(zip_out);
+    if (archive_write_open_memory(zip_out, out_buf, out_buf_len, &out_buf_used) != ARCHIVE_OK) {
+        goto cleanup;
+    }
+
+    while (archive_read_next_header(rar_in, &entry) == ARCHIVE_OK) {
+        int ret = archive_write_header(zip_out, entry);
+        if (ret == ARCHIVE_FATAL) {
+            break;
+        }
+        if (ret == ARCHIVE_FAILED) {
+            // Entry rejected by the writer; the next header read skips its data
+            continue;
+        }
+
+        la_ssize_t len;
+        while ((len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE)) > 0) {
+            if (archive_write_data(zip_out, arc_buf, (size_t) len) < 0) {
+                break;
+            }
+        }
+    }
+
+    archive_write_close(zip_out);
+    archive_read_close(rar_in);
+
+    doc->mime = cbz_mime;
+
+    //TODO: get mime string
+//    parse_ebook(out_buf, out_buf_used, doc);
+
+cleanup:
+    doc->mime = cbr_mime;
+    archive_write_free(zip_out);
+    archive_read_free(rar_in);
+    free(out_buf);
+    free(buf);
+}
diff --git a/libscan/cbr/cbr.h b/libscan/cbr/cbr.h
new file mode 100644
index 0000000..97d258d
--- /dev/null
+++ b/libscan/cbr/cbr.h
@@ -0,0 +1,17 @@
+#ifndef SCAN_CBR_H
+#define SCAN_CBR_H
+
+#include <stdlib.h>
+#include "../scan.h"
+
+typedef struct {
+
+} scan_cbr_ctx_t;
+
+void cbr_init(void);
+
+int is_cbr(unsigned int mime);
+
+void parse_cbr(scan_cbr_ctx_t *ctx, vfile_t *f, document_t *doc);
+
+#endif
diff --git a/libscan/ebook/ebook.c b/libscan/ebook/ebook.c
index 10db675..b4c1d68 100644
--- a/libscan/ebook/ebook.c
+++ b/libscan/ebook/ebook.c
@@ -159,7 +159,7 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
         if (len >= MIN_OCR_LEN) {
             text_buffer_append_string(&thread_buffer, text, len - 1);
 //            LOG_DEBUGF(
-//                    "pdf.c",
+//                    "ebook.c",
 //                    "(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
 //                    pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
 //            )
diff --git a/libscan/ebook/ebook.h b/libscan/ebook/ebook.h
index 7621e7f..d0bfbc0 100644
--- a/libscan/ebook/ebook.h
+++ b/libscan/ebook/ebook.h
@@ -1,5 +1,5 @@
-#ifndef SCAN_PDF_H
-#define SCAN_PDF_H
+#ifndef SCAN_EBOOK_H
+#define SCAN_EBOOK_H
 
 #include "../scan.h"
 