diff --git a/CMakeLists.txt b/CMakeLists.txt index a3c1e5d..71df8dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,30 @@ cmake_minimum_required(VERSION 3.15) project(scan C) set(CMAKE_C_STANDARD 11) +add_library( + scan + libscan/util.c libscan/util.h + libscan/scan.h + libscan/macros.h + + libscan/text/text.c libscan/text/text.h + libscan/arc/arc.c libscan/arc/arc.h + libscan/ebook/ebook.c libscan/ebook/ebook.h + libscan/cbr/cbr.c libscan/cbr/cbr.h + libscan/ooxml/ooxml.c libscan/ooxml/ooxml.h + libscan/media/media.c libscan/media/media.h + libscan/font/font.c libscan/font/font.h + + third-party/utf8.h +) + +set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) +target_link_directories(scan PRIVATE BEFORE /usr/share/vcpkg/installed/x64-linux/lib/) + find_package(LibArchive REQUIRED) +find_package(BZip2 REQUIRED) +find_package(lz4 REQUIRED) + find_package(Threads REQUIRED) find_package(Tesseract CONFIG REQUIRED) find_package(harfbuzz CONFIG REQUIRED) @@ -11,6 +34,9 @@ find_package(OpenJPEG CONFIG REQUIRED) find_package(JPEG REQUIRED) find_package(LibXml2 REQUIRED) find_package(FFMPEG REQUIRED) +#find_package(OpenSSL REQUIRED) +find_package(LibLZMA REQUIRED) +find_package(ZLIB REQUIRED) include(ExternalProject) @@ -30,25 +56,11 @@ ExternalProject_Add( BINARY_DIR "third-party/ext_mupdf/src/mupdf" BUILD_COMMAND CFLAGS=-fPIC HAVE_CURL=no HAVE_GLUT=no ${MAKE_EXE} -j 4 --silent - && ar d build/release/libmupdf-third.a jutils.o jdinput.o jdmarker.o jdmaster.o + && ar d build/release/libmupdf-third.a jutils.o jdinput.o jdmarker.o jdmaster.o ) +SET(MUPDF_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_mupdf/src/mupdf/build/release/) +SET(MUPDF_INC_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_mupdf/src/mupdf/include/) -add_library( - scan - libscan/util.c libscan/util.h - libscan/scan.h - libscan/macros.h - - libscan/text/text.c libscan/text/text.h - libscan/arc/arc.c libscan/arc/arc.h - libscan/ebook/ebook.c libscan/ebook/ebook.h - libscan/cbr/cbr.c libscan/cbr/cbr.h 
- libscan/ooxml/ooxml.c libscan/ooxml/ooxml.h - libscan/media/media.c libscan/media/media.h - libscan/font/font.c libscan/font/font.h - - third-party/utf8.h -) target_compile_options( scan @@ -70,33 +82,43 @@ string(REGEX REPLACE "-lX11" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}") target_link_libraries( scan - -static - -static-libgcc - -static-libstdc++ + ${LibArchive_LIBRARIES} + ZLIB::ZLIB + BZip2::BZip2 + lz4::lz4 + zstd + lzo2 + LibLZMA::LibLZMA + + freetype + + # OpenSSL::SSL OpenSSL::Crypto + + stdc++ -Wl,--whole-archive m -Wl,--no-whole-archive - "${CMAKE_SOURCE_DIR}/third-party/ext_mupdf/src/mupdf/build/release/libmupdf.a" - "${CMAKE_SOURCE_DIR}/third-party/ext_mupdf/src/mupdf/build/release/libmupdf-third.a" + "${MUPDF_LIB_DIR}/libmupdf.a" + "${MUPDF_LIB_DIR}/libmupdf-third.a" ${JPEG_LIBRARIES} - ${LibArchive_LIBRARIES} ${Tesseract_LIBRARIES} ${LIBXML2_LIBRARIES} ${FFMPEG_LIBRARIES} + z ${CMAKE_THREAD_LIBS_INIT} - # TODO: Looks like I don't need to explicitly link to libuuid? + uuid ) target_include_directories( scan BEFORE - PRIVATE - "${CMAKE_SOURCE_DIR}/third-party/ext_mupdf/src/mupdf/include/" + PUBLIC + ${MUPDF_INC_DIR} ${JPEG_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ${FFMPEG_INCLUDE_DIR} diff --git a/README.md b/README.md index ea3e8d8..0d70ad4 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ *(wip)* ```bash -vcpkg install libarchive pthread tesseract libxml2 ffmpeg +vcpkg install libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd cmake -DCMAKE_TOOLCHAIN_FILE=/usr/share/vcpkg/scripts/buildsystems/vcpkg.cmake . 
make -j 4 diff --git a/libscan/arc/arc.c b/libscan/arc/arc.c index 2e86672..cf0db58 100644 --- a/libscan/arc/arc.c +++ b/libscan/arc/arc.c @@ -74,7 +74,6 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) { struct archive *a; struct archive_entry *entry; - arc_data_f data; data.f = f; @@ -103,8 +102,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) { } if (ret != ARCHIVE_OK) { - //TODO: log -// LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a)) + CTX_LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a)) archive_read_free(a); return SCAN_ERR_READ; } diff --git a/libscan/arc/arc.h b/libscan/arc/arc.h index 3c0e95f..43c347f 100644 --- a/libscan/arc/arc.h +++ b/libscan/arc/arc.h @@ -13,6 +13,10 @@ typedef int archive_mode_t; typedef struct { archive_mode_t mode; + + log_callback_t log; + logf_callback_t logf; + store_callback_t store; } scan_arc_ctx_t; #define ARC_BUF_SIZE 8192 diff --git a/libscan/cbr/cbr.h b/libscan/cbr/cbr.h index 97d258d..46c3ae2 100644 --- a/libscan/cbr/cbr.h +++ b/libscan/cbr/cbr.h @@ -5,7 +5,9 @@ #include "../scan.h" typedef struct { - + log_callback_t log; + logf_callback_t logf; + store_callback_t store; } scan_cbr_ctx_t; void cbr_init(); diff --git a/libscan/ebook/ebook.c b/libscan/ebook/ebook.c index b4c1d68..c020863 100644 --- a/libscan/ebook/ebook.c +++ b/libscan/ebook/ebook.c @@ -26,7 +26,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d if (err != 0) { fz_drop_page(fzctx, cover); -// LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message) + CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message) return FALSE; } @@ -65,7 +65,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d err = fzctx->error.errcode; if (err != 0) { -// LOG_WARNINGF(doc->filepath, "fz_run_page() returned 
error code [%d] %s", err, ctx->error.message) + CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message) fz_drop_page(fzctx, cover); fz_drop_pixmap(fzctx, pixmap); return FALSE; @@ -83,7 +83,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d if (err == 0) { unsigned char *tn_buf; size_t tn_len = fz_buffer_storage(fzctx, fzbuf, &tn_buf); -// store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len); + ctx->store((char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len); } fz_drop_buffer(fzctx, fzbuf); @@ -91,19 +91,17 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d fz_drop_page(fzctx, cover); if (err != 0) { -// LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err, -// ctx->error.message) + CTX_LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err, + fzctx->error.message) return FALSE; } return TRUE; } -void fz_err_callback(void *user, UNUSED(const char *message)) { -// if (LogCtx.verbose) { -// document_t *doc = (document_t *) user; -// LOG_WARNINGF(doc->filepath, "FZ: %s", message) -// } +void fz_err_callback(void *user, const char *message) { + document_t *doc = (document_t *) user; + thread_ctx.logf(doc->filepath, LEVEL_WARNING,"FZ: %s", message); } static void init_fzctx(fz_context *fzctx, document_t *doc) { @@ -158,11 +156,6 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev), size_t len = strlen(text); if (len >= MIN_OCR_LEN) { text_buffer_append_string(&thread_buffer, text, len - 1); -// LOG_DEBUGF( -// "ebook.c", -// "(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB", -// pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur -// ) } TessBaseAPIEnd(api); @@ -230,7 +223,7 @@ void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, docum err = fzctx->error.errcode; 
if (err) { -// LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, ctx->error.message) + CTX_LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, fzctx->error.message) fz_drop_stream(fzctx, stream); fz_drop_document(fzctx, fzdoc); fz_drop_context(fzctx); @@ -260,7 +253,7 @@ void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, docum fz_catch(fzctx) err = fzctx->error.errcode; if (err != 0) { -// LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message) + CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message) text_buffer_destroy(&thread_buffer); fz_drop_page(fzctx, page); fz_drop_stream(fzctx, stream); @@ -293,7 +286,7 @@ void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, docum err = fzctx->error.errcode; if (err != 0) { -// LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message) + CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message) text_buffer_destroy(&thread_buffer); fz_drop_page(fzctx, page); fz_drop_stext_page(fzctx, stext); diff --git a/libscan/ebook/ebook.h b/libscan/ebook/ebook.h index d0bfbc0..236ecd2 100644 --- a/libscan/ebook/ebook.h +++ b/libscan/ebook/ebook.h @@ -9,6 +9,10 @@ typedef struct { const char *tesseract_lang; const char *tesseract_path; pthread_mutex_t mupdf_mutex; + + log_callback_t log; + logf_callback_t logf; + store_callback_t store; } scan_ebook_ctx_t; void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, document_t *doc); diff --git a/libscan/font/font.c b/libscan/font/font.c index c0947ea..cd0111b 100644 --- a/libscan/font/font.c +++ b/libscan/font/font.c @@ -149,7 +149,7 @@ void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc) { FT_Face face; FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, 
&face); if (err != 0) { -// LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, ft_error_string(err)); + CTX_LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, FT_Error_String(err)) return; } @@ -180,7 +180,7 @@ void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc) { err = FT_Set_Pixel_Sizes(face, 0, pixel); if (err != 0) { -// LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err)) + CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, FT_Error_String(err)) FT_Done_Face(face); return; } @@ -200,7 +200,7 @@ void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc) { c = c >= 'a' && c <= 'z' ? c - 32 : c + 32; err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER); if (err != 0) { -// LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, ft_error_string(err)); + CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, FT_Error_String(err)) continue; } } @@ -221,7 +221,7 @@ void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc) { dyn_buffer_t bmp_data = dyn_buffer_create(); bmp_format(&bmp_data, dimensions, bitmap); -// store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur); + ctx->store((char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur); dyn_buffer_destroy(&bmp_data); free(bitmap); diff --git a/libscan/font/font.h b/libscan/font/font.h index 01e6b03..8eea561 100644 --- a/libscan/font/font.h +++ b/libscan/font/font.h @@ -6,6 +6,9 @@ typedef struct { int enable_tn; + log_callback_t log; + logf_callback_t logf; + store_callback_t store; } scan_font_cxt_t; void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc); diff --git a/libscan/media/media.c b/libscan/media/media.c index 
7b581c2..ba8ae78 100644 --- a/libscan/media/media.c +++ b/libscan/media/media.c @@ -14,7 +14,7 @@ #define AVIO_BUF_SIZE 8192 __always_inline -static AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) { +static AVCodecContext *alloc_jpeg_encoder(scan_media_ctx_t *ctx, int dstW, int dstH, float qscale) { AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG); AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec); @@ -28,7 +28,7 @@ static AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) { int ret = avcodec_open2(jpeg, jpeg_codec, NULL); if (ret != 0) { - printf("Could not open jpeg encoder: %s!\n", av_err2str(ret)); + CTX_LOG_WARNINGF("media.c", "Could not open jpeg encoder: %s!\n", av_err2str(ret)); return NULL; } @@ -60,7 +60,7 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si AVFrame *scaled_frame = av_frame_alloc(); - struct SwsContext *ctx = sws_getContext( + struct SwsContext *sws_ctx = sws_getContext( decoder->width, decoder->height, decoder->pix_fmt, dstW, dstH, AV_PIX_FMT_YUVJ420P, SWS_FAST_BILINEAR, 0, 0, 0 @@ -71,7 +71,7 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1); - sws_scale(ctx, + sws_scale(sws_ctx, (const uint8_t *const *) frame->data, frame->linesize, 0, decoder->height, scaled_frame->data, scaled_frame->linesize @@ -81,13 +81,13 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si scaled_frame->height = dstH; scaled_frame->format = AV_PIX_FMT_YUV420P; - sws_freeContext(ctx); + sws_freeContext(sws_ctx); return scaled_frame; } __always_inline -static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) { +static AVFrame *read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) { 
AVFrame *frame = av_frame_alloc(); AVPacket avPacket; @@ -101,10 +101,10 @@ static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, if (read_frame_ret != 0) { if (read_frame_ret != AVERROR_EOF) { -// LOG_WARNINGF(doc->filepath, -// "(media.c) avcodec_read_frame() returned error code [%d] %s", -// read_frame_ret, av_err2str(read_frame_ret) -// ) + CTX_LOG_WARNINGF(doc->filepath, + "(media.c) avcodec_read_frame() returned error code [%d] %s", + read_frame_ret, av_err2str(read_frame_ret) + ) } av_frame_free(&frame); av_packet_unref(&avPacket); @@ -122,10 +122,10 @@ static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, // Feed it to decoder int decode_ret = avcodec_send_packet(decoder, &avPacket); if (decode_ret != 0) { -// LOG_ERRORF(doc->filepath, -// "(media.c) avcodec_send_packet() returned error code [%d] %s", -// decode_ret, av_err2str(decode_ret) -// ) + CTX_LOG_ERRORF(doc->filepath, + "(media.c) avcodec_send_packet() returned error code [%d] %s", + decode_ret, av_err2str(decode_ret) + ) av_frame_free(&frame); av_packet_unref(&avPacket); return NULL; @@ -264,10 +264,6 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, APPEND_META(doc, meta_vid) } - meta_line_t *meta_audio = malloc(sizeof(meta_line_t)); - meta_audio->key = MetaMediaAudioCodec; - APPEND_META(doc, meta_audio) - meta_line_t *meta_w = malloc(sizeof(meta_line_t)); meta_w->key = MetaWidth; meta_w->int_val = stream->codecpar->width; @@ -310,7 +306,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, } } - AVFrame *frame = read_frame(pFormatCtx, decoder, video_stream, doc); + AVFrame *frame = read_frame(ctx, pFormatCtx, decoder, video_stream, doc); if (frame == NULL) { avcodec_free_context(&decoder); avformat_close_input(&pFormatCtx); @@ -332,7 +328,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, } // Encode frame to jpeg - AVCodecContext *jpeg_encoder = 
alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ctx->tn_qscale); + AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(ctx, scaled_frame->width, scaled_frame->height, ctx->tn_qscale); avcodec_send_frame(jpeg_encoder, scaled_frame); AVPacket jpeg_packet; @@ -340,8 +336,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, avcodec_receive_packet(jpeg_encoder, &jpeg_packet); // Save thumbnail -// store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, -// jpeg_packet.size); + ctx->store((char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, jpeg_packet.size); av_packet_unref(&jpeg_packet); av_frame_free(&frame); @@ -359,12 +354,12 @@ void parse_media_filename(scan_media_ctx_t *ctx, const char *filepath, document_ AVFormatContext *pFormatCtx = avformat_alloc_context(); if (pFormatCtx == NULL) { -// LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") + CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") return; } int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL); if (res < 0) { -// LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)) + CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)) avformat_close_input(&pFormatCtx); avformat_free_context(pFormatCtx); return; @@ -389,7 +384,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc) AVFormatContext *pFormatCtx = avformat_alloc_context(); if (pFormatCtx == NULL) { -// LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") + CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") return; } @@ -408,7 +403,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc) 
avformat_free_context(pFormatCtx); return; } else if (res < 0) { -// LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)) + CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)) av_free(io_ctx->buffer); avio_context_free(&io_ctx); avformat_close_input(&pFormatCtx); diff --git a/libscan/media/media.h b/libscan/media/media.h index 7874316..71cf3b1 100644 --- a/libscan/media/media.h +++ b/libscan/media/media.h @@ -4,13 +4,14 @@ #include "../scan.h" -#define MIN_VIDEO_SIZE 1024 * 64 -#define MIN_IMAGE_SIZE 1024 * 2 - typedef struct { long content_size; int tn_size; float tn_qscale; + + log_callback_t log; + logf_callback_t logf; + store_callback_t store; } scan_media_ctx_t; void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc); diff --git a/libscan/ooxml/ooxml.c b/libscan/ooxml/ooxml.c index 11beb19..36efdb6 100644 --- a/libscan/ooxml/ooxml.c +++ b/libscan/ooxml/ooxml.c @@ -9,8 +9,6 @@ __always_inline static int should_read_part(const char *part) { -// LOG_DEBUGF("ooxml.c", "Got part : %s", part) - if (part == NULL) { return FALSE; } @@ -35,15 +33,15 @@ static int should_read_part(const char *part) { return FALSE; } -int extract_text(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) { +int extract_text(scan_ooxml_cxt_t *ctx, xmlDoc *xml, xmlNode *node, text_buffer_t *buf) { //TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't xmlErrorPtr err = xmlGetLastError(); if (err != NULL) { if (err->level == XML_ERR_FATAL) { -// LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message) + CTX_LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message) return -1; } else { -// LOG_ERRORF("ooxml.c", "Got recoverable XML error while parsing document: %s", err->message) + CTX_LOG_ERRORF("ooxml.c", "Got recoverable XML error while parsing document: %s", err->message) } } @@ -58,7 +56,7 @@ 
int extract_text(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) { } } - extract_text(xml, child->children, buf); + extract_text(ctx, xml, child->children, buf); } return 0; } @@ -74,23 +72,23 @@ int xml_io_close(UNUSED(void *context)) { } __always_inline -static int read_part(struct archive *a, text_buffer_t *buf, document_t *doc) { +static int read_part(scan_ooxml_cxt_t *ctx, struct archive *a, text_buffer_t *buf, document_t *doc) { xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL, XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET); if (xml == NULL) { -// LOG_ERROR(doc->filepath, "Could not parse XML") + CTX_LOG_ERROR(doc->filepath, "Could not parse XML") return -1; } xmlNode *root = xmlDocGetRootElement(xml); if (root == NULL) { -// LOG_ERROR(doc->filepath, "Empty document") + CTX_LOG_ERROR(doc->filepath, "Empty document") xmlFreeDoc(xml); return -1; } - extract_text(xml, root, buf); + extract_text(ctx, xml, root, buf); xmlFreeDoc(xml); return 0; @@ -106,7 +104,7 @@ void parse_doc(scan_ooxml_cxt_t *ctx, vfile_t *f, document_t *doc) { int ret = archive_read_open_memory(a, buf, buf_len); if (ret != ARCHIVE_OK) { -// LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a)) + CTX_LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a)) archive_read_free(a); return; } @@ -119,7 +117,7 @@ void parse_doc(scan_ooxml_cxt_t *ctx, vfile_t *f, document_t *doc) { const char *path = archive_entry_pathname(entry); if (should_read_part(path)) { - ret = read_part(a, &tex, doc); + ret = read_part(ctx, a, &tex, doc); if (ret != 0) { break; } diff --git a/libscan/ooxml/ooxml.h b/libscan/ooxml/ooxml.h index 369288c..039dd02 100644 --- a/libscan/ooxml/ooxml.h +++ b/libscan/ooxml/ooxml.h @@ -6,6 +6,9 @@ typedef struct { long content_size; + log_callback_t log; + logf_callback_t logf; + store_callback_t store; } scan_ooxml_cxt_t; void parse_doc(scan_ooxml_cxt_t *ctx, vfile_t *f, document_t *doc); 
diff --git a/libscan/scan.h b/libscan/scan.h
index 2df35bd..9778c58 100644
--- a/libscan/scan.h
+++ b/libscan/scan.h
@@ -32,6 +32,43 @@ typedef int scan_code_t;
 #define SCAN_OK (scan_code_t) 0
 #define SCAN_ERR_READ (scan_code_t) -1
 
+// Severity levels passed as the `level` argument of the log callbacks.
+#define LEVEL_DEBUG 0
+#define LEVEL_INFO 1
+#define LEVEL_WARNING 2
+#define LEVEL_ERROR 3
+#define LEVEL_FATAL 4
+
+// Logging helpers. They expand to a call through a variable named `ctx` that
+// must be in scope at the call site and expose `log`/`logf` members.
+// Each macro carries its own trailing ';', so call sites may omit it.
+// The *F variants need at least one argument after `fmt`.
+#define CTX_LOG_DEBUGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_DEBUG, fmt, __VA_ARGS__);
+#define CTX_LOG_DEBUG(filepath, str) ctx->log(filepath, LEVEL_DEBUG, str);
+
+#define CTX_LOG_INFOF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_INFO, fmt, __VA_ARGS__);
+#define CTX_LOG_INFO(filepath, str) ctx->log(filepath, LEVEL_INFO, str);
+
+#define CTX_LOG_WARNINGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_WARNING, fmt, __VA_ARGS__);
+#define CTX_LOG_WARNING(filepath, str) ctx->log(filepath, LEVEL_WARNING, str);
+
+#define CTX_LOG_ERRORF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_ERROR, fmt, __VA_ARGS__);
+#define CTX_LOG_ERROR(filepath, str) ctx->log(filepath, LEVEL_ERROR, str);
+
+// Fatal variants log and then terminate the process. The braces keep the
+// exit(-1) inside the same statement as the log call, so an unbraced
+// `if (cond) CTX_LOG_FATAL(...)` does not exit unconditionally.
+#define CTX_LOG_FATALF(filepath, fmt, ...) { ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1); }
+#define CTX_LOG_FATAL(filepath, str) { ctx->log(filepath, LEVEL_FATAL, str); exit(-1); }
+
+// Callback types stored in each scan_*_ctx_t.
+// store: receives a key/value pair (the parsers pass doc->uuid and thumbnail bytes).
+// logf:  printf-style message tagged with a file path and a LEVEL_* value.
+// log:   same as logf, for a string that needs no formatting.
+typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len);
+typedef void (*logf_callback_t)(char *filepath, int level, char *format, ...);
+typedef void (*log_callback_t)(char *filepath, int level, char *str);
+
 // This is written to file as a 16-bit int!
 enum metakey {
     MetaContent = META_STR(1),
diff --git a/libscan/text/text.h b/libscan/text/text.h
index aa1ea29..bf11bdc 100644
--- a/libscan/text/text.h
+++ b/libscan/text/text.h
@@ -6,6 +6,10 @@
 
 typedef struct {
     long content_size;
+
+    log_callback_t log;
+    logf_callback_t logf;
+    store_callback_t store;
 } scan_text_ctx_t;
 
 scan_code_t parse_text(scan_text_ctx_t *ctx, struct vfile *f, document_t *doc);