store and log callbacks

This commit is contained in:
simon987 2020-03-29 20:37:46 -04:00
parent e6323e28d0
commit 93e7181378
15 changed files with 146 additions and 94 deletions

View File

@ -3,7 +3,30 @@ cmake_minimum_required(VERSION 3.15)
project(scan C)
set(CMAKE_C_STANDARD 11)
# libscan: the file-parsing library. No STATIC/SHARED keyword is given, so the
# library type follows CMake's default (BUILD_SHARED_LIBS).
add_library(scan
    # Common helpers and shared headers
    libscan/util.c
    libscan/util.h
    libscan/scan.h
    libscan/macros.h
    # One parser module per supported format family
    libscan/text/text.c
    libscan/text/text.h
    libscan/arc/arc.c
    libscan/arc/arc.h
    libscan/ebook/ebook.c
    libscan/ebook/ebook.h
    libscan/cbr/cbr.c
    libscan/cbr/cbr.h
    libscan/ooxml/ooxml.c
    libscan/ooxml/ooxml.h
    libscan/media/media.c
    libscan/media/media.h
    libscan/font/font.c
    libscan/font/font.h
    # Vendored single-header dependency
    third-party/utf8.h
)
# Restrict library lookups to static-archive suffixes. This must stay ahead of
# the find_package() calls below, because it only affects searches that run
# after it is set.
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
# Put the vcpkg x64-linux lib directory at the front of scan's link search path.
# NOTE(review): this is an absolute, machine-specific path; it presumably matches
# the vcpkg install location used in the README — confirm before building elsewhere.
target_link_directories(scan PRIVATE BEFORE /usr/share/vcpkg/installed/x64-linux/lib/)
# Third-party dependencies; REQUIRED aborts configuration if any one is missing.
find_package(LibArchive REQUIRED)
find_package(BZip2 REQUIRED)
find_package(lz4 REQUIRED)
find_package(Threads REQUIRED)
# CONFIG forces config-file package mode (no Find<Pkg>.cmake module lookup).
find_package(Tesseract CONFIG REQUIRED)
find_package(harfbuzz CONFIG REQUIRED)
@ -11,6 +34,9 @@ find_package(OpenJPEG CONFIG REQUIRED)
find_package(JPEG REQUIRED)
find_package(LibXml2 REQUIRED)
find_package(FFMPEG REQUIRED)
#find_package(OpenSSL REQUIRED)
find_package(LibLZMA REQUIRED)
find_package(ZLIB REQUIRED)
include(ExternalProject)
@ -30,25 +56,11 @@ ExternalProject_Add(
BINARY_DIR "third-party/ext_mupdf/src/mupdf"
BUILD_COMMAND CFLAGS=-fPIC HAVE_CURL=no HAVE_GLUT=no ${MAKE_EXE} -j 4 --silent
&& ar d build/release/libmupdf-third.a jutils.o jdinput.o jdmarker.o jdmaster.o
&& ar d build/release/libmupdf-third.a jutils.o jdinput.o jdmarker.o jdmaster.o
)
# Locations inside the build tree of the mupdf static libraries and public headers.
# Both values keep their trailing slash.
set(MUPDF_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_mupdf/src/mupdf/build/release/)
set(MUPDF_INC_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_mupdf/src/mupdf/include/)
add_library(
scan
libscan/util.c libscan/util.h
libscan/scan.h
libscan/macros.h
libscan/text/text.c libscan/text/text.h
libscan/arc/arc.c libscan/arc/arc.h
libscan/ebook/ebook.c libscan/ebook/ebook.h
libscan/cbr/cbr.c libscan/cbr/cbr.h
libscan/ooxml/ooxml.c libscan/ooxml/ooxml.h
libscan/media/media.c libscan/media/media.h
libscan/font/font.c libscan/font/font.h
third-party/utf8.h
)
target_compile_options(
scan
@ -70,33 +82,43 @@ string(REGEX REPLACE "-lX11" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}")
target_link_libraries(
scan
-static
-static-libgcc
-static-libstdc++
${LibArchive_LIBRARIES}
ZLIB::ZLIB
BZip2::BZip2
lz4::lz4
zstd
lzo2
LibLZMA::LibLZMA
freetype
# OpenSSL::SSL OpenSSL::Crypto
stdc++
-Wl,--whole-archive
m
-Wl,--no-whole-archive
"${CMAKE_SOURCE_DIR}/third-party/ext_mupdf/src/mupdf/build/release/libmupdf.a"
"${CMAKE_SOURCE_DIR}/third-party/ext_mupdf/src/mupdf/build/release/libmupdf-third.a"
"${MUPDF_LIB_DIR}/libmupdf.a"
"${MUPDF_LIB_DIR}/libmupdf-third.a"
${JPEG_LIBRARIES}
${LibArchive_LIBRARIES}
${Tesseract_LIBRARIES}
${LIBXML2_LIBRARIES}
${FFMPEG_LIBRARIES}
z
${CMAKE_THREAD_LIBS_INIT}
# TODO: Looks like I don't need to explicitly link to libuuid?
uuid
)
target_include_directories(
scan
BEFORE
PRIVATE
"${CMAKE_SOURCE_DIR}/third-party/ext_mupdf/src/mupdf/include/"
PUBLIC
${MUPDF_INC_DIR}
${JPEG_INCLUDE_DIR}
${LIBXML2_INCLUDE_DIR}
${FFMPEG_INCLUDE_DIR}

View File

@ -1,7 +1,7 @@
*(wip)*
```bash
vcpkg install libarchive pthread tesseract libxml2 ffmpeg
vcpkg install libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd
cmake -DCMAKE_TOOLCHAIN_FILE=/usr/share/vcpkg/scripts/buildsystems/vcpkg.cmake .
make -j 4

View File

@ -74,7 +74,6 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) {
struct archive *a;
struct archive_entry *entry;
arc_data_f data;
data.f = f;
@ -103,8 +102,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) {
}
if (ret != ARCHIVE_OK) {
//TODO: log
// LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
CTX_LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
archive_read_free(a);
return SCAN_ERR_READ;
}

View File

@ -13,6 +13,10 @@ typedef int archive_mode_t;
// Per-caller context for the archive parser (passed to parse_archive() as `ctx`).
typedef struct {
// Archive handling mode (archive_mode_t is a typedef of int).
archive_mode_t mode;
// Plain-string log callback; invoked through the CTX_LOG_* macros.
log_callback_t log;
// printf-style log callback; invoked through the CTX_LOG_*F macros.
logf_callback_t logf;
// Key/value store callback. NOTE(review): no call site is visible in arc.c here —
// presumably present for parity with the other parser contexts; confirm.
store_callback_t store;
} scan_arc_ctx_t;
#define ARC_BUF_SIZE 8192

View File

@ -5,7 +5,9 @@
#include "../scan.h"
// Context for the cbr module: holds only the host-supplied callbacks.
typedef struct {
// Plain-string log callback (signature: filepath, level, message).
log_callback_t log;
// printf-style log callback (signature: filepath, level, format, ...).
logf_callback_t logf;
// Key/value store callback (signature: key, key_len, buf, buf_len).
store_callback_t store;
} scan_cbr_ctx_t;
void cbr_init();

View File

@ -26,7 +26,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
if (err != 0) {
fz_drop_page(fzctx, cover);
// LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
return FALSE;
}
@ -65,7 +65,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
err = fzctx->error.errcode;
if (err != 0) {
// LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
fz_drop_page(fzctx, cover);
fz_drop_pixmap(fzctx, pixmap);
return FALSE;
@ -83,7 +83,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
if (err == 0) {
unsigned char *tn_buf;
size_t tn_len = fz_buffer_storage(fzctx, fzbuf, &tn_buf);
// store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
ctx->store((char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
}
fz_drop_buffer(fzctx, fzbuf);
@ -91,19 +91,17 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
fz_drop_page(fzctx, cover);
if (err != 0) {
// LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err,
// ctx->error.message)
CTX_LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err,
fzctx->error.message)
return FALSE;
}
return TRUE;
}
void fz_err_callback(void *user, UNUSED(const char *message)) {
// if (LogCtx.verbose) {
// document_t *doc = (document_t *) user;
// LOG_WARNINGF(doc->filepath, "FZ: %s", message)
// }
void fz_err_callback(void *user, const char *message) {
document_t *doc = (document_t *) user;
thread_ctx.logf(doc->filepath, LEVEL_WARNING,"FZ: %s", message);
}
static void init_fzctx(fz_context *fzctx, document_t *doc) {
@ -158,11 +156,6 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
size_t len = strlen(text);
if (len >= MIN_OCR_LEN) {
text_buffer_append_string(&thread_buffer, text, len - 1);
// LOG_DEBUGF(
// "ebook.c",
// "(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
// pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
// )
}
TessBaseAPIEnd(api);
@ -230,7 +223,7 @@ void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, docum
err = fzctx->error.errcode;
if (err) {
// LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, ctx->error.message)
CTX_LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, fzctx->error.message)
fz_drop_stream(fzctx, stream);
fz_drop_document(fzctx, fzdoc);
fz_drop_context(fzctx);
@ -260,7 +253,7 @@ void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, docum
fz_catch(fzctx)
err = fzctx->error.errcode;
if (err != 0) {
// LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message)
CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
text_buffer_destroy(&thread_buffer);
fz_drop_page(fzctx, page);
fz_drop_stream(fzctx, stream);
@ -293,7 +286,7 @@ void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, docum
err = fzctx->error.errcode;
if (err != 0) {
// LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message)
CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
text_buffer_destroy(&thread_buffer);
fz_drop_page(fzctx, page);
fz_drop_stext_page(fzctx, stext);

View File

@ -9,6 +9,10 @@ typedef struct {
const char *tesseract_lang;
const char *tesseract_path;
pthread_mutex_t mupdf_mutex;
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
} scan_ebook_ctx_t;
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, document_t *doc);

View File

@ -149,7 +149,7 @@ void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc) {
FT_Face face;
FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
if (err != 0) {
// LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, ft_error_string(err));
CTX_LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, FT_Error_String(err))
return;
}
@ -180,7 +180,7 @@ void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc) {
err = FT_Set_Pixel_Sizes(face, 0, pixel);
if (err != 0) {
// LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err))
CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, FT_Error_String(err))
FT_Done_Face(face);
return;
}
@ -200,7 +200,7 @@ void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc) {
c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) {
// LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, ft_error_string(err));
CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, FT_Error_String(err))
continue;
}
}
@ -221,7 +221,7 @@ void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc) {
dyn_buffer_t bmp_data = dyn_buffer_create();
bmp_format(&bmp_data, dimensions, bitmap);
// store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);
ctx->store((char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data);
free(bitmap);

View File

@ -6,6 +6,9 @@
// Context handed to parse_font().
typedef struct {
// NOTE(review): presumably toggles thumbnail generation — confirm against font.c.
int enable_tn;
// Plain-string log callback; invoked through the CTX_LOG_* macros.
log_callback_t log;
// printf-style log callback; invoked through the CTX_LOG_*F macros.
logf_callback_t logf;
// Store callback; parse_font() calls it with the document uuid as key and the
// rendered BMP data as value.
store_callback_t store;
} scan_font_cxt_t;
void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc);

View File

@ -14,7 +14,7 @@
#define AVIO_BUF_SIZE 8192
__always_inline
static AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
static AVCodecContext *alloc_jpeg_encoder(scan_media_ctx_t *ctx, int dstW, int dstH, float qscale) {
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
@ -28,7 +28,7 @@ static AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
if (ret != 0) {
printf("Could not open jpeg encoder: %s!\n", av_err2str(ret));
CTX_LOG_WARNINGF("media.c", "Could not open jpeg encoder: %s!\n", av_err2str(ret));
return NULL;
}
@ -60,7 +60,7 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si
AVFrame *scaled_frame = av_frame_alloc();
struct SwsContext *ctx = sws_getContext(
struct SwsContext *sws_ctx = sws_getContext(
decoder->width, decoder->height, decoder->pix_fmt,
dstW, dstH, AV_PIX_FMT_YUVJ420P,
SWS_FAST_BILINEAR, 0, 0, 0
@ -71,7 +71,7 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
sws_scale(ctx,
sws_scale(sws_ctx,
(const uint8_t *const *) frame->data, frame->linesize,
0, decoder->height,
scaled_frame->data, scaled_frame->linesize
@ -81,13 +81,13 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si
scaled_frame->height = dstH;
scaled_frame->format = AV_PIX_FMT_YUV420P;
sws_freeContext(ctx);
sws_freeContext(sws_ctx);
return scaled_frame;
}
__always_inline
static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
static AVFrame *read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
AVFrame *frame = av_frame_alloc();
AVPacket avPacket;
@ -101,10 +101,10 @@ static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder,
if (read_frame_ret != 0) {
if (read_frame_ret != AVERROR_EOF) {
// LOG_WARNINGF(doc->filepath,
// "(media.c) avcodec_read_frame() returned error code [%d] %s",
// read_frame_ret, av_err2str(read_frame_ret)
// )
CTX_LOG_WARNINGF(doc->filepath,
"(media.c) avcodec_read_frame() returned error code [%d] %s",
read_frame_ret, av_err2str(read_frame_ret)
)
}
av_frame_free(&frame);
av_packet_unref(&avPacket);
@ -122,10 +122,10 @@ static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder,
// Feed it to decoder
int decode_ret = avcodec_send_packet(decoder, &avPacket);
if (decode_ret != 0) {
// LOG_ERRORF(doc->filepath,
// "(media.c) avcodec_send_packet() returned error code [%d] %s",
// decode_ret, av_err2str(decode_ret)
// )
CTX_LOG_ERRORF(doc->filepath,
"(media.c) avcodec_send_packet() returned error code [%d] %s",
decode_ret, av_err2str(decode_ret)
)
av_frame_free(&frame);
av_packet_unref(&avPacket);
return NULL;
@ -264,10 +264,6 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
APPEND_META(doc, meta_vid)
}
meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
meta_audio->key = MetaMediaAudioCodec;
APPEND_META(doc, meta_audio)
meta_line_t *meta_w = malloc(sizeof(meta_line_t));
meta_w->key = MetaWidth;
meta_w->int_val = stream->codecpar->width;
@ -310,7 +306,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
}
}
AVFrame *frame = read_frame(pFormatCtx, decoder, video_stream, doc);
AVFrame *frame = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
if (frame == NULL) {
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
@ -332,7 +328,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
}
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ctx->tn_qscale);
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(ctx, scaled_frame->width, scaled_frame->height, ctx->tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame);
AVPacket jpeg_packet;
@ -340,8 +336,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
// store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data,
// jpeg_packet.size);
ctx->store((char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
av_frame_free(&frame);
@ -359,12 +354,12 @@ void parse_media_filename(scan_media_ctx_t *ctx, const char *filepath, document_
AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) {
// LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
return;
}
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
if (res < 0) {
// LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
@ -389,7 +384,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc)
AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) {
// LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
return;
}
@ -408,7 +403,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc)
avformat_free_context(pFormatCtx);
return;
} else if (res < 0) {
// LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
av_free(io_ctx->buffer);
avio_context_free(&io_ctx);
avformat_close_input(&pFormatCtx);

View File

@ -4,13 +4,14 @@
#include "../scan.h"
#define MIN_VIDEO_SIZE 1024 * 64
#define MIN_IMAGE_SIZE 1024 * 2
// Context handed to the media parser entry points (parse_media and friends).
typedef struct {
// NOTE(review): presumably the maximum amount of content to extract — confirm.
long content_size;
// NOTE(review): presumably the target thumbnail size — confirm against scale_frame().
int tn_size;
// Passed as the qscale argument when allocating the JPEG thumbnail encoder.
float tn_qscale;
// Plain-string log callback; invoked through the CTX_LOG_* macros.
log_callback_t log;
// printf-style log callback; invoked through the CTX_LOG_*F macros.
logf_callback_t logf;
// Store callback; called with the document uuid as key and the encoded JPEG
// packet as value when a thumbnail is saved.
store_callback_t store;
} scan_media_ctx_t;
void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc);

View File

@ -9,8 +9,6 @@
__always_inline
static int should_read_part(const char *part) {
// LOG_DEBUGF("ooxml.c", "Got part : %s", part)
if (part == NULL) {
return FALSE;
}
@ -35,15 +33,15 @@ static int should_read_part(const char *part) {
return FALSE;
}
int extract_text(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
int extract_text(scan_ooxml_cxt_t *ctx, xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
//TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't
xmlErrorPtr err = xmlGetLastError();
if (err != NULL) {
if (err->level == XML_ERR_FATAL) {
// LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message)
CTX_LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message)
return -1;
} else {
// LOG_ERRORF("ooxml.c", "Got recoverable XML error while parsing document: %s", err->message)
CTX_LOG_ERRORF("ooxml.c", "Got recoverable XML error while parsing document: %s", err->message)
}
}
@ -58,7 +56,7 @@ int extract_text(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
}
}
extract_text(xml, child->children, buf);
extract_text(ctx, xml, child->children, buf);
}
return 0;
}
@ -74,23 +72,23 @@ int xml_io_close(UNUSED(void *context)) {
}
__always_inline
static int read_part(struct archive *a, text_buffer_t *buf, document_t *doc) {
static int read_part(scan_ooxml_cxt_t *ctx, struct archive *a, text_buffer_t *buf, document_t *doc) {
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL, XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
if (xml == NULL) {
// LOG_ERROR(doc->filepath, "Could not parse XML")
CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
return -1;
}
xmlNode *root = xmlDocGetRootElement(xml);
if (root == NULL) {
// LOG_ERROR(doc->filepath, "Empty document")
CTX_LOG_ERROR(doc->filepath, "Empty document")
xmlFreeDoc(xml);
return -1;
}
extract_text(xml, root, buf);
extract_text(ctx, xml, root, buf);
xmlFreeDoc(xml);
return 0;
@ -106,7 +104,7 @@ void parse_doc(scan_ooxml_cxt_t *ctx, vfile_t *f, document_t *doc) {
int ret = archive_read_open_memory(a, buf, buf_len);
if (ret != ARCHIVE_OK) {
// LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a))
CTX_LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a))
archive_read_free(a);
return;
}
@ -119,7 +117,7 @@ void parse_doc(scan_ooxml_cxt_t *ctx, vfile_t *f, document_t *doc) {
const char *path = archive_entry_pathname(entry);
if (should_read_part(path)) {
ret = read_part(a, &tex, doc);
ret = read_part(ctx, a, &tex, doc);
if (ret != 0) {
break;
}

View File

@ -6,6 +6,9 @@
// Context handed to parse_doc() and threaded through read_part()/extract_text().
typedef struct {
// NOTE(review): presumably the maximum amount of text to extract — confirm.
long content_size;
// Plain-string log callback; invoked through the CTX_LOG_* macros.
log_callback_t log;
// printf-style log callback; invoked through the CTX_LOG_*F macros.
logf_callback_t logf;
// Key/value store callback (signature: key, key_len, buf, buf_len).
store_callback_t store;
} scan_ooxml_cxt_t;
void parse_doc(scan_ooxml_cxt_t *ctx, vfile_t *f, document_t *doc);

View File

@ -32,6 +32,31 @@ typedef int scan_code_t;
#define SCAN_OK (scan_code_t) 0
#define SCAN_ERR_READ (scan_code_t) -1
// Severity levels passed as the `level` argument of the log callbacks.
#define LEVEL_DEBUG 0
#define LEVEL_INFO 1
#define LEVEL_WARNING 2
#define LEVEL_ERROR 3
#define LEVEL_FATAL 4

// Logging helpers. Every macro expects a pointer named `ctx` to be in scope at the
// call site, pointing to a struct with `log` and `logf` callback members.
//  - The trailing semicolon is part of each expansion, so call sites may omit it.
//  - The *F variants need at least one argument after `fmt`.
#define CTX_LOG_DEBUGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_DEBUG, fmt, __VA_ARGS__);
#define CTX_LOG_DEBUG(filepath, str) ctx->log(filepath, LEVEL_DEBUG, str);
#define CTX_LOG_INFOF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_INFO, fmt, __VA_ARGS__);
#define CTX_LOG_INFO(filepath, str) ctx->log(filepath, LEVEL_INFO, str);
#define CTX_LOG_WARNINGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_WARNING, fmt, __VA_ARGS__);
#define CTX_LOG_WARNING(filepath, str) ctx->log(filepath, LEVEL_WARNING, str);
#define CTX_LOG_ERRORF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_ERROR, fmt, __VA_ARGS__);
#define CTX_LOG_ERROR(filepath, str) ctx->log(filepath, LEVEL_ERROR, str);

// The fatal variants log and then terminate the process with exit(-1). They are
// wrapped in do { } while (0) so that the log call and the exit() stay together as
// a single statement; otherwise `if (cond) CTX_LOG_FATAL(...)` would run exit()
// unconditionally.
#define CTX_LOG_FATALF(filepath, fmt, ...) do { ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1); } while (0);
#define CTX_LOG_FATAL(filepath, str) do { ctx->log(filepath, LEVEL_FATAL, str); exit(-1); } while (0);

// Callback used by parsers to persist a binary blob (`buf`, `buf_len` bytes) under
// a binary key (`key`, `key_len` bytes).
typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len);
// printf-style log callback: `format` plus variadic arguments, tagged with a file path and a LEVEL_* value.
typedef void (*logf_callback_t)(char *filepath, int level, char *format, ...);
// Plain-string log callback, tagged with a file path and a LEVEL_* value.
typedef void (*log_callback_t)(char *filepath, int level, char *str);
// This is written to file as a 16-bit int!
enum metakey {
MetaContent = META_STR(1),

View File

@ -6,6 +6,10 @@
// Context handed to parse_text().
typedef struct {
// NOTE(review): presumably the maximum amount of text to extract — confirm.
long content_size;
// Plain-string log callback (signature: filepath, level, message).
log_callback_t log;
// printf-style log callback (signature: filepath, level, format, ...).
logf_callback_t logf;
// Key/value store callback (signature: key, key_len, buf, buf_len).
store_callback_t store;
} scan_text_ctx_t;
scan_code_t parse_text(scan_text_ctx_t *ctx, struct vfile *f, document_t *doc);