store and log callbacks

This commit is contained in:
simon987 2020-03-29 20:37:46 -04:00
parent e6323e28d0
commit 93e7181378
15 changed files with 146 additions and 94 deletions

View File

@ -3,7 +3,30 @@ cmake_minimum_required(VERSION 3.15)
project(scan C) project(scan C)
set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD 11)
add_library(
scan
libscan/util.c libscan/util.h
libscan/scan.h
libscan/macros.h
libscan/text/text.c libscan/text/text.h
libscan/arc/arc.c libscan/arc/arc.h
libscan/ebook/ebook.c libscan/ebook/ebook.h
libscan/cbr/cbr.c libscan/cbr/cbr.h
libscan/ooxml/ooxml.c libscan/ooxml/ooxml.h
libscan/media/media.c libscan/media/media.h
libscan/font/font.c libscan/font/font.h
third-party/utf8.h
)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
target_link_directories(scan PRIVATE BEFORE /usr/share/vcpkg/installed/x64-linux/lib/)
find_package(LibArchive REQUIRED) find_package(LibArchive REQUIRED)
find_package(BZip2 REQUIRED)
find_package(lz4 REQUIRED)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(Tesseract CONFIG REQUIRED) find_package(Tesseract CONFIG REQUIRED)
find_package(harfbuzz CONFIG REQUIRED) find_package(harfbuzz CONFIG REQUIRED)
@ -11,6 +34,9 @@ find_package(OpenJPEG CONFIG REQUIRED)
find_package(JPEG REQUIRED) find_package(JPEG REQUIRED)
find_package(LibXml2 REQUIRED) find_package(LibXml2 REQUIRED)
find_package(FFMPEG REQUIRED) find_package(FFMPEG REQUIRED)
#find_package(OpenSSL REQUIRED)
find_package(LibLZMA REQUIRED)
find_package(ZLIB REQUIRED)
include(ExternalProject) include(ExternalProject)
@ -30,25 +56,11 @@ ExternalProject_Add(
BINARY_DIR "third-party/ext_mupdf/src/mupdf" BINARY_DIR "third-party/ext_mupdf/src/mupdf"
BUILD_COMMAND CFLAGS=-fPIC HAVE_CURL=no HAVE_GLUT=no ${MAKE_EXE} -j 4 --silent BUILD_COMMAND CFLAGS=-fPIC HAVE_CURL=no HAVE_GLUT=no ${MAKE_EXE} -j 4 --silent
&& ar d build/release/libmupdf-third.a jutils.o jdinput.o jdmarker.o jdmaster.o && ar d build/release/libmupdf-third.a jutils.o jdinput.o jdmarker.o jdmaster.o
) )
# Directories inside the ext_mupdf ExternalProject tree (under the current
# binary dir): MUPDF_LIB_DIR is where libmupdf.a / libmupdf-third.a are linked
# from, MUPDF_INC_DIR is added to the include path of the scan target.
set(MUPDF_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_mupdf/src/mupdf/build/release/")
set(MUPDF_INC_DIR "${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_mupdf/src/mupdf/include/")
add_library(
scan
libscan/util.c libscan/util.h
libscan/scan.h
libscan/macros.h
libscan/text/text.c libscan/text/text.h
libscan/arc/arc.c libscan/arc/arc.h
libscan/ebook/ebook.c libscan/ebook/ebook.h
libscan/cbr/cbr.c libscan/cbr/cbr.h
libscan/ooxml/ooxml.c libscan/ooxml/ooxml.h
libscan/media/media.c libscan/media/media.h
libscan/font/font.c libscan/font/font.h
third-party/utf8.h
)
target_compile_options( target_compile_options(
scan scan
@ -70,33 +82,43 @@ string(REGEX REPLACE "-lX11" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}")
target_link_libraries( target_link_libraries(
scan scan
-static ${LibArchive_LIBRARIES}
-static-libgcc ZLIB::ZLIB
-static-libstdc++ BZip2::BZip2
lz4::lz4
zstd
lzo2
LibLZMA::LibLZMA
freetype
# OpenSSL::SSL OpenSSL::Crypto
stdc++
-Wl,--whole-archive -Wl,--whole-archive
m m
-Wl,--no-whole-archive -Wl,--no-whole-archive
"${CMAKE_SOURCE_DIR}/third-party/ext_mupdf/src/mupdf/build/release/libmupdf.a" "${MUPDF_LIB_DIR}/libmupdf.a"
"${CMAKE_SOURCE_DIR}/third-party/ext_mupdf/src/mupdf/build/release/libmupdf-third.a" "${MUPDF_LIB_DIR}/libmupdf-third.a"
${JPEG_LIBRARIES} ${JPEG_LIBRARIES}
${LibArchive_LIBRARIES}
${Tesseract_LIBRARIES} ${Tesseract_LIBRARIES}
${LIBXML2_LIBRARIES} ${LIBXML2_LIBRARIES}
${FFMPEG_LIBRARIES} ${FFMPEG_LIBRARIES}
z
${CMAKE_THREAD_LIBS_INIT} ${CMAKE_THREAD_LIBS_INIT}
# TODO: Looks like I don't need to explicitly link to libuuid? uuid
) )
target_include_directories( target_include_directories(
scan scan
BEFORE BEFORE
PRIVATE PUBLIC
"${CMAKE_SOURCE_DIR}/third-party/ext_mupdf/src/mupdf/include/" ${MUPDF_INC_DIR}
${JPEG_INCLUDE_DIR} ${JPEG_INCLUDE_DIR}
${LIBXML2_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR}
${FFMPEG_INCLUDE_DIR} ${FFMPEG_INCLUDE_DIR}

View File

@ -1,7 +1,7 @@
*(wip)* *(wip)*
```bash ```bash
vcpkg install libarchive pthread tesseract libxml2 ffmpeg vcpkg install libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd
cmake -DCMAKE_TOOLCHAIN_FILE=/usr/share/vcpkg/scripts/buildsystems/vcpkg.cmake . cmake -DCMAKE_TOOLCHAIN_FILE=/usr/share/vcpkg/scripts/buildsystems/vcpkg.cmake .
make -j 4 make -j 4

View File

@ -74,7 +74,6 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) {
struct archive *a; struct archive *a;
struct archive_entry *entry; struct archive_entry *entry;
arc_data_f data; arc_data_f data;
data.f = f; data.f = f;
@ -103,8 +102,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) {
} }
if (ret != ARCHIVE_OK) { if (ret != ARCHIVE_OK) {
//TODO: log CTX_LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
// LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
archive_read_free(a); archive_read_free(a);
return SCAN_ERR_READ; return SCAN_ERR_READ;
} }

View File

@ -13,6 +13,10 @@ typedef int archive_mode_t;
typedef struct { typedef struct {
archive_mode_t mode; archive_mode_t mode;
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
} scan_arc_ctx_t; } scan_arc_ctx_t;
#define ARC_BUF_SIZE 8192 #define ARC_BUF_SIZE 8192

View File

@ -5,7 +5,9 @@
#include "../scan.h" #include "../scan.h"
typedef struct { typedef struct {
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
} scan_cbr_ctx_t; } scan_cbr_ctx_t;
void cbr_init(); void cbr_init();

View File

@ -26,7 +26,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
if (err != 0) { if (err != 0) {
fz_drop_page(fzctx, cover); fz_drop_page(fzctx, cover);
// LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message) CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
return FALSE; return FALSE;
} }
@ -65,7 +65,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
err = fzctx->error.errcode; err = fzctx->error.errcode;
if (err != 0) { if (err != 0) {
// LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message) CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
fz_drop_page(fzctx, cover); fz_drop_page(fzctx, cover);
fz_drop_pixmap(fzctx, pixmap); fz_drop_pixmap(fzctx, pixmap);
return FALSE; return FALSE;
@ -83,7 +83,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
if (err == 0) { if (err == 0) {
unsigned char *tn_buf; unsigned char *tn_buf;
size_t tn_len = fz_buffer_storage(fzctx, fzbuf, &tn_buf); size_t tn_len = fz_buffer_storage(fzctx, fzbuf, &tn_buf);
// store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len); ctx->store((char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len);
} }
fz_drop_buffer(fzctx, fzbuf); fz_drop_buffer(fzctx, fzbuf);
@ -91,19 +91,17 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
fz_drop_page(fzctx, cover); fz_drop_page(fzctx, cover);
if (err != 0) { if (err != 0) {
// LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err, CTX_LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err,
// ctx->error.message) fzctx->error.message)
return FALSE; return FALSE;
} }
return TRUE; return TRUE;
} }
void fz_err_callback(void *user, UNUSED(const char *message)) { void fz_err_callback(void *user, const char *message) {
// if (LogCtx.verbose) { document_t *doc = (document_t *) user;
// document_t *doc = (document_t *) user; thread_ctx.logf(doc->filepath, LEVEL_WARNING,"FZ: %s", message);
// LOG_WARNINGF(doc->filepath, "FZ: %s", message)
// }
} }
static void init_fzctx(fz_context *fzctx, document_t *doc) { static void init_fzctx(fz_context *fzctx, document_t *doc) {
@ -158,11 +156,6 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
size_t len = strlen(text); size_t len = strlen(text);
if (len >= MIN_OCR_LEN) { if (len >= MIN_OCR_LEN) {
text_buffer_append_string(&thread_buffer, text, len - 1); text_buffer_append_string(&thread_buffer, text, len - 1);
// LOG_DEBUGF(
// "ebook.c",
// "(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB",
// pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur
// )
} }
TessBaseAPIEnd(api); TessBaseAPIEnd(api);
@ -230,7 +223,7 @@ void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, docum
err = fzctx->error.errcode; err = fzctx->error.errcode;
if (err) { if (err) {
// LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, ctx->error.message) CTX_LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, fzctx->error.message)
fz_drop_stream(fzctx, stream); fz_drop_stream(fzctx, stream);
fz_drop_document(fzctx, fzdoc); fz_drop_document(fzctx, fzdoc);
fz_drop_context(fzctx); fz_drop_context(fzctx);
@ -260,7 +253,7 @@ void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, docum
fz_catch(fzctx) fz_catch(fzctx)
err = fzctx->error.errcode; err = fzctx->error.errcode;
if (err != 0) { if (err != 0) {
// LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message) CTX_LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, fzctx->error.message)
text_buffer_destroy(&thread_buffer); text_buffer_destroy(&thread_buffer);
fz_drop_page(fzctx, page); fz_drop_page(fzctx, page);
fz_drop_stream(fzctx, stream); fz_drop_stream(fzctx, stream);
@ -293,7 +286,7 @@ void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, docum
err = fzctx->error.errcode; err = fzctx->error.errcode;
if (err != 0) { if (err != 0) {
// LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message) CTX_LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, fzctx->error.message)
text_buffer_destroy(&thread_buffer); text_buffer_destroy(&thread_buffer);
fz_drop_page(fzctx, page); fz_drop_page(fzctx, page);
fz_drop_stext_page(fzctx, stext); fz_drop_stext_page(fzctx, stext);

View File

@ -9,6 +9,10 @@ typedef struct {
const char *tesseract_lang; const char *tesseract_lang;
const char *tesseract_path; const char *tesseract_path;
pthread_mutex_t mupdf_mutex; pthread_mutex_t mupdf_mutex;
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
} scan_ebook_ctx_t; } scan_ebook_ctx_t;
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, document_t *doc); void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, document_t *doc);

View File

@ -149,7 +149,7 @@ void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc) {
FT_Face face; FT_Face face;
FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face); FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
if (err != 0) { if (err != 0) {
// LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, ft_error_string(err)); CTX_LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, FT_Error_String(err))
return; return;
} }
@ -180,7 +180,7 @@ void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc) {
err = FT_Set_Pixel_Sizes(face, 0, pixel); err = FT_Set_Pixel_Sizes(face, 0, pixel);
if (err != 0) { if (err != 0) {
// LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err)) CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, FT_Error_String(err))
FT_Done_Face(face); FT_Done_Face(face);
return; return;
} }
@ -200,7 +200,7 @@ void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc) {
c = c >= 'a' && c <= 'z' ? c - 32 : c + 32; c = c >= 'a' && c <= 'z' ? c - 32 : c + 32;
err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER); err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER);
if (err != 0) { if (err != 0) {
// LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, ft_error_string(err)); CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, FT_Error_String(err))
continue; continue;
} }
} }
@ -221,7 +221,7 @@ void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc) {
dyn_buffer_t bmp_data = dyn_buffer_create(); dyn_buffer_t bmp_data = dyn_buffer_create();
bmp_format(&bmp_data, dimensions, bitmap); bmp_format(&bmp_data, dimensions, bitmap);
// store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur); ctx->store((char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data); dyn_buffer_destroy(&bmp_data);
free(bitmap); free(bitmap);

View File

@ -6,6 +6,9 @@
typedef struct { typedef struct {
int enable_tn; int enable_tn;
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
} scan_font_cxt_t; } scan_font_cxt_t;
void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc); void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc);

View File

@ -14,7 +14,7 @@
#define AVIO_BUF_SIZE 8192 #define AVIO_BUF_SIZE 8192
__always_inline __always_inline
static AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) { static AVCodecContext *alloc_jpeg_encoder(scan_media_ctx_t *ctx, int dstW, int dstH, float qscale) {
AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG); AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec); AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
@ -28,7 +28,7 @@ static AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) {
int ret = avcodec_open2(jpeg, jpeg_codec, NULL); int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
if (ret != 0) { if (ret != 0) {
printf("Could not open jpeg encoder: %s!\n", av_err2str(ret)); CTX_LOG_WARNINGF("media.c", "Could not open jpeg encoder: %s!\n", av_err2str(ret));
return NULL; return NULL;
} }
@ -60,7 +60,7 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si
AVFrame *scaled_frame = av_frame_alloc(); AVFrame *scaled_frame = av_frame_alloc();
struct SwsContext *ctx = sws_getContext( struct SwsContext *sws_ctx = sws_getContext(
decoder->width, decoder->height, decoder->pix_fmt, decoder->width, decoder->height, decoder->pix_fmt,
dstW, dstH, AV_PIX_FMT_YUVJ420P, dstW, dstH, AV_PIX_FMT_YUVJ420P,
SWS_FAST_BILINEAR, 0, 0, 0 SWS_FAST_BILINEAR, 0, 0, 0
@ -71,7 +71,7 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si
av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1); av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1);
sws_scale(ctx, sws_scale(sws_ctx,
(const uint8_t *const *) frame->data, frame->linesize, (const uint8_t *const *) frame->data, frame->linesize,
0, decoder->height, 0, decoder->height,
scaled_frame->data, scaled_frame->linesize scaled_frame->data, scaled_frame->linesize
@ -81,13 +81,13 @@ AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int si
scaled_frame->height = dstH; scaled_frame->height = dstH;
scaled_frame->format = AV_PIX_FMT_YUV420P; scaled_frame->format = AV_PIX_FMT_YUV420P;
sws_freeContext(ctx); sws_freeContext(sws_ctx);
return scaled_frame; return scaled_frame;
} }
__always_inline __always_inline
static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) { static AVFrame *read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) {
AVFrame *frame = av_frame_alloc(); AVFrame *frame = av_frame_alloc();
AVPacket avPacket; AVPacket avPacket;
@ -101,10 +101,10 @@ static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder,
if (read_frame_ret != 0) { if (read_frame_ret != 0) {
if (read_frame_ret != AVERROR_EOF) { if (read_frame_ret != AVERROR_EOF) {
// LOG_WARNINGF(doc->filepath, CTX_LOG_WARNINGF(doc->filepath,
// "(media.c) avcodec_read_frame() returned error code [%d] %s", "(media.c) avcodec_read_frame() returned error code [%d] %s",
// read_frame_ret, av_err2str(read_frame_ret) read_frame_ret, av_err2str(read_frame_ret)
// ) )
} }
av_frame_free(&frame); av_frame_free(&frame);
av_packet_unref(&avPacket); av_packet_unref(&avPacket);
@ -122,10 +122,10 @@ static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder,
// Feed it to decoder // Feed it to decoder
int decode_ret = avcodec_send_packet(decoder, &avPacket); int decode_ret = avcodec_send_packet(decoder, &avPacket);
if (decode_ret != 0) { if (decode_ret != 0) {
// LOG_ERRORF(doc->filepath, CTX_LOG_ERRORF(doc->filepath,
// "(media.c) avcodec_send_packet() returned error code [%d] %s", "(media.c) avcodec_send_packet() returned error code [%d] %s",
// decode_ret, av_err2str(decode_ret) decode_ret, av_err2str(decode_ret)
// ) )
av_frame_free(&frame); av_frame_free(&frame);
av_packet_unref(&avPacket); av_packet_unref(&avPacket);
return NULL; return NULL;
@ -264,10 +264,6 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
APPEND_META(doc, meta_vid) APPEND_META(doc, meta_vid)
} }
meta_line_t *meta_audio = malloc(sizeof(meta_line_t));
meta_audio->key = MetaMediaAudioCodec;
APPEND_META(doc, meta_audio)
meta_line_t *meta_w = malloc(sizeof(meta_line_t)); meta_line_t *meta_w = malloc(sizeof(meta_line_t));
meta_w->key = MetaWidth; meta_w->key = MetaWidth;
meta_w->int_val = stream->codecpar->width; meta_w->int_val = stream->codecpar->width;
@ -310,7 +306,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
} }
} }
AVFrame *frame = read_frame(pFormatCtx, decoder, video_stream, doc); AVFrame *frame = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
if (frame == NULL) { if (frame == NULL) {
avcodec_free_context(&decoder); avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx); avformat_close_input(&pFormatCtx);
@ -332,7 +328,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
} }
// Encode frame to jpeg // Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ctx->tn_qscale); AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(ctx, scaled_frame->width, scaled_frame->height, ctx->tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame); avcodec_send_frame(jpeg_encoder, scaled_frame);
AVPacket jpeg_packet; AVPacket jpeg_packet;
@ -340,8 +336,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
avcodec_receive_packet(jpeg_encoder, &jpeg_packet); avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail // Save thumbnail
// store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, ctx->store((char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, jpeg_packet.size);
// jpeg_packet.size);
av_packet_unref(&jpeg_packet); av_packet_unref(&jpeg_packet);
av_frame_free(&frame); av_frame_free(&frame);
@ -359,12 +354,12 @@ void parse_media_filename(scan_media_ctx_t *ctx, const char *filepath, document_
AVFormatContext *pFormatCtx = avformat_alloc_context(); AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) { if (pFormatCtx == NULL) {
// LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
return; return;
} }
int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL); int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL);
if (res < 0) { if (res < 0) {
// LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)) CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
avformat_close_input(&pFormatCtx); avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx); avformat_free_context(pFormatCtx);
return; return;
@ -389,7 +384,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc)
AVFormatContext *pFormatCtx = avformat_alloc_context(); AVFormatContext *pFormatCtx = avformat_alloc_context();
if (pFormatCtx == NULL) { if (pFormatCtx == NULL) {
// LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") CTX_LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()")
return; return;
} }
@ -408,7 +403,7 @@ void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc)
avformat_free_context(pFormatCtx); avformat_free_context(pFormatCtx);
return; return;
} else if (res < 0) { } else if (res < 0) {
// LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)) CTX_LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res))
av_free(io_ctx->buffer); av_free(io_ctx->buffer);
avio_context_free(&io_ctx); avio_context_free(&io_ctx);
avformat_close_input(&pFormatCtx); avformat_close_input(&pFormatCtx);

View File

@ -4,13 +4,14 @@
#include "../scan.h" #include "../scan.h"
#define MIN_VIDEO_SIZE 1024 * 64
#define MIN_IMAGE_SIZE 1024 * 2
typedef struct { typedef struct {
long content_size; long content_size;
int tn_size; int tn_size;
float tn_qscale; float tn_qscale;
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
} scan_media_ctx_t; } scan_media_ctx_t;
void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc); void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc);

View File

@ -9,8 +9,6 @@
__always_inline __always_inline
static int should_read_part(const char *part) { static int should_read_part(const char *part) {
// LOG_DEBUGF("ooxml.c", "Got part : %s", part)
if (part == NULL) { if (part == NULL) {
return FALSE; return FALSE;
} }
@ -35,15 +33,15 @@ static int should_read_part(const char *part) {
return FALSE; return FALSE;
} }
int extract_text(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) { int extract_text(scan_ooxml_cxt_t *ctx, xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
//TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't //TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't
xmlErrorPtr err = xmlGetLastError(); xmlErrorPtr err = xmlGetLastError();
if (err != NULL) { if (err != NULL) {
if (err->level == XML_ERR_FATAL) { if (err->level == XML_ERR_FATAL) {
// LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message) CTX_LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message)
return -1; return -1;
} else { } else {
// LOG_ERRORF("ooxml.c", "Got recoverable XML error while parsing document: %s", err->message) CTX_LOG_ERRORF("ooxml.c", "Got recoverable XML error while parsing document: %s", err->message)
} }
} }
@ -58,7 +56,7 @@ int extract_text(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) {
} }
} }
extract_text(xml, child->children, buf); extract_text(ctx, xml, child->children, buf);
} }
return 0; return 0;
} }
@ -74,23 +72,23 @@ int xml_io_close(UNUSED(void *context)) {
} }
__always_inline __always_inline
static int read_part(struct archive *a, text_buffer_t *buf, document_t *doc) { static int read_part(scan_ooxml_cxt_t *ctx, struct archive *a, text_buffer_t *buf, document_t *doc) {
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL, XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET); xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL, XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET);
if (xml == NULL) { if (xml == NULL) {
// LOG_ERROR(doc->filepath, "Could not parse XML") CTX_LOG_ERROR(doc->filepath, "Could not parse XML")
return -1; return -1;
} }
xmlNode *root = xmlDocGetRootElement(xml); xmlNode *root = xmlDocGetRootElement(xml);
if (root == NULL) { if (root == NULL) {
// LOG_ERROR(doc->filepath, "Empty document") CTX_LOG_ERROR(doc->filepath, "Empty document")
xmlFreeDoc(xml); xmlFreeDoc(xml);
return -1; return -1;
} }
extract_text(xml, root, buf); extract_text(ctx, xml, root, buf);
xmlFreeDoc(xml); xmlFreeDoc(xml);
return 0; return 0;
@ -106,7 +104,7 @@ void parse_doc(scan_ooxml_cxt_t *ctx, vfile_t *f, document_t *doc) {
int ret = archive_read_open_memory(a, buf, buf_len); int ret = archive_read_open_memory(a, buf, buf_len);
if (ret != ARCHIVE_OK) { if (ret != ARCHIVE_OK) {
// LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a)) CTX_LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a))
archive_read_free(a); archive_read_free(a);
return; return;
} }
@ -119,7 +117,7 @@ void parse_doc(scan_ooxml_cxt_t *ctx, vfile_t *f, document_t *doc) {
const char *path = archive_entry_pathname(entry); const char *path = archive_entry_pathname(entry);
if (should_read_part(path)) { if (should_read_part(path)) {
ret = read_part(a, &tex, doc); ret = read_part(ctx, a, &tex, doc);
if (ret != 0) { if (ret != 0) {
break; break;
} }

View File

@ -6,6 +6,9 @@
typedef struct { typedef struct {
long content_size; long content_size;
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
} scan_ooxml_cxt_t; } scan_ooxml_cxt_t;
void parse_doc(scan_ooxml_cxt_t *ctx, vfile_t *f, document_t *doc); void parse_doc(scan_ooxml_cxt_t *ctx, vfile_t *f, document_t *doc);

View File

@ -32,6 +32,31 @@ typedef int scan_code_t;
#define SCAN_OK (scan_code_t) 0 #define SCAN_OK (scan_code_t) 0
#define SCAN_ERR_READ (scan_code_t) -1 #define SCAN_ERR_READ (scan_code_t) -1
/* Severity levels passed as the `level` argument of the log callbacks. */
#define LEVEL_DEBUG 0
#define LEVEL_INFO 1
#define LEVEL_WARNING 2
#define LEVEL_ERROR 3
#define LEVEL_FATAL 4

/*
 * Logging helpers that forward to the callbacks stored in a parser context.
 *
 * Requirements at the call site:
 *   - a pointer named `ctx` must be in scope and expose `log` / `logf`
 *     members (log_callback_t / logf_callback_t).
 *   - the *F variants need at least one argument after `fmt`; use the
 *     non-F variant for a plain string.
 *
 * Every macro expands to a complete statement that already ends with ';',
 * so call sites may omit the trailing semicolon.
 */
#define CTX_LOG_DEBUGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_DEBUG, fmt, __VA_ARGS__);
#define CTX_LOG_DEBUG(filepath, str) ctx->log(filepath, LEVEL_DEBUG, str);

#define CTX_LOG_INFOF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_INFO, fmt, __VA_ARGS__);
#define CTX_LOG_INFO(filepath, str) ctx->log(filepath, LEVEL_INFO, str);

#define CTX_LOG_WARNINGF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_WARNING, fmt, __VA_ARGS__);
#define CTX_LOG_WARNING(filepath, str) ctx->log(filepath, LEVEL_WARNING, str);

#define CTX_LOG_ERRORF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_ERROR, fmt, __VA_ARGS__);
#define CTX_LOG_ERROR(filepath, str) ctx->log(filepath, LEVEL_ERROR, str);

/*
 * The fatal variants log and then terminate the process with exit(-1).
 * Both statements are wrapped in do { } while (0) so the pair behaves as ONE
 * statement: without the wrapper, `if (cond) CTX_LOG_FATAL(...)` would guard
 * only the log call and exit(-1) would run unconditionally.
 */
#define CTX_LOG_FATALF(filepath, fmt, ...) do { ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1); } while (0);
#define CTX_LOG_FATAL(filepath, str) do { ctx->log(filepath, LEVEL_FATAL, str); exit(-1); } while (0);

/* Callback handed a key buffer and a data buffer, each with its length. */
typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len);

/* printf-style log callback: file path, LEVEL_* severity, format string, varargs. */
typedef void (*logf_callback_t)(char *filepath, int level, char *format, ...);

/* Plain-string log callback: file path, LEVEL_* severity, message. */
typedef void (*log_callback_t)(char *filepath, int level, char *str);
// This is written to file as a 16-bit int! // This is written to file as a 16-bit int!
enum metakey { enum metakey {
MetaContent = META_STR(1), MetaContent = META_STR(1),

View File

@ -6,6 +6,10 @@
typedef struct { typedef struct {
long content_size; long content_size;
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
} scan_text_ctx_t; } scan_text_ctx_t;
scan_code_t parse_text(scan_text_ctx_t *ctx, struct vfile *f, document_t *doc); scan_code_t parse_text(scan_text_ctx_t *ctx, struct vfile *f, document_t *doc);