From 8a7635359a421c6a5f54e7731e64ac520b6e59c8 Mon Sep 17 00:00:00 2001 From: simon987 Date: Sun, 5 Apr 2020 21:42:18 -0400 Subject: [PATCH] unscramble submodules --- .gitmodules | 3 - third-party/libscan | 1 + third-party/libscan/.gitignore | 10 - third-party/libscan/.gitmodules | 12 - third-party/libscan/CMakeLists.txt | 124 ------- third-party/libscan/README.md | 7 - third-party/libscan/build.sh | 8 - third-party/libscan/libscan/arc/arc.c | 167 --------- third-party/libscan/libscan/arc/arc.h | 26 -- third-party/libscan/libscan/cbr/cbr.c | 65 ---- third-party/libscan/libscan/cbr/cbr.h | 17 - third-party/libscan/libscan/ebook/ebook.c | 334 ----------------- third-party/libscan/libscan/ebook/ebook.h | 16 - third-party/libscan/libscan/font/font.c | 234 ------------ third-party/libscan/libscan/font/font.h | 14 - third-party/libscan/libscan/macros.h | 21 -- third-party/libscan/libscan/media/media.c | 419 ---------------------- third-party/libscan/libscan/media/media.h | 18 - third-party/libscan/libscan/ooxml/ooxml.c | 142 -------- third-party/libscan/libscan/ooxml/ooxml.h | 13 - third-party/libscan/libscan/scan.h | 131 ------- third-party/libscan/libscan/text/text.c | 31 -- third-party/libscan/libscan/text/text.h | 13 - third-party/libscan/libscan/util.c | 0 third-party/libscan/libscan/util.h | 276 -------------- third-party/onion | 1 - third-party/utf8.h | 1 - 27 files changed, 1 insertion(+), 2103 deletions(-) create mode 160000 third-party/libscan delete mode 100644 third-party/libscan/.gitignore delete mode 100644 third-party/libscan/.gitmodules delete mode 100644 third-party/libscan/CMakeLists.txt delete mode 100644 third-party/libscan/README.md delete mode 100755 third-party/libscan/build.sh delete mode 100644 third-party/libscan/libscan/arc/arc.c delete mode 100644 third-party/libscan/libscan/arc/arc.h delete mode 100644 third-party/libscan/libscan/cbr/cbr.c delete mode 100644 third-party/libscan/libscan/cbr/cbr.h delete mode 100644 third-party/libscan/libscan/ebook/ebook.c delete mode 100644 third-party/libscan/libscan/ebook/ebook.h delete mode 100644 third-party/libscan/libscan/font/font.c delete mode 100644 third-party/libscan/libscan/font/font.h delete mode 100644 third-party/libscan/libscan/macros.h delete mode 100644 third-party/libscan/libscan/media/media.c delete mode 100644 third-party/libscan/libscan/media/media.h delete mode 100644 third-party/libscan/libscan/ooxml/ooxml.c delete mode 100644 third-party/libscan/libscan/ooxml/ooxml.h delete mode 100644 third-party/libscan/libscan/scan.h delete mode 100644 third-party/libscan/libscan/text/text.c delete mode 100644 third-party/libscan/libscan/text/text.h delete mode 100644 third-party/libscan/libscan/util.c delete mode 100644 third-party/libscan/libscan/util.h delete mode 160000 third-party/onion delete mode 160000 third-party/utf8.h diff --git a/.gitmodules b/.gitmodules index cc56582..1b29252 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,6 @@ [submodule "third-party/libscan"] path = third-party/libscan url = https://github.com/simon987/libscan -[submodule "third-party/utf8.h"] - path = third-party/utf8.h - url = https://github.com/sheredom/utf8.h [submodule "third-party/argparse"] path = third-party/argparse url = https://github.com/cofyc/argparse diff --git a/third-party/libscan b/third-party/libscan new file mode 160000 index 0000000..5d39dc6 --- /dev/null +++ b/third-party/libscan @@ -0,0 +1 @@ +Subproject commit 5d39dc675849ecb99d5308b02f7e1fd20ca5b410 diff --git a/third-party/libscan/.gitignore b/third-party/libscan/.gitignore deleted file mode 100644 index 6b82d53..0000000 --- a/third-party/libscan/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -.idea/ -cmake_install.cmake -Makefile -libscan.a -libscan.so -*.cbp -CMakeFiles -CMakeCache.txt -scan_test -third-party/ \ No newline at end of file diff --git a/third-party/libscan/.gitmodules b/third-party/libscan/.gitmodules deleted file mode 100644 index 0fcf539..0000000 --- a/third-party/libscan/.gitmodules +++ /dev/null @@ -1,12 +0,0 @@ -[submodule "third-party/uuid"] - path = third-party/uuid - url = https://github.com/certik/uuid -[submodule "third-party/utf8.h"] - path = third-party/utf8.h - url = https://github.com/sheredom/utf8.h -[submodule "third-party/libarchive"] - path = third-party/libarchive - url = https://github.com/libarchive/libarchive -[submodule "third-party/zlib"] - path = third-party/zlib - url = https://github.com/madler/zlib diff --git a/third-party/libscan/CMakeLists.txt b/third-party/libscan/CMakeLists.txt deleted file mode 100644 index 711da51..0000000 --- a/third-party/libscan/CMakeLists.txt +++ /dev/null @@ -1,124 +0,0 @@ -cmake_minimum_required(VERSION 3.15) - -project(scan C) -set(CMAKE_C_STANDARD 11) - -add_library( - scan - libscan/util.c libscan/util.h - libscan/scan.h - libscan/macros.h - - libscan/text/text.c libscan/text/text.h - libscan/arc/arc.c libscan/arc/arc.h - libscan/ebook/ebook.c libscan/ebook/ebook.h - libscan/cbr/cbr.c libscan/cbr/cbr.h - libscan/ooxml/ooxml.c libscan/ooxml/ooxml.h - libscan/media/media.c libscan/media/media.h - libscan/font/font.c libscan/font/font.h - - third-party/utf8.h -) - -set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) -target_link_directories(scan PRIVATE BEFORE /usr/share/vcpkg/installed/x64-linux/lib/) - -find_package(LibArchive REQUIRED) -find_package(BZip2 REQUIRED) -find_package(lz4 REQUIRED) - -find_package(Threads REQUIRED) -find_package(Tesseract CONFIG REQUIRED) -find_package(harfbuzz CONFIG REQUIRED) -find_package(OpenJPEG CONFIG REQUIRED) -find_package(JPEG REQUIRED) -find_package(LibXml2 REQUIRED) -find_package(FFMPEG REQUIRED) -#find_package(OpenSSL REQUIRED) -find_package(LibLZMA REQUIRED) -find_package(ZLIB REQUIRED) - - -include(ExternalProject) -find_program(MAKE_EXE NAMES gmake nmake make) -ExternalProject_Add( - mupdf - # TODO: use master branch ? - URL https://mupdf.com/downloads/archive/mupdf-1.16.1-source.tar.xz - - UPDATE_COMMAND "" - PATCH_COMMAND "" - TEST_COMMAND "" - CONFIGURE_COMMAND "" - INSTALL_COMMAND "" - - PREFIX "third-party/ext_mupdf" - BINARY_DIR "third-party/ext_mupdf/src/mupdf" - - BUILD_COMMAND CFLAGS=-fPIC HAVE_CURL=no HAVE_GLUT=no ${MAKE_EXE} -j 4 --silent - && ar d build/release/libmupdf-third.a jutils.o jdinput.o jdmarker.o jdmaster.o -) -SET(MUPDF_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_mupdf/src/mupdf/build/release/) -SET(MUPDF_INC_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_mupdf/src/mupdf/include/) - - -target_compile_options( - scan - PRIVATE - -Werror - -g -) - -add_dependencies( - scan - mupdf -) - -SET(CMAKE_C_LINK_EXECUTABLE "g++ -o ") - -string(REGEX REPLACE "-lvdpau" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}") -string(REGEX REPLACE "-lX11" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}") - -target_link_libraries( - scan - - ${LibArchive_LIBRARIES} - ZLIB::ZLIB - BZip2::BZip2 - lz4::lz4 - zstd - lzo2 - LibLZMA::LibLZMA - - freetype - - # OpenSSL::SSL OpenSSL::Crypto - - stdc++ - - -Wl,--whole-archive - m - -Wl,--no-whole-archive - - "${MUPDF_LIB_DIR}/libmupdf.a" - "${MUPDF_LIB_DIR}/libmupdf-third.a" - - ${JPEG_LIBRARIES} - ${Tesseract_LIBRARIES} - ${LIBXML2_LIBRARIES} - ${FFMPEG_LIBRARIES} - - ${CMAKE_THREAD_LIBS_INIT} - - uuid -) - -target_include_directories( - scan - BEFORE - PUBLIC - ${MUPDF_INC_DIR} - ${JPEG_INCLUDE_DIR} - ${LIBXML2_INCLUDE_DIR} - ${FFMPEG_INCLUDE_DIR} -) diff --git a/third-party/libscan/README.md b/third-party/libscan/README.md deleted file mode 100644 index aecd594..0000000 --- a/third-party/libscan/README.md +++ /dev/null @@ -1,7 +0,0 @@ - -```bash -vcpkg install libarchive pthread tesseract libxml2 ffmpeg - -cmake -DCMAKE_TOOLCHAIN_FILE=/usr/share/vcpkg/scripts/buildsystems/vcpkg.cmake . -make -j 4 -``` \ No newline at end of file diff --git a/third-party/libscan/build.sh b/third-party/libscan/build.sh deleted file mode 100755 index 2d3464b..0000000 --- a/third-party/libscan/build.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -export CC=gcc -export CXX=g++ - -rm -rf CMakeFiles CMakeCache.txt -cmake -DCMAKE_TOOLCHAIN_FILE=/usr/share/vcpkg/scripts/buildsystems/vcpkg.cmake . || exit -make -j 4 diff --git a/third-party/libscan/libscan/arc/arc.c b/third-party/libscan/libscan/arc/arc.c deleted file mode 100644 index 2e86672..0000000 --- a/third-party/libscan/libscan/arc/arc.c +++ /dev/null @@ -1,167 +0,0 @@ -#include "arc.h" - -#include "../scan.h" -#include "../util.h" - -#include -#include -#include -#include - - - -int should_parse_filtered_file(const char *filepath, int ext) { - char tmp[PATH_MAX * 2]; - - if (ext == 0) { - return FALSE; - } - - memcpy(tmp, filepath, ext - 1); - *(tmp + ext - 1) = '\0'; - - char *idx = strrchr(tmp, '.'); - - if (idx == NULL) { - return FALSE; - } - - if (strcmp(idx, ".tar") == 0) { - return TRUE; - } - - return FALSE; -} - -int arc_read(struct vfile *f, void *buf, size_t size) { - return archive_read_data(f->arc, buf, size); -} - -typedef struct arc_data { - vfile_t *f; - char buf[ARC_BUF_SIZE]; -} arc_data_f; - -int vfile_open_callback(struct archive *a, void *user_data) { - arc_data_f *data = user_data; - - if (data->f->is_fs_file && data->f->fd == -1) { - data->f->fd = open(data->f->filepath, O_RDONLY); - } - - return ARCHIVE_OK; -} - -long vfile_read_callback(struct archive *a, void *user_data, const void **buf) { - arc_data_f *data = user_data; - - *buf = data->buf; - return data->f->read(data->f, data->buf, ARC_BUF_SIZE); -} - -int vfile_close_callback(struct archive *a, void *user_data) { - arc_data_f *data = user_data; - - if (data->f->close != NULL) { - data->f->close(data->f); - } - - return ARCHIVE_OK; -} - -scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) { - - struct archive *a; - struct archive_entry *entry; - - - arc_data_f data; - data.f = f; - - int ret = 0; - if (data.f->is_fs_file) { - - a = archive_read_new(); - archive_read_support_filter_all(a); - archive_read_support_format_all(a); - - ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE); - } else if (ctx->mode == ARC_MODE_RECURSE) { - - a = archive_read_new(); - archive_read_support_filter_all(a); - archive_read_support_format_all(a); - - ret = archive_read_open( - a, &data, - vfile_open_callback, - vfile_read_callback, - vfile_close_callback - ); - } else { - return SCAN_OK; - } - - if (ret != ARCHIVE_OK) { - //TODO: log -// LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a)) - archive_read_free(a); - return SCAN_ERR_READ; - } - - if (ctx->mode == ARC_MODE_LIST) { - - dyn_buffer_t buf = dyn_buffer_create(); - - while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { - if (S_ISREG(archive_entry_stat(entry)->st_mode)) { - - char *path = (char *) archive_entry_pathname(entry); - - dyn_buffer_append_string(&buf, path); - dyn_buffer_write_char(&buf, '\n'); - } - } - dyn_buffer_write_char(&buf, '\0'); - - meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur); - meta_list->key = MetaContent; - strcpy(meta_list->str_val, buf.buf); - APPEND_META(doc, meta_list); - dyn_buffer_destroy(&buf); - - } else { - - parse_job_t *sub_job = malloc(sizeof(parse_job_t) + PATH_MAX * 2); - - sub_job->vfile.close = NULL; - sub_job->vfile.read = arc_read; - sub_job->vfile.arc = a; - sub_job->vfile.filepath = sub_job->filepath; - sub_job->vfile.is_fs_file = FALSE; - memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t)); - - while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { - sub_job->info = *archive_entry_stat(entry); - if (S_ISREG(sub_job->info.st_mode)) { - sprintf(sub_job->filepath, "%s#/%s", f->filepath, archive_entry_pathname(entry)); - sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1; - - char *p = strrchr(sub_job->filepath, '.'); - if (p != NULL) { - sub_job->ext = (int) (p - sub_job->filepath + 1); - } else { - sub_job->ext = (int) strlen(sub_job->filepath); - } - - //TODO: -// parse(sub_job); - } - } - - free(sub_job); - } - - archive_read_free(a); - return SCAN_OK; -} diff --git a/third-party/libscan/libscan/arc/arc.h b/third-party/libscan/libscan/arc/arc.h deleted file mode 100644 index 3c0e95f..0000000 --- a/third-party/libscan/libscan/arc/arc.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef SCAN_ARC_H -#define SCAN_ARC_H - -#include -#include -#include "../scan.h" - -#define ARC_MODE_SKIP 0 -#define ARC_MODE_LIST 1 -#define ARC_MODE_SHALLOW 2 -#define ARC_MODE_RECURSE 3 -typedef int archive_mode_t; - -typedef struct { - archive_mode_t mode; -} scan_arc_ctx_t; - -#define ARC_BUF_SIZE 8192 - -int should_parse_filtered_file(const char *filepath, int ext); - -scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc); - -int arc_read(struct vfile * f, void *buf, size_t size); - -#endif diff --git a/third-party/libscan/libscan/cbr/cbr.c b/third-party/libscan/libscan/cbr/cbr.c deleted file mode 100644 index 970b0a5..0000000 --- a/third-party/libscan/libscan/cbr/cbr.c +++ /dev/null @@ -1,65 +0,0 @@ -#include "cbr.h" -#include "../scan.h" -#include "../util.h" -#include "../arc/arc.h" -#include "../ebook/ebook.h" - -#include -#include - -unsigned int cbr_mime; -unsigned int cbz_mime; - -void cbr_init() { - //TODO: get mime str -// cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr"); -// cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz"); -} - -int is_cbr(unsigned int mime) { - return mime == cbr_mime; -} - -void parse_cbr(scan_cbr_ctx_t *ctx, vfile_t *f, document_t *doc) { - - size_t buf_len; - void *buf = read_all(f, &buf_len); - - char *out_buf = malloc(buf_len * 2); // TODO: we probably only need 1.2x or 1.5x, even better would be a dynamic buffer - size_t out_buf_used = 0; - - struct archive *rar_in = archive_read_new(); - archive_read_support_filter_none(rar_in); - archive_read_support_format_rar(rar_in); - - archive_read_open_memory(rar_in, buf, buf_len); - - struct archive *zip_out = archive_write_new(); - archive_write_set_format_zip(zip_out); - archive_write_open_memory(zip_out, out_buf, buf_len * 2, &out_buf_used); - - struct archive_entry *entry; - while (archive_read_next_header(rar_in, &entry) == ARCHIVE_OK) { - archive_write_header(zip_out, entry); - - char arc_buf[ARC_BUF_SIZE]; - int len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE); - while (len > 0) { - archive_write_data(zip_out, arc_buf, len); - len = archive_read_data(rar_in, arc_buf, ARC_BUF_SIZE); - } - } - - archive_write_close(zip_out); - archive_write_free(zip_out); - - archive_read_close(rar_in); - archive_read_free(rar_in); - - doc->mime = cbz_mime; - - //TODO: get mime string -// parse_ebook(out_buf, out_buf_used, doc); - doc->mime = cbr_mime; - free(out_buf); -} diff --git a/third-party/libscan/libscan/cbr/cbr.h b/third-party/libscan/libscan/cbr/cbr.h deleted file mode 100644 index 97d258d..0000000 --- a/third-party/libscan/libscan/cbr/cbr.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef SCAN_CBR_H -#define SCAN_CBR_H - -#include -#include "../scan.h" - -typedef struct { - -} scan_cbr_ctx_t; - -void cbr_init(); - -int is_cbr(unsigned int mime); - -void parse_cbr(scan_cbr_ctx_t *ctx, vfile_t *f, document_t *doc); - -#endif diff --git a/third-party/libscan/libscan/ebook/ebook.c b/third-party/libscan/libscan/ebook/ebook.c deleted file mode 100644 index b4c1d68..0000000 --- a/third-party/libscan/libscan/ebook/ebook.c +++ /dev/null @@ -1,334 +0,0 @@ -#include "ebook.h" -#include "../util.h" -#include -#include -#include - -#define MIN_OCR_SIZE 350 -#define MIN_OCR_LEN 10 - -/* fill_image callback doesn't let us pass opaque pointers unless I create my own device */ -__thread text_buffer_t thread_buffer; -__thread scan_ebook_ctx_t thread_ctx; - - -int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_document *fzdoc) { - - int err = 0; - fz_page *cover = NULL; - - fz_var(cover); - fz_var(err); - fz_try(fzctx) - cover = fz_load_page(fzctx, fzdoc, 0); - fz_catch(fzctx) - err = 1; - - if (err != 0) { - fz_drop_page(fzctx, cover); -// LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message) - return FALSE; - } - - fz_rect bounds = fz_bound_page(fzctx, cover); - - float scale; - float w = (float) bounds.x1 - bounds.x0; - float h = (float) bounds.y1 - bounds.y0; - if (w > h) { - scale = (float) ctx->tn_size / w; - } else { - scale = (float) ctx->tn_size / h; - } - fz_matrix m = fz_scale(scale, scale); - - bounds = fz_transform_rect(bounds, m); - fz_irect bbox = fz_round_rect(bounds); - fz_pixmap *pixmap = fz_new_pixmap_with_bbox(fzctx, fzctx->colorspace->rgb, bbox, NULL, 0); - - fz_clear_pixmap_with_value(fzctx, pixmap, 0xFF); - fz_device *dev = fz_new_draw_device(fzctx, m, pixmap); - - fz_var(err); - fz_try(fzctx) - { - pthread_mutex_lock(&ctx->mupdf_mutex); - fz_run_page(fzctx, cover, dev, fz_identity, NULL); - } - fz_always(fzctx) - { - fz_close_device(fzctx, dev); - fz_drop_device(fzctx, dev); - pthread_mutex_unlock(&ctx->mupdf_mutex); - } - fz_catch(fzctx) - err = fzctx->error.errcode; - - if (err != 0) { -// LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message) - fz_drop_page(fzctx, cover); - fz_drop_pixmap(fzctx, pixmap); - return FALSE; - } - - fz_buffer *fzbuf = NULL; - fz_var(fzbuf); - fz_var(err); - - fz_try(fzctx) - fzbuf = fz_new_buffer_from_pixmap_as_png(fzctx, pixmap, fz_default_color_params); - fz_catch(fzctx) - err = fzctx->error.errcode; - - if (err == 0) { - unsigned char *tn_buf; - size_t tn_len = fz_buffer_storage(fzctx, fzbuf, &tn_buf); -// store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) tn_buf, tn_len); - } - - fz_drop_buffer(fzctx, fzbuf); - fz_drop_pixmap(fzctx, pixmap); - fz_drop_page(fzctx, cover); - - if (err != 0) { -// LOG_WARNINGF(doc->filepath, "fz_new_buffer_from_pixmap_as_png() returned error code [%d] %s", err, -// ctx->error.message) - return FALSE; - } - - return TRUE; -} - -void fz_err_callback(void *user, UNUSED(const char *message)) { -// if (LogCtx.verbose) { -// document_t *doc = (document_t *) user; -// LOG_WARNINGF(doc->filepath, "FZ: %s", message) -// } -} - -static void init_fzctx(fz_context *fzctx, document_t *doc) { - fz_disable_icc(fzctx); - fz_register_document_handlers(fzctx); - - fzctx->warn.print_user = doc; - fzctx->warn.print = fz_err_callback; - fzctx->error.print_user = doc; - fzctx->error.print = fz_err_callback; -} - -static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) { - if (block->type != FZ_STEXT_BLOCK_TEXT) { - return 0; - } - - fz_stext_line *line = block->u.t.first_line; - while (line != NULL) { - fz_stext_char *c = line->first_char; - while (c != NULL) { - if (text_buffer_append_char(tex, c->c) == TEXT_BUF_FULL) { - return TEXT_BUF_FULL; - } - c = c->next; - } - line = line->next; - } - return 0; -} - -#define IS_VALID_BPP(d) (d==1 || d==2 || d==4 || d==8 || d==16 || d==24 || d==32) - -void fill_image(fz_context *fzctx, UNUSED(fz_device *dev), - fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha), - UNUSED(fz_color_params color_params)) { - - int l2factor = 0; - - if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) { - - fz_pixmap *pix = img->get_pixmap(fzctx, img, NULL, img->w, img->h, &l2factor); - - if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) { - TessBaseAPI *api = TessBaseAPICreate(); - TessBaseAPIInit3(api, thread_ctx.tesseract_path, thread_ctx.tesseract_lang); - - TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride); - TessBaseAPISetSourceResolution(api, pix->xres); - - char *text = TessBaseAPIGetUTF8Text(api); - size_t len = strlen(text); - if (len >= MIN_OCR_LEN) { - text_buffer_append_string(&thread_buffer, text, len - 1); -// LOG_DEBUGF( -// "ebook.c", -// "(OCR) %dx%d got %dB from tesseract (%s), buffer:%dB", -// pix->w, pix->h, len, ScanCtx.tesseract_lang, thread_buffer.dyn_buffer.cur -// ) - } - - TessBaseAPIEnd(api); - TessBaseAPIDelete(api); - } - fz_drop_pixmap(fzctx, pix); - } -} - -void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, document_t *doc) { - - size_t buf_len; - void * buf = read_all(f, &buf_len); - - static int mu_is_initialized = 0; - if (!mu_is_initialized) { - pthread_mutex_init(&ctx->mupdf_mutex, NULL); - mu_is_initialized = 1; - } - fz_context *fzctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); - - init_fzctx(fzctx, doc); - - int err = 0; - - fz_document *fzdoc = NULL; - fz_stream *stream = NULL; - fz_var(fzdoc); - fz_var(stream); - fz_var(err); - - fz_try(fzctx) - { - stream = fz_open_memory(fzctx, buf, buf_len); - fzdoc = fz_open_document_with_stream(fzctx, mime_str, stream); - } - fz_catch(fzctx) - err = fzctx->error.errcode; - - if (err != 0) { - fz_drop_stream(fzctx, stream); - fz_drop_document(fzctx, fzdoc); - fz_drop_context(fzctx); - return; - } - - char title[4096] = {'\0',}; - fz_try(fzctx) - fz_lookup_metadata(fzctx, fzdoc, FZ_META_INFO_TITLE, title, sizeof(title)); - fz_catch(fzctx) - ; - - if (strlen(title) > 0) { - meta_line_t *meta_content = malloc(sizeof(meta_line_t) + strlen(title)); - meta_content->key = MetaTitle; - strcpy(meta_content->str_val, title); - APPEND_META(doc, meta_content) - } - - int page_count = -1; - fz_var(err); - fz_try(fzctx) - page_count = fz_count_pages(fzctx, fzdoc); - fz_catch(fzctx) - err = fzctx->error.errcode; - - if (err) { -// LOG_WARNINGF(doc->filepath, "fz_count_pages() returned error code [%d] %s", err, ctx->error.message) - fz_drop_stream(fzctx, stream); - fz_drop_document(fzctx, fzdoc); - fz_drop_context(fzctx); - return; - } - - if (ctx->tn_size > 0) { - err = render_cover(ctx, fzctx, doc, fzdoc); - } - - if (err == TRUE) { - fz_drop_stream(fzctx, stream); - fz_drop_document(fzctx, fzdoc); - fz_drop_context(fzctx); - return; - } - - if (ctx->content_size > 0) { - fz_stext_options opts = {0}; - thread_buffer = text_buffer_create(ctx->content_size); - - for (int current_page = 0; current_page < page_count; current_page++) { - fz_page *page = NULL; - fz_var(err); - fz_try(fzctx) - page = fz_load_page(fzctx, fzdoc, current_page); - fz_catch(fzctx) - err = fzctx->error.errcode; - if (err != 0) { -// LOG_WARNINGF(doc->filepath, "fz_load_page() returned error code [%d] %s", err, ctx->error.message) - text_buffer_destroy(&thread_buffer); - fz_drop_page(fzctx, page); - fz_drop_stream(fzctx, stream); - fz_drop_document(fzctx, fzdoc); - fz_drop_context(fzctx); - return; - } - - fz_stext_page *stext = fz_new_stext_page(fzctx, fz_bound_page(fzctx, page)); - fz_device *dev = fz_new_stext_device(fzctx, stext, &opts); - dev->stroke_path = NULL; - dev->stroke_text = NULL; - dev->clip_text = NULL; - dev->clip_stroke_path = NULL; - dev->clip_stroke_text = NULL; - - if (ctx->tesseract_lang!= NULL) { - dev->fill_image = fill_image; - } - - fz_var(err); - fz_try(fzctx) - fz_run_page(fzctx, page, dev, fz_identity, NULL); - fz_always(fzctx) - { - fz_close_device(fzctx, dev); - fz_drop_device(fzctx, dev); - } - fz_catch(fzctx) - err = fzctx->error.errcode; - - if (err != 0) { -// LOG_WARNINGF(doc->filepath, "fz_run_page() returned error code [%d] %s", err, ctx->error.message) - text_buffer_destroy(&thread_buffer); - fz_drop_page(fzctx, page); - fz_drop_stext_page(fzctx, stext); - fz_drop_stream(fzctx, stream); - fz_drop_document(fzctx, fzdoc); - fz_drop_context(fzctx); - return; - } - - fz_stext_block *block = stext->first_block; - while (block != NULL) { - int ret = read_stext_block(block, &thread_buffer); - if (ret == TEXT_BUF_FULL) { - break; - } - block = block->next; - } - fz_drop_stext_page(fzctx, stext); - fz_drop_page(fzctx, page); - - if (thread_buffer.dyn_buffer.cur >= thread_buffer.dyn_buffer.size) { - break; - } - } - text_buffer_terminate_string(&thread_buffer); - - meta_line_t *meta_content = malloc(sizeof(meta_line_t) + thread_buffer.dyn_buffer.cur); - meta_content->key = MetaContent; - memcpy(meta_content->str_val, thread_buffer.dyn_buffer.buf, thread_buffer.dyn_buffer.cur); - APPEND_META(doc, meta_content) - - text_buffer_destroy(&thread_buffer); - } - - fz_drop_stream(fzctx, stream); - fz_drop_document(fzctx, fzdoc); - fz_drop_context(fzctx); -} diff --git a/third-party/libscan/libscan/ebook/ebook.h b/third-party/libscan/libscan/ebook/ebook.h deleted file mode 100644 index d0bfbc0..0000000 --- a/third-party/libscan/libscan/ebook/ebook.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef SCAN_EBOOK_H -#define SCAN_EBOOK_H - -#include "../scan.h" - -typedef struct { - long content_size; - int tn_size; - const char *tesseract_lang; - const char *tesseract_path; - pthread_mutex_t mupdf_mutex; -} scan_ebook_ctx_t; - -void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char* mime_str, document_t *doc); - -#endif diff --git a/third-party/libscan/libscan/font/font.c b/third-party/libscan/libscan/font/font.c deleted file mode 100644 index c0947ea..0000000 --- a/third-party/libscan/libscan/font/font.c +++ /dev/null @@ -1,234 +0,0 @@ -#include "font.h" - -#include -#include -#include "../util.h" - - -__thread FT_Library ft_lib = NULL; - - -typedef struct text_dimensions { - unsigned int width; - unsigned int height; - unsigned int baseline; -} text_dimensions_t; - -typedef struct glyph { - int top; - int height; - int width; - int descent; - int ascent; - int advance_width; - unsigned char *pixmap; -} glyph_t; - - -__always_inline -int kerning_offset(char c, char pc, FT_Face face) { - FT_Vector kerning; - FT_Get_Kerning(face, c, pc, FT_KERNING_DEFAULT, &kerning); - - return (int) (kerning.x / 64); -} - -__always_inline -glyph_t ft_glyph_to_glyph(FT_GlyphSlot slot) { - glyph_t glyph; - - glyph.pixmap = slot->bitmap.buffer; - - glyph.width = (int) slot->bitmap.width; - glyph.height = (int) slot->bitmap.rows; - glyph.top = slot->bitmap_top; - glyph.advance_width = (int) slot->advance.x / 64; - - glyph.descent = MAX(0, glyph.height - glyph.top); - glyph.ascent = MAX(0, MAX(glyph.top, glyph.height) - glyph.descent); - - return glyph; -} - -text_dimensions_t text_dimension(char *text, FT_Face face) { - text_dimensions_t dimensions; - - dimensions.width = 0; - - int num_chars = (int) strlen(text); - - unsigned int max_ascent = 0; - int max_descent = 0; - - char pc = 0; - for (int i = 0; i < num_chars; i++) { - char c = text[i]; - - FT_Load_Char(face, c, 0); - glyph_t glyph = ft_glyph_to_glyph(face->glyph); - - max_descent = MAX(max_descent, glyph.descent); - max_ascent = MAX(max_ascent, MAX(glyph.height, glyph.ascent)); - - int kerning_x = kerning_offset(c, pc, face); - dimensions.width += MAX(glyph.advance_width, glyph.width) + kerning_x; - - pc = c; - } - - dimensions.height = max_ascent + max_descent; - dimensions.baseline = max_descent; - - return dimensions; -} - -void draw_glyph(glyph_t *glyph, int x, int y, struct text_dimensions text_info, unsigned char *bitmap) { - unsigned int src = 0; - unsigned int dst = y * text_info.width + x; - unsigned int row_offset = text_info.width - glyph->width; - unsigned int buf_len = text_info.width * text_info.height; - - for (unsigned int sy = 0; sy < glyph->height; sy++) { - for (unsigned int sx = 0; sx < glyph->width; sx++) { - if (dst < buf_len) { - bitmap[dst] |= glyph->pixmap[src]; - } - src++; - dst++; - } - dst += row_offset; - } -} - -void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned char *bitmap) { - - dyn_buffer_write_short(buf, 0x4D42); // Magic - dyn_buffer_write_int(buf, 0); // Size placeholder - dyn_buffer_write_int(buf, 0x5157); //Reserved - dyn_buffer_write_int(buf, 14 + 40 + 256 * 4); // pixels offset - - dyn_buffer_write_int(buf, 40); // DIB size - dyn_buffer_write_int(buf, (int) dimensions.width); - dyn_buffer_write_int(buf, (int) dimensions.height); - dyn_buffer_write_short(buf, 1); // Color planes - dyn_buffer_write_short(buf, 8); // bits per pixel - dyn_buffer_write_int(buf, 0); // compression - dyn_buffer_write_int(buf, 0); // Ignored - dyn_buffer_write_int(buf, 3800); // hres - dyn_buffer_write_int(buf, 3800); // vres - dyn_buffer_write_int(buf, 256); // Color count - dyn_buffer_write_int(buf, 0); // Ignored - - // RGBA32 Color table (Grayscale) - for (int i = 255; i >= 0; i--) { - dyn_buffer_write_int(buf, i + (i << 8) + (i << 16)); - } - - // Pixel array: write from bottom to top, with rows padded to multiples of 4-bytes - for (int y = (int) dimensions.height - 1; y >= 0; y--) { - for (unsigned int x = 0; x < dimensions.width; x++) { - dyn_buffer_write_char(buf, (char) bitmap[y * dimensions.width + x]); - } - while (buf->cur % 4 != 0) { - dyn_buffer_write_char(buf, 0); - } - } - - // Size - *(int *) ((char *) buf->buf + 2) = buf->cur; -} - -void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc) { - if (ft_lib == NULL) { - FT_Init_FreeType(&ft_lib); - } - - size_t buf_len; - void * buf = read_all(f, &buf_len); - - FT_Face face; - FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face); - if (err != 0) { -// LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, ft_error_string(err)); - return; - } - - char font_name[1024]; - - if (face->style_name == NULL || *(face->style_name) == '?') { - if (face->family_name == NULL) { - strcpy(font_name, "(null)"); - } else { - strcpy(font_name, face->family_name); - } - } else { - snprintf(font_name, sizeof(font_name), "%s %s", face->family_name, face->style_name); - } - - meta_line_t *meta_name = malloc(sizeof(meta_line_t) + strlen(font_name)); - meta_name->key = MetaFontName; - strcpy(meta_name->str_val, font_name); - APPEND_META(doc, meta_name) - - if (ctx->enable_tn == TRUE) { - FT_Done_Face(face); - return; - } - - int pixel = 64; - int num_chars = (int) strlen(font_name); - - err = FT_Set_Pixel_Sizes(face, 0, pixel); - if (err != 0) { -// LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, ft_error_string(err)) - FT_Done_Face(face); - return; - } - - text_dimensions_t dimensions = text_dimension(font_name, face); - unsigned char *bitmap = calloc(dimensions.width * dimensions.height, 1); - - FT_Vector pen; - pen.x = 0; - - char pc = 0; - for (int i = 0; i < num_chars; i++) { - char c = font_name[i]; - - err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER); - if (err != 0) { - c = c >= 'a' && c <= 'z' ? c - 32 : c + 32; - err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER); - if (err != 0) { -// LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, ft_error_string(err)); - continue; - } - } - glyph_t glyph = ft_glyph_to_glyph(face->glyph); - - pen.x += kerning_offset(c, pc, face); - if (pen.x <= 0) { - pen.x = ABS(glyph.advance_width - glyph.width); - } - pen.y = dimensions.height - glyph.ascent - dimensions.baseline; - - draw_glyph(&glyph, pen.x, pen.y, dimensions, bitmap); - - pen.x += glyph.advance_width; - pc = c; - } - - dyn_buffer_t bmp_data = dyn_buffer_create(); - bmp_format(&bmp_data, dimensions, bitmap); - -// store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) bmp_data.buf, bmp_data.cur); - - dyn_buffer_destroy(&bmp_data); - free(bitmap); - - FT_Done_Face(face); -} - -void cleanup_font() { - FT_Done_FreeType(ft_lib); -} diff --git a/third-party/libscan/libscan/font/font.h b/third-party/libscan/libscan/font/font.h deleted file mode 100644 index 01e6b03..0000000 --- a/third-party/libscan/libscan/font/font.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef SCAN_FONT_H -#define SCAN_FONT_H - -#include "../scan.h" - - -typedef struct { - int enable_tn; -} scan_font_cxt_t; - -void parse_font(scan_font_cxt_t *ctx, vfile_t *f, document_t *doc); -void cleanup_font(); - -#endif diff --git a/third-party/libscan/libscan/macros.h b/third-party/libscan/libscan/macros.h deleted file mode 100644 index 0dfd5b0..0000000 --- a/third-party/libscan/libscan/macros.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef FALSE -#define FALSE (0) -#define BOOL int -#endif - -#ifndef TRUE -#define TRUE (!FALSE) -#endif - -#undef MAX -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) - -#undef MIN -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) - -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif - -#undef ABS -#define ABS(a) (((a) < 0) ? -(a) : (a)) diff --git a/third-party/libscan/libscan/media/media.c b/third-party/libscan/libscan/media/media.c deleted file mode 100644 index 656773b..0000000 --- a/third-party/libscan/libscan/media/media.c +++ /dev/null @@ -1,419 +0,0 @@ -#include "media.h" - -#include "../util.h" - -#include "libavformat/avformat.h" -#include "libswscale/swscale.h" -#include "libswresample/swresample.h" -#include "libavcodec/avcodec.h" -#include "libavutil/imgutils.h" - -#include - -#define MIN_SIZE 32 -#define AVIO_BUF_SIZE 8192 - -__always_inline -static AVCodecContext *alloc_jpeg_encoder(int dstW, int dstH, float qscale) { - - AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG); - AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec); - jpeg->width = dstW; - jpeg->height = dstH; - jpeg->time_base.den = 1000000; - jpeg->time_base.num = 1; - jpeg->i_quant_factor = qscale; - - jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P; - int ret = avcodec_open2(jpeg, jpeg_codec, NULL); - - if (ret != 0) { - printf("Could not open jpeg encoder: %s!\n", av_err2str(ret)); - return NULL; - } - - return jpeg; -} - -__always_inline -AVFrame *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) { - - int dstW; - int dstH; - if (frame->width <= size && frame->height <= size) { - dstW = frame->width; - dstH = frame->height; - } else { - double ratio = (double) frame->width / frame->height; - if (frame->width > frame->height) { - dstW = size; - dstH = (int) (size / ratio); - } else { - dstW = (int) (size * ratio); - dstH = size; - } - } - - if (dstW <= MIN_SIZE || dstH <= MIN_SIZE) { - return NULL; - } - - AVFrame *scaled_frame = av_frame_alloc(); - - struct SwsContext *ctx = sws_getContext( - decoder->width, decoder->height, decoder->pix_fmt, - dstW, dstH, AV_PIX_FMT_YUVJ420P, - SWS_FAST_BILINEAR, 0, 0, 0 - ); - - int dst_buf_len = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, dstW, dstH, 1); - uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len); - - av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, dstW, dstH, 1); - - sws_scale(ctx, - (const uint8_t *const *) frame->data, frame->linesize, - 0, decoder->height, - scaled_frame->data, scaled_frame->linesize - ); - - scaled_frame->width = dstW; - scaled_frame->height = dstH; - scaled_frame->format = AV_PIX_FMT_YUV420P; - - sws_freeContext(ctx); - - return scaled_frame; -} - -__always_inline -static AVFrame *read_frame(AVFormatContext *pFormatCtx, AVCodecContext *decoder, int stream_idx, document_t *doc) { - AVFrame *frame = av_frame_alloc(); - - AVPacket avPacket; - av_init_packet(&avPacket); - - int receive_ret = -EAGAIN; - while (receive_ret == -EAGAIN) { - // Get video frame - while (1) { - int read_frame_ret = av_read_frame(pFormatCtx, &avPacket); - - if (read_frame_ret != 0) { - if (read_frame_ret != AVERROR_EOF) { -// LOG_WARNINGF(doc->filepath, -// "(media.c) avcodec_read_frame() returned error code [%d] %s", -// read_frame_ret, av_err2str(read_frame_ret) -// ) - } - av_frame_free(&frame); - av_packet_unref(&avPacket); - return NULL; - } - - //Ignore audio/other frames - if (avPacket.stream_index != stream_idx) { - av_packet_unref(&avPacket); - continue; - } - break; - } - - // Feed it to decoder - int decode_ret = avcodec_send_packet(decoder, &avPacket); - if (decode_ret != 0) { -// LOG_ERRORF(doc->filepath, -// "(media.c) avcodec_send_packet() returned error code [%d] %s", -// decode_ret, av_err2str(decode_ret) -// ) - av_frame_free(&frame); - av_packet_unref(&avPacket); - return NULL; - } - av_packet_unref(&avPacket); - receive_ret = avcodec_receive_frame(decoder, frame); - } - return frame; -} - -#define APPEND_TAG_META(doc, tag_, keyname) \ - text_buffer_t tex = text_buffer_create(-1); \ - text_buffer_append_string0(&tex, tag_->value); \ - text_buffer_terminate_string(&tex); \ - meta_line_t *meta_tag = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); \ - meta_tag->key = keyname; \ - strcpy(meta_tag->str_val, tex.dyn_buffer.buf); \ - APPEND_META(doc, meta_tag) \ - text_buffer_destroy(&tex); - -__always_inline -static void append_audio_meta(AVFormatContext *pFormatCtx, document_t *doc) { - - AVDictionaryEntry *tag = NULL; - while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { - char key[256]; - strncpy(key, tag->key, sizeof(key)); - - char *ptr = key; - for (; *ptr; ++ptr) *ptr = (char) tolower(*ptr); - - if (strcmp(key, "artist") == 0) { - APPEND_TAG_META(doc, tag, MetaArtist) - } else if (strcmp(key, "genre") == 0) { - APPEND_TAG_META(doc, tag, MetaGenre) - } else if (strcmp(key, "title") == 0) { - APPEND_TAG_META(doc, tag, MetaTitle) - } else if (strcmp(key, "album_artist") == 0) { - APPEND_TAG_META(doc, tag, MetaAlbumArtist) - } else if (strcmp(key, "album") == 0) { - APPEND_TAG_META(doc, tag, MetaAlbum) - } - } -} - -__always_inline -static void -append_video_meta(AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int include_audio_tags, int is_video) { - - if (is_video) { - meta_line_t *meta_duration = malloc(sizeof(meta_line_t)); - meta_duration->key = MetaMediaDuration; - meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE; - APPEND_META(doc, meta_duration) - - meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t)); - meta_bitrate->key = MetaMediaBitrate; - meta_bitrate->long_val = pFormatCtx->bit_rate; - APPEND_META(doc, meta_bitrate) - } - - AVDictionaryEntry *tag = NULL; - if (is_video) { - while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { - if (include_audio_tags && strcmp(tag->key, "title") == 0) { - APPEND_TAG_META(doc, tag, MetaTitle) - } else if (strcmp(tag->key, "comment") == 0) { - APPEND_TAG_META(doc, tag, MetaContent) - } else if (include_audio_tags && strcmp(tag->key, "artist") == 0) { - APPEND_TAG_META(doc, tag, MetaArtist) - } - } - } else { - // EXIF metadata - while ((tag = av_dict_get(frame->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { - if (include_audio_tags && strcmp(tag->key, "Artist") == 0) { - APPEND_TAG_META(doc, tag, MetaArtist) - } else if (strcmp(tag->key, "ImageDescription") == 0) { - APPEND_TAG_META(doc, tag, MetaContent) - } else if (strcmp(tag->key, "Make") == 0) { - APPEND_TAG_META(doc, tag, MetaExifMake) - } else if (strcmp(tag->key, "Model") == 0) { - APPEND_TAG_META(doc, tag, MetaExifModel) - } else if (strcmp(tag->key, "Software") == 0) { - APPEND_TAG_META(doc, tag, MetaExifSoftware) - } else if (strcmp(tag->key, "FNumber") == 0) { - APPEND_TAG_META(doc, tag, MetaExifFNumber) - } else if (strcmp(tag->key, "FocalLength") == 0) { - APPEND_TAG_META(doc, tag, MetaExifFocalLength) - } else if (strcmp(tag->key, "UserComment") == 0) { - APPEND_TAG_META(doc, tag, MetaExifUserComment) - } else if (strcmp(tag->key, "ISOSpeedRatings") == 0) { - APPEND_TAG_META(doc, tag, MetaExifIsoSpeedRatings) - } else if (strcmp(tag->key, "ExposureTime") == 0) { - APPEND_TAG_META(doc, tag, MetaExifExposureTime) - } else if (strcmp(tag->key, "DateTime") == 0) { - APPEND_TAG_META(doc, tag, MetaExifDateTime) - } - } - } -} - -void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) { - - int video_stream = -1; - int audio_stream = -1; - - avformat_find_stream_info(pFormatCtx, NULL); - - for (int i = (int) pFormatCtx->nb_streams - 1; i >= 0; i--) { - AVStream *stream = pFormatCtx->streams[i]; - - if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { - if (audio_stream == -1) { - meta_line_t *meta_audio = malloc(sizeof(meta_line_t)); - meta_audio->key = MetaMediaAudioCodec; - meta_audio->int_val = stream->codecpar->codec_id; - APPEND_META(doc, meta_audio) - - append_audio_meta(pFormatCtx, doc); - audio_stream = i; - } - } else if (stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) { - - if (video_stream == -1) { - meta_line_t *meta_vid = malloc(sizeof(meta_line_t)); - meta_vid->key = MetaMediaVideoCodec; - meta_vid->int_val = stream->codecpar->codec_id; - APPEND_META(doc, meta_vid) - - meta_line_t *meta_w = malloc(sizeof(meta_line_t)); - meta_w->key = MetaWidth; - meta_w->int_val = stream->codecpar->width; - APPEND_META(doc, meta_w) - - meta_line_t *meta_h = malloc(sizeof(meta_line_t)); - meta_h->key = MetaHeight; - meta_h->int_val = stream->codecpar->height; - APPEND_META(doc, meta_h) - - video_stream = i; - } - } - } - - if (video_stream != -1 && ctx->tn_size > 0) { - AVStream *stream = pFormatCtx->streams[video_stream]; - - if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) { - avformat_close_input(&pFormatCtx); - avformat_free_context(pFormatCtx); - return; - } - - // Decoder - AVCodec *video_codec = avcodec_find_decoder(stream->codecpar->codec_id); - AVCodecContext *decoder = avcodec_alloc_context3(video_codec); - avcodec_parameters_to_context(decoder, stream->codecpar); - avcodec_open2(decoder, video_codec, NULL); - - //Seek - if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) { - int seek_ret = 0; - for (int i = 20; i >= 0; i--) { - seek_ret = av_seek_frame(pFormatCtx, video_stream, - stream->duration * 0.10, 0); - if (seek_ret == 0) { - break; - } - } - } - - AVFrame *frame = read_frame(pFormatCtx, decoder, video_stream, doc); - if (frame == NULL) { - avcodec_free_context(&decoder); - avformat_close_input(&pFormatCtx); - avformat_free_context(pFormatCtx); - return; - } - - append_video_meta(pFormatCtx, frame, doc, audio_stream == -1, stream->nb_frames > 1); - - // Scale frame - AVFrame *scaled_frame = scale_frame(decoder, frame, ctx->tn_size); - - if (scaled_frame == NULL) { - av_frame_free(&frame); - avcodec_free_context(&decoder); - avformat_close_input(&pFormatCtx); - avformat_free_context(pFormatCtx); - return; - } - - // Encode frame to jpeg - AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, ctx->tn_qscale); - avcodec_send_frame(jpeg_encoder, scaled_frame); - - AVPacket jpeg_packet; - av_init_packet(&jpeg_packet); - avcodec_receive_packet(jpeg_encoder, &jpeg_packet); - - // Save thumbnail -// store_write(ScanCtx.index.store, (char *) doc->uuid, sizeof(doc->uuid), (char *) jpeg_packet.data, -// jpeg_packet.size); - - av_packet_unref(&jpeg_packet); - av_frame_free(&frame); - av_free(*scaled_frame->data); - av_frame_free(&scaled_frame); - avcodec_free_context(&jpeg_encoder); - avcodec_free_context(&decoder); - } - - avformat_close_input(&pFormatCtx); - avformat_free_context(pFormatCtx); -} - -void parse_media_filename(scan_media_ctx_t *ctx, const char *filepath, document_t *doc) { - - AVFormatContext *pFormatCtx = avformat_alloc_context(); - if (pFormatCtx == NULL) { -// LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") - return; - } - int res = avformat_open_input(&pFormatCtx, filepath, NULL, NULL); - if (res < 0) { -// LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)) - avformat_close_input(&pFormatCtx); - avformat_free_context(pFormatCtx); - return; - } - - parse_media_format_ctx(ctx, pFormatCtx, doc); -} - - -int vfile_read(void *ptr, uint8_t *buf, int buf_size) { - struct vfile *f = ptr; - - int ret = f->read(f, buf, buf_size); - - if (ret == 0) { - return AVERROR_EOF; - } - return ret; -} - -void parse_media_vfile(scan_media_ctx_t *ctx, struct vfile *f, document_t *doc) { - - AVFormatContext *pFormatCtx = avformat_alloc_context(); - if (pFormatCtx == NULL) { -// LOG_ERROR(doc->filepath, "(media.c) Could not allocate context with avformat_alloc_context()") - return; - } - - unsigned char *buffer = (unsigned char *) av_malloc(AVIO_BUF_SIZE); - AVIOContext *io_ctx = avio_alloc_context(buffer, AVIO_BUF_SIZE, 0, f, vfile_read, NULL, NULL); - - pFormatCtx->pb = io_ctx; - pFormatCtx->flags |= AVFMT_FLAG_CUSTOM_IO; - - int res = avformat_open_input(&pFormatCtx, "", NULL, NULL); - if (res == -5) { - // Tried to parse media that requires seek - av_free(io_ctx->buffer); - avio_context_free(&io_ctx); - avformat_close_input(&pFormatCtx); - avformat_free_context(pFormatCtx); - return; - } else if (res < 0) { -// LOG_ERRORF(doc->filepath, "(media.c) avformat_open_input() returned [%d] %s", res, av_err2str(res)) - av_free(io_ctx->buffer); - avio_context_free(&io_ctx); - avformat_close_input(&pFormatCtx); - avformat_free_context(pFormatCtx); - return; - } - - parse_media_format_ctx(ctx, pFormatCtx, doc); - av_free(io_ctx->buffer); - avio_context_free(&io_ctx); -} - -void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc) { - - if (f->is_fs_file) { - parse_media_filename(ctx, f->filepath, doc); - } else { - parse_media_vfile(ctx, f, doc); - } -} diff --git a/third-party/libscan/libscan/media/media.h b/third-party/libscan/libscan/media/media.h deleted file mode 100644 index 7874316..0000000 --- a/third-party/libscan/libscan/media/media.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef SIST2_MEDIA_H -#define SIST2_MEDIA_H - - -#include "../scan.h" - -#define MIN_VIDEO_SIZE 1024 * 64 -#define MIN_IMAGE_SIZE 1024 * 2 - -typedef struct { - long content_size; - int tn_size; - float tn_qscale; -} scan_media_ctx_t; - -void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc); - -#endif diff --git a/third-party/libscan/libscan/ooxml/ooxml.c b/third-party/libscan/libscan/ooxml/ooxml.c deleted file mode 100644 index 11beb19..0000000 --- a/third-party/libscan/libscan/ooxml/ooxml.c +++ /dev/null @@ -1,142 +0,0 @@ -#include "ooxml.h" - -#include "../util.h" -#include -#include -#include -#include - -__always_inline -static int should_read_part(const char *part) { - -// LOG_DEBUGF("ooxml.c", "Got part : %s", part) - - if (part == NULL) { - return FALSE; - } - - if ( // Word - STR_STARTS_WITH(part, "word/document.xml") - || STR_STARTS_WITH(part, "word/footnotes.xml") - || STR_STARTS_WITH(part, "word/endnotes.xml") - || STR_STARTS_WITH(part, "word/footer") - || STR_STARTS_WITH(part, "word/header") - // PowerPoint - || STR_STARTS_WITH(part, "ppt/slides/slide") - || STR_STARTS_WITH(part, "ppt/notesSlides/slide") - // Excel - || STR_STARTS_WITH(part, "xl/worksheets/sheet") - || STR_STARTS_WITH(part, "xl/sharedStrings.xml") - || STR_STARTS_WITH(part, "xl/workbook.xml") - ) { - return TRUE; - } - - return FALSE; -} - -int extract_text(xmlDoc *xml, xmlNode *node, text_buffer_t *buf) { - //TODO: Check which nodes are likely to have a 't' child, and ignore nodes that aren't - xmlErrorPtr err = xmlGetLastError(); - if (err != NULL) { - if (err->level == XML_ERR_FATAL) { -// LOG_ERRORF("ooxml.c", "Got fatal XML error while parsing document: %s", err->message) - return -1; - } else { -// LOG_ERRORF("ooxml.c", "Got recoverable XML error while parsing document: %s", err->message) - } - } - - for (xmlNode *child = node; child; child = child->next) { - if (*child->name == 't' && *(child->name + 1) == '\0') { - xmlChar *text = xmlNodeListGetString(xml, child->xmlChildrenNode, 1); - - if (text) { - text_buffer_append_string0(buf, (char *) text); - text_buffer_append_char(buf, ' '); - xmlFree(text); - } - } - - extract_text(xml, child->children, buf); - } - return 0; -} - -int xml_io_read(void *context, char *buffer, int len) { - struct archive *a = context; - return archive_read_data(a, buffer, len); -} - -int xml_io_close(UNUSED(void *context)) { - //noop - return 0; -} - -__always_inline -static int read_part(struct archive *a, text_buffer_t *buf, document_t *doc) { - - xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL, XML_PARSE_RECOVER | XML_PARSE_NOWARNING | XML_PARSE_NOERROR | XML_PARSE_NONET); - - if (xml == NULL) { -// LOG_ERROR(doc->filepath, "Could not parse XML") - return -1; - } - - xmlNode *root = xmlDocGetRootElement(xml); - if (root == NULL) { -// LOG_ERROR(doc->filepath, "Empty document") - xmlFreeDoc(xml); - return -1; - } - - extract_text(xml, root, buf); - xmlFreeDoc(xml); - - return 0; -} - -void parse_doc(scan_ooxml_cxt_t *ctx, vfile_t *f, document_t *doc) { - - size_t buf_len; - void * buf = read_all(f, &buf_len); - - struct archive *a = archive_read_new(); - archive_read_support_format_zip(a); - - int ret = archive_read_open_memory(a, buf, buf_len); - if (ret != ARCHIVE_OK) { -// LOG_ERRORF(doc->filepath, "Could not read archive: %s", archive_error_string(a)) - archive_read_free(a); - return; - } - - text_buffer_t tex = text_buffer_create(ctx->content_size); - - struct archive_entry *entry; - while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { - if (S_ISREG(archive_entry_stat(entry)->st_mode)) { - const char *path = archive_entry_pathname(entry); - - if (should_read_part(path)) { - ret = read_part(a, &tex, doc); - if (ret != 0) { - break; - } - } - } - } - - if (tex.dyn_buffer.cur > 0) { - text_buffer_terminate_string(&tex); - - meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); - meta->key = MetaContent; - strcpy(meta->str_val, tex.dyn_buffer.buf); - APPEND_META(doc, meta) - } - - archive_read_close(a); - archive_read_free(a); - text_buffer_destroy(&tex); -} diff --git a/third-party/libscan/libscan/ooxml/ooxml.h b/third-party/libscan/libscan/ooxml/ooxml.h deleted file mode 100644 index 369288c..0000000 --- a/third-party/libscan/libscan/ooxml/ooxml.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef SCAN_OOXML_H -#define SCAN_OOXML_H - -#include -#include "../scan.h" - -typedef struct { - long content_size; -} scan_ooxml_cxt_t; - -void parse_doc(scan_ooxml_cxt_t *ctx, vfile_t *f, document_t *doc); - -#endif diff --git a/third-party/libscan/libscan/scan.h b/third-party/libscan/libscan/scan.h deleted file mode 100644 index 03cc2fe..0000000 --- a/third-party/libscan/libscan/scan.h +++ /dev/null @@ -1,131 +0,0 @@ -#ifndef SCAN_SCAN_H -#define SCAN_SCAN_H - -#include -#include -#include - -#include "macros.h" - - -#define META_INT_MASK 0x80 -#define META_STR_MASK 0x40 -#define META_LONG_MASK 0x20 - -#define UNUSED(x) __attribute__((__unused__)) x - -#define META_STR(id) ((unsigned) id) | ((unsigned) META_STR_MASK) -#define META_INT(id) ((unsigned) id) | ((unsigned) META_INT_MASK) -#define META_LONG(id) ((unsigned) id) | ((unsigned) META_LONG_MASK) - -#define IS_META_INT(key) (key & META_INT_MASK) == META_INT_MASK -#define IS_META_LONG(key) (key & META_LONG_MASK) == META_LONG_MASK -#define IS_META_STR(meta) (meta->key & META_STR_MASK) == META_STR_MASK - - -typedef int scan_code_t; -#define SCAN_OK (scan_code_t) 0 -#define SCAN_ERR_READ (scan_code_t) -1 - -// This is written to file as a 16-bit int! -enum metakey { - MetaContent = META_STR(1), - MetaWidth = META_INT(2), - MetaHeight = META_INT(3), - MetaMediaDuration = META_LONG(4), - MetaMediaAudioCodec = META_INT(5), - MetaMediaVideoCodec = META_INT(6), - MetaMediaBitrate = META_LONG(7), - MetaArtist = META_STR(8), - MetaAlbum = META_STR(9), - MetaAlbumArtist = META_STR(10), - MetaGenre = META_STR(11), - MetaTitle = META_STR(12), - MetaFontName = META_STR(13), - MetaParent = META_STR(14), - MetaExifMake = META_STR(15), - MetaExifSoftware = META_STR(16), - MetaExifExposureTime = META_STR(17), - MetaExifFNumber = META_STR(18), - MetaExifFocalLength = META_STR(19), - MetaExifUserComment = META_STR(20), - MetaExifModel = META_STR(21), - MetaExifIsoSpeedRatings = META_STR(22), - MetaExifDateTime = META_STR(23), -}; - -typedef struct meta_line { - struct meta_line *next; - enum metakey key; - union { - char str_val[0]; - int int_val; - unsigned long long_val; - }; -} meta_line_t; - - -typedef struct document { - unsigned char uuid[16]; - unsigned long ino; - unsigned long size; - unsigned int mime; - int mtime; - short base; - short ext; - meta_line_t *meta_head; - meta_line_t *meta_tail; - char *filepath; -} document_t; - -typedef struct vfile vfile_t; - -__attribute__((warn_unused_result)) -typedef int (*read_func_t)(struct vfile *, void *buf, size_t size); - -typedef void (*close_func_t)(struct vfile *); - -typedef struct vfile { - union { - int fd; - struct archive *arc; - }; - - int is_fs_file; - char *filepath; - struct stat info; - - read_func_t read; - close_func_t close; -} vfile_t; - -typedef struct parse_job_t { - int base; - int ext; - struct stat info; - struct vfile vfile; - uuid_t parent; - char filepath[1]; -} parse_job_t; - - -#define APPEND_META(doc, meta) \ - meta->next = NULL;\ - if (doc->meta_head == NULL) {\ - doc->meta_head = meta;\ - doc->meta_tail = doc->meta_head;\ - } else {\ - doc->meta_tail->next = meta;\ - doc->meta_tail = meta;\ - } - - -#endif - -#include "arc/arc.h" -#include "cbr/cbr.h" -#include "ebook/ebook.h" -#include "font/font.h" -#include "media/media.h" -#include "ooxml/ooxml.h" -#include "text/text.h" diff --git a/third-party/libscan/libscan/text/text.c b/third-party/libscan/libscan/text/text.c deleted file mode 100644 index aec00cd..0000000 --- a/third-party/libscan/libscan/text/text.c +++ /dev/null @@ -1,31 +0,0 @@ -#include "text.h" - -scan_code_t parse_text(scan_text_ctx_t *ctx, struct vfile *f, document_t *doc) { - - int to_read = MIN(ctx->content_size, doc->size); - - char *buf = malloc(to_read); - int ret = f->read(f, buf, to_read); - if (ret < 0) { - //TODO: log - return SCAN_ERR_READ; - } - - text_buffer_t tex = text_buffer_create(ctx->content_size); - text_buffer_append_string(&tex, buf, to_read); - text_buffer_terminate_string(&tex); - - meta_line_t *meta = malloc(sizeof(meta_line_t) + tex.dyn_buffer.cur); - meta->key = MetaContent; - strcpy(meta->str_val, tex.dyn_buffer.buf); - - APPEND_META(doc, meta) - - printf("%s", meta->str_val); - - free(buf); - text_buffer_destroy(&tex); - - return SCAN_OK; -} - diff --git a/third-party/libscan/libscan/text/text.h b/third-party/libscan/libscan/text/text.h deleted file mode 100644 index aa1ea29..0000000 --- a/third-party/libscan/libscan/text/text.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef SCAN_TEXT_H -#define SCAN_TEXT_H - -#include "../scan.h" -#include "../util.h" - -typedef struct { - long content_size; -} scan_text_ctx_t; - -scan_code_t parse_text(scan_text_ctx_t *ctx, struct vfile *f, document_t *doc); - -#endif diff --git a/third-party/libscan/libscan/util.c b/third-party/libscan/libscan/util.c deleted file mode 100644 index e69de29..0000000 diff --git a/third-party/libscan/libscan/util.h b/third-party/libscan/libscan/util.h deleted file mode 100644 index b584b6e..0000000 --- a/third-party/libscan/libscan/util.h +++ /dev/null @@ -1,276 +0,0 @@ -#ifndef SCAN_UTIL_H -#define SCAN_UTIL_H - -#include "stdio.h" -#include "stdlib.h" -#include "string.h" -#include "../third-party/utf8.h/utf8.h" -#include "macros.h" - -#define STR_STARTS_WITH(x, y) (strncmp(y, x, sizeof(y) - 1) == 0) - -#define TEXT_BUF_FULL -1 -#define INITIAL_BUF_SIZE 1024 * 16 - -#define SHOULD_IGNORE_CHAR(c) !(SHOULD_KEEP_CHAR(c)) -#define SHOULD_KEEP_CHAR(c) ((c >= '\'' && c <= ';') || (c >= 'A' && c <= 'z') || (c > 127)) - - -typedef struct dyn_buffer { - char *buf; - size_t cur; - size_t size; -} dyn_buffer_t; - -typedef struct text_buffer { - long max_size; - int last_char_was_whitespace; - dyn_buffer_t dyn_buffer; -} text_buffer_t; - -static int utf8_validchr2(const char *s) { - if (0x00 == (0x80 & *s)) { - return TRUE; - } else if (0xf0 == (0xf8 & *s)) { - if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) || - (0x80 != (0xc0 & s[3]))) { - return FALSE; - } - - if (0x80 == (0xc0 & s[4])) { - return FALSE; - } - - if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) { - return FALSE; - } - } else if (0xe0 == (0xf0 & *s)) { - if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) { - return FALSE; - } - - if (0x80 == (0xc0 & s[3])) { - return FALSE; - } - - if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) { - return FALSE; - } - } else if (0xc0 == (0xe0 & *s)) { - if (0x80 != (0xc0 & s[1])) { - return FALSE; - } - - if (0x80 == (0xc0 & s[2])) { - return FALSE; - } - - if (0 == (0x1e & s[0])) { - return FALSE; - } - } else { - return FALSE; - } - - return TRUE; -} - - -static dyn_buffer_t dyn_buffer_create() { - dyn_buffer_t buf; - - buf.size = INITIAL_BUF_SIZE; - buf.cur = 0; - buf.buf = malloc(INITIAL_BUF_SIZE); - - return buf; -} - -static void grow_buffer(dyn_buffer_t *buf, size_t size) { - if (buf->cur + size > buf->size) { - do { - buf->size *= 2; - } while (buf->cur + size > buf->size); - - buf->buf = realloc(buf->buf, buf->size); - } -} - -static void grow_buffer_small(dyn_buffer_t *buf) { - if (buf->cur + sizeof(long) > buf->size) { - buf->size *= 2; - buf->buf = realloc(buf->buf, buf->size); - } -} - -static void dyn_buffer_write(dyn_buffer_t *buf, const void *data, size_t size) { - grow_buffer(buf, size); - - memcpy(buf->buf + buf->cur, data, size); - buf->cur += size; -} - -static void dyn_buffer_write_char(dyn_buffer_t *buf, char c) { - grow_buffer_small(buf); - - *(buf->buf + buf->cur) = c; - buf->cur += sizeof(c); -} - -static void dyn_buffer_write_str(dyn_buffer_t *buf, char *str) { - dyn_buffer_write(buf, str, strlen(str)); - dyn_buffer_write_char(buf, '\0'); -} - -static void dyn_buffer_append_string(dyn_buffer_t *buf, char *str) { - dyn_buffer_write(buf, str, strlen(str)); -} - -static void dyn_buffer_write_int(dyn_buffer_t *buf, int d) { - grow_buffer_small(buf); - - *(int *) (buf->buf + buf->cur) = d; - buf->cur += sizeof(int); -} - -static void dyn_buffer_write_short(dyn_buffer_t *buf, short s) { - grow_buffer_small(buf); - - *(short *) (buf->buf + buf->cur) = s; - buf->cur += sizeof(short); -} - -static void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) { - grow_buffer_small(buf); - - *(unsigned long *) (buf->buf + buf->cur) = l; - buf->cur += sizeof(unsigned long); -} - -static void dyn_buffer_destroy(dyn_buffer_t *buf) { - free(buf->buf); -} - -static void text_buffer_destroy(text_buffer_t *buf) { - dyn_buffer_destroy(&buf->dyn_buffer); -} - -static text_buffer_t text_buffer_create(long max_size) { - text_buffer_t text_buf; - - text_buf.dyn_buffer = dyn_buffer_create(); - text_buf.max_size = max_size; - text_buf.last_char_was_whitespace = FALSE; - - return text_buf; -} - -static int text_buffer_append_char(text_buffer_t *buf, int c) { - - if (SHOULD_IGNORE_CHAR(c) || c == ' ') { - if (!buf->last_char_was_whitespace && buf->dyn_buffer.cur != 0) { - dyn_buffer_write_char(&buf->dyn_buffer, ' '); - buf->last_char_was_whitespace = TRUE; - - if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) { - return TEXT_BUF_FULL; - } - } - } else { - buf->last_char_was_whitespace = FALSE; - grow_buffer_small(&buf->dyn_buffer); - - if (((utf8_int32_t) 0xffffff80 & c) == 0) { - *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = (char) c; - } else if (((utf8_int32_t) 0xfffff800 & c) == 0) { - *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xc0 | (char) (c >> 6); - *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f); - } else if (((utf8_int32_t) 0xffff0000 & c) == 0) { - *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xe0 | (char) (c >> 12); - *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f); - *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f); - } else { - *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0xf0 | (char) (c >> 18); - *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 12) & 0x3f); - *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) ((c >> 6) & 0x3f); - *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f); - } - - if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) { - return TEXT_BUF_FULL; - } - } - - return 0; -} - - -static void text_buffer_terminate_string(text_buffer_t *buf) { - if (buf->dyn_buffer.cur > 0 && *(buf->dyn_buffer.buf + buf->dyn_buffer.cur - 1) == ' ') { - *(buf->dyn_buffer.buf + buf->dyn_buffer.cur - 1) = '\0'; - } else { - dyn_buffer_write_char(&buf->dyn_buffer, '\0'); - } -} - -#define UTF8_END_OF_STRING \ - (ptr - str >= len || *ptr == 0 || \ - (0xc0 == (0xe0 & *ptr) && ptr - str > len - 2) || \ - (0xe0 == (0xf0 & *ptr) && ptr - str > len - 3) || \ - (0xf0 == (0xf8 & *ptr) && ptr - str > len - 4)) - -static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t len) { - - const char *ptr = str; - const char *oldPtr = ptr; - - if (str == NULL || UTF8_END_OF_STRING) { - return 0; - } - - if (len <= 4) { - for (int i = 0; i < len; i++) { - if (((utf8_int32_t)0xffffff80 & str[i]) == 0) { - dyn_buffer_write_char(&buf->dyn_buffer, str[i]); - } - } - return 0; - } - - utf8_int32_t c; - char tmp[16]; - - do { - ptr = utf8codepoint(ptr, &c); - *(int *) tmp = 0x00000000; - memcpy(tmp, oldPtr, ptr - oldPtr); - oldPtr = ptr; - - if (!utf8_validchr2(tmp)) { - continue; - } - - int ret = text_buffer_append_char(buf, c); - - if (ret != 0) { - return ret; - } - } while (!UTF8_END_OF_STRING); - - return 0; -} - -static int text_buffer_append_string0(text_buffer_t *buf, char *str) { - return text_buffer_append_string(buf, str, strlen(str)); -} - -static void* read_all(vfile_t *f, size_t *size) { - void* buf = malloc(f->info.st_size); - *size = f->read(f, buf, f->info.st_size); - - //TODO: log - - return buf; -} - -#endif diff --git a/third-party/onion b/third-party/onion deleted file mode 160000 index 2b3b230..0000000 --- a/third-party/onion +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2b3b230b79ecae119b7eb847f2f9545a46bef13c diff --git a/third-party/utf8.h b/third-party/utf8.h deleted file mode 160000 index b686b0c..0000000 --- a/third-party/utf8.h +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b686b0c5181c2dd9f8297e6ac3692c9614b083be