diff --git a/.gitignore b/.gitignore index 6b82d53..5339f1a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ libscan.so CMakeFiles CMakeCache.txt scan_test -third-party/ \ No newline at end of file +third-party/ +libscan-test-files \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f12610..a6b6dfb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.15) -project(scan C) +project(scan) set(CMAKE_C_STANDARD 11) add_library( @@ -19,9 +19,10 @@ add_library( third-party/utf8.h ) +set_target_properties(scan PROPERTIES LINKER_LANGUAGE C) set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) -target_link_directories(scan PRIVATE BEFORE /usr/share/vcpkg/installed/x64-linux/lib/) +target_link_directories(scan PUBLIC /usr/share/vcpkg/installed/x64-linux/lib/) find_package(LibArchive REQUIRED) find_package(BZip2 REQUIRED) @@ -48,13 +49,14 @@ target_compile_options( -g ) -SET(CMAKE_C_LINK_EXECUTABLE "g++ -o ") +#SET(CMAKE_C_LINK_EXECUTABLE "g++ -o ") string(REGEX REPLACE "-lvdpau" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}") string(REGEX REPLACE "-lX11" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}") target_link_libraries( scan + PUBLIC ${LibArchive_LIBRARIES} ZLIB::ZLIB @@ -84,6 +86,14 @@ target_link_libraries( ${FFMPEG_LIBRARIES} z + # TODO: compile ffmpeg with those disabled? + va + va-drm + va-x11 + X11 + vdpau + + ${CMAKE_THREAD_LIBS_INIT} uuid @@ -91,10 +101,20 @@ target_link_libraries( target_include_directories( scan - BEFORE PUBLIC ${MUPDF_INC_DIR} ${JPEG_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ${FFMPEG_INCLUDE_DIR} ) + +# Testing +find_package(GTest CONFIG REQUIRED) + +add_executable(scan_ub_test test/main.cpp test/test_util.cpp test/test_util.h) +target_compile_options(scan_ub_test PRIVATE -g -fsanitize=undefined -fno-omit-frame-pointer) +target_link_libraries(scan_ub_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=undefined scan) + +add_executable(scan_a_test test/main.cpp test/test_util.cpp test/test_util.h) +target_compile_options(scan_a_test PRIVATE -g -fsanitize=address -fno-omit-frame-pointer) +target_link_libraries(scan_a_test PRIVATE GTest::gtest GTest::gtest_main -fsanitize=address scan) diff --git a/README.md b/README.md index 0d70ad4..ddd3f98 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ *(wip)* ```bash -vcpkg install libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd +vcpkg install libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 ffmpeg zstd gtest cmake -DCMAKE_TOOLCHAIN_FILE=/usr/share/vcpkg/scripts/buildsystems/vcpkg.cmake . make -j 4 diff --git a/build.sh b/build.sh index 2d3464b..a456538 100755 --- a/build.sh +++ b/build.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash -export CC=gcc -export CXX=g++ +export CC=clang +export CXX=clang++ rm -rf CMakeFiles CMakeCache.txt cmake -DCMAKE_TOOLCHAIN_FILE=/usr/share/vcpkg/scripts/buildsystems/vcpkg.cmake . || exit diff --git a/libscan/scan.h b/libscan/scan.h index 79961f9..6a70c2c 100644 --- a/libscan/scan.h +++ b/libscan/scan.h @@ -115,10 +115,11 @@ typedef struct vfile { union { int fd; struct archive *arc; + const void *_test_data; }; int is_fs_file; - char *filepath; + const char *filepath; struct stat info; read_func_t read; diff --git a/libscan/text/text.c b/libscan/text/text.c index 7e84c40..8493192 100644 --- a/libscan/text/text.c +++ b/libscan/text/text.c @@ -1,8 +1,8 @@ #include "text.h" -scan_code_t parse_text(scan_text_ctx_t *ctx, struct vfile *f, document_t *doc) { +scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) { - int to_read = MIN(ctx->content_size, doc->size); + int to_read = MIN(ctx->content_size, f->info.st_size); char *buf = malloc(to_read); int ret = f->read(f, buf, to_read); @@ -22,8 +22,6 @@ scan_code_t parse_text(scan_text_ctx_t *ctx, struct vfile *f, document_t *doc) { APPEND_META(doc, meta) - printf("%s", meta->str_val); - free(buf); text_buffer_destroy(&tex); diff --git a/libscan/text/text.h b/libscan/text/text.h index bf11bdc..28fb3c6 100644 --- a/libscan/text/text.h +++ b/libscan/text/text.h @@ -9,9 +9,8 @@ typedef struct { log_callback_t log; logf_callback_t logf; - store_callback_t store; } scan_text_ctx_t; -scan_code_t parse_text(scan_text_ctx_t *ctx, struct vfile *f, document_t *doc); +scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc); #endif diff --git a/libscan/util.h b/libscan/util.h index b584b6e..813c7f6 100644 --- a/libscan/util.h +++ b/libscan/util.h @@ -81,7 +81,7 @@ static dyn_buffer_t dyn_buffer_create() { buf.size = INITIAL_BUF_SIZE; buf.cur = 0; - buf.buf = malloc(INITIAL_BUF_SIZE); + buf.buf = (char*)malloc(INITIAL_BUF_SIZE); return buf; } @@ -92,14 +92,14 @@ static void grow_buffer(dyn_buffer_t *buf, size_t size) { buf->size *= 2; } while (buf->cur + size > buf->size); - buf->buf = realloc(buf->buf, buf->size); + buf->buf = (char*)realloc(buf->buf, buf->size); } } static void grow_buffer_small(dyn_buffer_t *buf) { if (buf->cur + sizeof(long) > buf->size) { buf->size *= 2; - buf->buf = realloc(buf->buf, buf->size); + buf->buf = (char*)realloc(buf->buf, buf->size); } } @@ -172,7 +172,7 @@ static int text_buffer_append_char(text_buffer_t *buf, int c) { dyn_buffer_write_char(&buf->dyn_buffer, ' '); buf->last_char_was_whitespace = TRUE; - if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) { + if (buf->max_size > 0 && buf->dyn_buffer.cur > buf->max_size) { return TEXT_BUF_FULL; } } @@ -196,7 +196,7 @@ static int text_buffer_append_char(text_buffer_t *buf, int c) { *(buf->dyn_buffer.buf + buf->dyn_buffer.cur++) = 0x80 | (char) (c & 0x3f); } - if (buf->max_size > 0 && buf->dyn_buffer.cur >= buf->max_size) { + if (buf->max_size > 0 && buf->dyn_buffer.cur > buf->max_size) { return TEXT_BUF_FULL; } } @@ -241,7 +241,7 @@ static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t char tmp[16]; do { - ptr = utf8codepoint(ptr, &c); + ptr = (char*)utf8codepoint(ptr, &c); *(int *) tmp = 0x00000000; memcpy(tmp, oldPtr, ptr - oldPtr); oldPtr = ptr; diff --git a/test/main.cpp b/test/main.cpp new file mode 100644 index 0000000..c0c22a0 --- /dev/null +++ b/test/main.cpp @@ -0,0 +1,62 @@ +#include "gtest/gtest.h" +#include "test_util.h" + +extern "C" { +#include "../libscan/arc/arc.h" +#include "../libscan/text/text.h" +} + +static scan_arc_ctx_t arc_recurse_ctx; +static scan_arc_ctx_t arc_list_ctx; +static scan_text_ctx_t text_500_ctx; + + +TEST(TextTest, BookCsvContentLen) { + const char *filepath = "libscan-test-files/test_files/text/books.csv"; + + vfile_t f; + document_t doc; + doc.meta_head = nullptr; + doc.meta_tail = nullptr; + load_file(filepath, &f); + parse_text(&text_500_ctx, &f, &doc); + + ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), 500, 1); + CLOSE_FILE(f) + destroy_doc(&doc); +} + +TEST(TextTest, MemUtf8_1) { + const char *content = "a"; //todo + + vfile_t f; + document_t doc; + doc.meta_head = nullptr; + doc.meta_tail = nullptr; + load_mem((void *) content, strlen(content), &f); + parse_text(&text_500_ctx, &f, &doc); + + ASSERT_EQ(strlen(get_meta(&doc, MetaContent)->str_val), 1); + destroy_doc(&doc); +} + + +int main(int argc, char **argv) { + arc_recurse_ctx.log = noop_log; + arc_recurse_ctx.logf = noop_logf; + arc_recurse_ctx.store = noop_store; + arc_recurse_ctx.mode = ARC_MODE_RECURSE; + arc_recurse_ctx.parse = nullptr; //TODO + + arc_list_ctx.log = noop_log; + arc_list_ctx.logf = noop_logf; + arc_list_ctx.store = noop_store; + arc_list_ctx.mode = ARC_MODE_LIST; + + text_500_ctx.content_size = 500; + text_500_ctx.log = noop_log; + text_500_ctx.logf = noop_logf; + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/test/test_util.cpp b/test/test_util.cpp new file mode 100644 index 0000000..8aef3c1 --- /dev/null +++ b/test/test_util.cpp @@ -0,0 +1,76 @@ +#include "test_util.h" +#include + +#include +#include + +#define FILE_NOT_FOUND_ERR "Could not file, did you clone the test files repo?" + + +int fs_read(struct vfile *f, void *buf, size_t size) { + + if (f->fd == -1) { + f->fd = open(f->filepath, O_RDONLY); + if (f->fd == -1) { + return -1; + } + } + + return read(f->fd, buf, size); +} + +//Note: No out of bounds check +int mem_read(vfile_t *f, void *buf, size_t size) { + memcpy(buf, f->_test_data, size); + f->_test_data = (char *) f->_test_data + size; + return 0; +} + +void fs_close(vfile_t *f) { + if (f->fd != -1) { + close(f->fd); + } +} + +void load_file(const char *filepath, vfile_t *f) { + stat(filepath, &f->info); + f->fd = open(filepath, O_RDONLY); + + if (f->fd == -1) { + FAIL() << FILE_NOT_FOUND_ERR; + } + + f->filepath = filepath; + f->read = fs_read; + f->close = fs_close; + f->is_fs_file = TRUE; +} + +void load_mem(void *mem, size_t size, vfile_t *f) { + f->filepath = "_mem_"; + f->_test_data = mem; + f->info.st_size = size; + f->read = mem_read; + f->close = nullptr; + f->is_fs_file = TRUE; +} + +meta_line_t *get_meta(document_t *doc, metakey key) { + meta_line_t *meta = doc->meta_head; + while (meta != nullptr) { + if (meta->key == key) { + return meta; + } + meta = meta->next; + } + return nullptr; +} + +void destroy_doc(document_t *doc) { + meta_line_t *meta = doc->meta_head; + while (meta != nullptr) { + meta_line_t *tmp = meta; + meta = tmp->next; + free(tmp); + } +} diff --git a/test/test_util.h b/test/test_util.h new file mode 100644 index 0000000..3df9400 --- /dev/null +++ b/test/test_util.h @@ -0,0 +1,28 @@ +#ifndef SCAN_TEST_UTIL_H +#define SCAN_TEST_UTIL_H + +#include "../libscan/scan.h" + +void load_file(const char *filepath, vfile_t *f); +void load_mem(void *mem, size_t size, vfile_t *f); + +static void noop_logf(char *filepath, int level, char *format, ...) { + // noop +} + +static void noop_log(char *filepath, int level, char *str) { + // noop +} + +static void noop_store(char* key, size_t key_len, char *value, size_t value_len) { + // noop +} + +meta_line_t *get_meta(document_t *doc, metakey key); + + +#define CLOSE_FILE(f) if (f.close != NULL) {f.close(&f);}; + +void destroy_doc(document_t *doc); + +#endif