From 5ba6997a986e81db0fe369d2710c8f487632212a Mon Sep 17 00:00:00 2001
From: simon987
Date: Wed, 25 Mar 2020 13:44:20 -0400
Subject: [PATCH] arc component

---
Review notes -- commentary only. Text between the three-dash line above and
the first "diff --git" header is not applied to the tree.

What this patch does, as far as the hunks below show:
  * Moves macros.h, util.c and util.h from src/ to libscan/, text.c and
    text.h to libscan/text/, and renames src/types.h to libscan/scan.h.
  * Adds libscan/arc/arc.c and arc.h: should_parse_filtered_file(),
    arc_read(), three vfile_*_callback() functions and parse_archive(),
    which walks archive entries through archive_read_*() calls.
  * Adds libscan/scan.c with fs_read() and fs_close(), which work on
    vfile->fd through open()/read()/close().
  * CMakeLists.txt: drops the uuid subdirectory and link, requires
    LibArchive, adds -g, and adds a scan_test executable (test/main.c).
  * Registers third-party/libarchive and third-party/zlib as submodules.

NOTE(review), to confirm before applying:
  * Every angle-bracket include in this copy has lost its header name (bare
    "#include" lines) and the author line has no address; presumably the
    angle-bracket text was stripped in transit. Restore from the commit.
  * Indentation and blank context lines are not recoverable from this copy
    and are assumed here; the line counts agree with every hunk header.
  * The removed and added project(scan C) lines read the same here;
    presumably they differ only in whitespace.
  * scan.h: fs_close(), CLOSE_FILE and fs_read() are added after the
    closing #endif, i.e. outside the include guard.
  * scan.h: CLOSE_FILE expands to a bare if statement plus a trailing ';',
    so it is not safe as the body of an unbraced if/else.
  * text.c: parse_text() now printf()s meta->str_val; this looks like debug
    output -- confirm it is intended.
  * arc.c: should_parse_filtered_file() copies ext - 1 bytes into
    tmp[PATH_MAX * 2] without checking ext against that size.
  * arc.c: parse_archive() uses both malloc() results unchecked and fills
    sub_job->filepath with an unbounded sprintf().
  * test/main.c: uses an absolute path under /home/simon and leaves
    file.close unset.

 .gitignore                    |   3 +-
 .gitmodules                   |   6 ++
 CMakeLists.txt                |  44 +++++----
 README.md                     |   4 +
 libscan/arc/arc.c             | 167 ++++++++++++++++++++++++++++++++++
 libscan/arc/arc.h             |  26 ++++++
 {src => libscan}/macros.h     |   4 +
 libscan/scan.c                |  24 +++++
 src/types.h => libscan/scan.h |  20 ++--
 {src => libscan/text}/text.c  |   2 +
 {src => libscan/text}/text.h  |   4 +-
 {src => libscan}/util.c       |   0
 {src => libscan}/util.h       |   0
 test/main.c                   |  22 +++++
 14 files changed, 297 insertions(+), 29 deletions(-)
 create mode 100644 README.md
 create mode 100644 libscan/arc/arc.c
 create mode 100644 libscan/arc/arc.h
 rename {src => libscan}/macros.h (80%)
 create mode 100644 libscan/scan.c
 rename src/types.h => libscan/scan.h (92%)
 rename {src => libscan/text}/text.c (95%)
 rename {src => libscan/text}/text.h (81%)
 rename {src => libscan}/util.c (100%)
 rename {src => libscan}/util.h (100%)
 create mode 100644 test/main.c

diff --git a/.gitignore b/.gitignore
index a7a837c..d55133b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,5 @@ libscan.a
 libscan.so
 *.cbp
 CMakeFiles
-CMakeCache.txt
\ No newline at end of file
+CMakeCache.txt
+scan_test
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
index bad7d45..0fcf539 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,9 @@
 [submodule "third-party/utf8.h"]
 	path = third-party/utf8.h
 	url = https://github.com/sheredom/utf8.h
+[submodule "third-party/libarchive"]
+	path = third-party/libarchive
+	url = https://github.com/libarchive/libarchive
+[submodule "third-party/zlib"]
+	path = third-party/zlib
+	url = https://github.com/madler/zlib
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ac2dc6e..4fe427e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,33 +1,45 @@
 cmake_minimum_required(VERSION 3.15)
 
-project(scan C)
+project(scan C)
 
 set(CMAKE_C_STANDARD 11)
 
+find_package(LibArchive REQUIRED)
 add_library(
         scan
-        src/text.c src/text.h
-        src/util.c src/util.h
-        src/types.h
-        src/macros.h
+        libscan/util.c libscan/util.h
+        libscan/scan.c libscan/scan.h
+        libscan/macros.h
+
+        libscan/text/text.c libscan/text/text.h
+        libscan/arc/arc.c libscan/arc/arc.h
 
         third-party/utf8.h
 )
 
-
-# Third party
-add_subdirectory(third-party/uuid/)
-target_include_directories(
-        scan PRIVATE
-        third-party/uuid/src/
-)
-
 target_compile_options(
-        scan PRIVATE
+        scan
+        PRIVATE
         -Werror
+        -g
 )
 
 target_link_libraries(
         scan
-        uuid
-)
\ No newline at end of file
+        -static
+        ${LibArchive_LIBRARIES}
+)
+
+
+# test executable
+add_executable(
+        scan_test
+        test/main.c
+)
+
+
+target_link_libraries(
+        scan_test
+        scan
+)
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4dc6cf5
--- /dev/null
+++ b/README.md
@@ -0,0 +1,4 @@
+
+```bash
+vcpkg install libarchive
+```
\ No newline at end of file
diff --git a/libscan/arc/arc.c b/libscan/arc/arc.c
new file mode 100644
index 0000000..2e86672
--- /dev/null
+++ b/libscan/arc/arc.c
@@ -0,0 +1,167 @@
+#include "arc.h"
+
+#include "../scan.h"
+#include "../util.h"
+
+#include
+#include
+#include
+#include
+
+
+
+int should_parse_filtered_file(const char *filepath, int ext) {
+    char tmp[PATH_MAX * 2];
+
+    if (ext == 0) {
+        return FALSE;
+    }
+
+    memcpy(tmp, filepath, ext - 1);
+    *(tmp + ext - 1) = '\0';
+
+    char *idx = strrchr(tmp, '.');
+
+    if (idx == NULL) {
+        return FALSE;
+    }
+
+    if (strcmp(idx, ".tar") == 0) {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+int arc_read(struct vfile *f, void *buf, size_t size) {
+    return archive_read_data(f->arc, buf, size);
+}
+
+typedef struct arc_data {
+    vfile_t *f;
+    char buf[ARC_BUF_SIZE];
+} arc_data_f;
+
+int vfile_open_callback(struct archive *a, void *user_data) {
+    arc_data_f *data = user_data;
+
+    if (data->f->is_fs_file && data->f->fd == -1) {
+        data->f->fd = open(data->f->filepath, O_RDONLY);
+    }
+
+    return ARCHIVE_OK;
+}
+
+long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
+    arc_data_f *data = user_data;
+
+    *buf = data->buf;
+    return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
+}
+
+int vfile_close_callback(struct archive *a, void *user_data) {
+    arc_data_f *data = user_data;
+
+    if (data->f->close != NULL) {
+        data->f->close(data->f);
+    }
+
+    return ARCHIVE_OK;
+}
+
+scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) {
+
+    struct archive *a;
+    struct archive_entry *entry;
+
+
+    arc_data_f data;
+    data.f = f;
+
+    int ret = 0;
+    if (data.f->is_fs_file) {
+
+        a = archive_read_new();
+        archive_read_support_filter_all(a);
+        archive_read_support_format_all(a);
+
+        ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
+    } else if (ctx->mode == ARC_MODE_RECURSE) {
+
+        a = archive_read_new();
+        archive_read_support_filter_all(a);
+        archive_read_support_format_all(a);
+
+        ret = archive_read_open(
+                a, &data,
+                vfile_open_callback,
+                vfile_read_callback,
+                vfile_close_callback
+        );
+    } else {
+        return SCAN_OK;
+    }
+
+    if (ret != ARCHIVE_OK) {
+        //TODO: log
+//        LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
+        archive_read_free(a);
+        return SCAN_ERR_READ;
+    }
+
+    if (ctx->mode == ARC_MODE_LIST) {
+
+        dyn_buffer_t buf = dyn_buffer_create();
+
+        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
+            if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
+
+                char *path = (char *) archive_entry_pathname(entry);
+
+                dyn_buffer_append_string(&buf, path);
+                dyn_buffer_write_char(&buf, '\n');
+            }
+        }
+        dyn_buffer_write_char(&buf, '\0');
+
+        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
+        meta_list->key = MetaContent;
+        strcpy(meta_list->str_val, buf.buf);
+        APPEND_META(doc, meta_list);
+        dyn_buffer_destroy(&buf);
+
+    } else {
+
+        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + PATH_MAX * 2);
+
+        sub_job->vfile.close = NULL;
+        sub_job->vfile.read = arc_read;
+        sub_job->vfile.arc = a;
+        sub_job->vfile.filepath = sub_job->filepath;
+        sub_job->vfile.is_fs_file = FALSE;
+        memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));
+
+        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
+            sub_job->info = *archive_entry_stat(entry);
+            if (S_ISREG(sub_job->info.st_mode)) {
+                sprintf(sub_job->filepath, "%s#/%s", f->filepath, archive_entry_pathname(entry));
+                sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;
+
+                char *p = strrchr(sub_job->filepath, '.');
+                if (p != NULL) {
+                    sub_job->ext = (int) (p - sub_job->filepath + 1);
+                } else {
+                    sub_job->ext = (int) strlen(sub_job->filepath);
+                }
+
+                //TODO:
+//                parse(sub_job);
+            }
+        }
+
+        free(sub_job);
+    }
+
+    archive_read_free(a);
+    return SCAN_OK;
+}
diff --git a/libscan/arc/arc.h b/libscan/arc/arc.h
new file mode 100644
index 0000000..3c0e95f
--- /dev/null
+++ b/libscan/arc/arc.h
@@ -0,0 +1,26 @@
+#ifndef SCAN_ARC_H
+#define SCAN_ARC_H
+
+#include
+#include
+#include "../scan.h"
+
+#define ARC_MODE_SKIP 0
+#define ARC_MODE_LIST 1
+#define ARC_MODE_SHALLOW 2
+#define ARC_MODE_RECURSE 3
+typedef int archive_mode_t;
+
+typedef struct {
+    archive_mode_t mode;
+} scan_arc_ctx_t;
+
+#define ARC_BUF_SIZE 8192
+
+int should_parse_filtered_file(const char *filepath, int ext);
+
+scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc);
+
+int arc_read(struct vfile * f, void *buf, size_t size);
+
+#endif
diff --git a/src/macros.h b/libscan/macros.h
similarity index 80%
rename from src/macros.h
rename to libscan/macros.h
index 5033f70..a84ba38 100644
--- a/src/macros.h
+++ b/libscan/macros.h
@@ -11,3 +11,7 @@
 
 #undef MIN
 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
diff --git a/libscan/scan.c b/libscan/scan.c
new file mode 100644
index 0000000..0a18d2c
--- /dev/null
+++ b/libscan/scan.c
@@ -0,0 +1,24 @@
+#include "scan.h"
+
+#include
+#include
+
+int fs_read(struct vfile *f, void *buf, size_t size) {
+    if (f->fd == -1) {
+        f->fd = open(f->filepath, O_RDONLY);
+        if (f->fd == -1) {
+            //TODO: log
+//            LOG_ERRORF(f->filepath, "open(): [%d] %s", errno, strerror(errno))
+            return -1;
+        }
+    }
+
+    return read(f->fd, buf, size);
+}
+
+
+void fs_close(struct vfile *f) {
+    if (f->fd != -1) {
+        close(f->fd);
+    }
+}
diff --git a/src/types.h b/libscan/scan.h
similarity index 92%
rename from src/types.h
rename to libscan/scan.h
index 7bf4913..36a9b3d 100644
--- a/src/types.h
+++ b/libscan/scan.h
@@ -1,18 +1,11 @@
-#ifndef SCAN_TYPES_H
-#define SCAN_TYPES_H
+#ifndef SCAN_SCAN_H
+#define SCAN_SCAN_H
 
 #include
 #include
 
-#include "uuid.h"
+#include "../third-party/uuid/src/uuid.h"
 
-// TODO
-#define ARC_MODE_SKIP 0
-#define ARC_MODE_LIST 1
-#define ARC_MODE_SHALLOW 2
-#define ARC_MODE_RECURSE 3
-typedef int archive_mode_t;
-
 #define META_INT_MASK 0x80
 #define META_STR_MASK 0x40
 #define META_LONG_MASK 0x20
@@ -121,4 +114,11 @@ typedef struct parse_job_t {
     doc->meta_tail = meta;\
 }
 
+
 #endif
+
+void fs_close(struct vfile *f);
+
+#define CLOSE_FILE(f) if (f.close != NULL) {f.close(&f);};
+
+int fs_read(struct vfile *f, void *buf, size_t size);
diff --git a/src/text.c b/libscan/text/text.c
similarity index 95%
rename from src/text.c
rename to libscan/text/text.c
index 7e43519..aec00cd 100644
--- a/src/text.c
+++ b/libscan/text/text.c
@@ -21,6 +21,8 @@ scan_code_t parse_text(scan_text_ctx_t *ctx, struct vfile *f, document_t *doc) {
 
     APPEND_META(doc, meta)
 
+    printf("%s", meta->str_val);
+
     free(buf);
     text_buffer_destroy(&tex);
 
diff --git a/src/text.h b/libscan/text/text.h
similarity index 81%
rename from src/text.h
rename to libscan/text/text.h
index bae8f51..aa1ea29 100644
--- a/src/text.h
+++ b/libscan/text/text.h
@@ -1,8 +1,8 @@
 #ifndef SCAN_TEXT_H
 #define SCAN_TEXT_H
 
-#include "types.h"
-#include "util.h"
+#include "../scan.h"
+#include "../util.h"
 
 typedef struct {
     long content_size;
diff --git a/src/util.c b/libscan/util.c
similarity index 100%
rename from src/util.c
rename to libscan/util.c
diff --git a/src/util.h b/libscan/util.h
similarity index 100%
rename from src/util.h
rename to libscan/util.h
diff --git a/test/main.c b/test/main.c
new file mode 100644
index 0000000..821cacc
--- /dev/null
+++ b/test/main.c
@@ -0,0 +1,22 @@
+#include "../libscan/text/text.h"
+#include
+#include "../libscan/arc/arc.h"
+
+int main() {
+
+    scan_text_ctx_t ctx;
+
+    ctx.content_size = 100;
+    vfile_t file;
+    file.is_fs_file = TRUE;
+    file.filepath = "/home/simon/Downloads/libscan/CMakeLists.txt";
+    file.fd = open("/home/simon/Downloads/libscan/CMakeLists.txt", O_RDONLY);
+    file.read = fs_read;
+
+    document_t doc;
+    doc.meta_head = NULL;
+    doc.meta_tail = NULL;
+
+    doc.size = 200;
+    parse_text(&ctx, &file, &doc);
+}
\ No newline at end of file