arc component

This commit is contained in:
simon987 2020-03-25 13:44:20 -04:00
parent a24d4dc538
commit 5ba6997a98
14 changed files with 297 additions and 29 deletions

3
.gitignore vendored
View File

@ -5,4 +5,5 @@ libscan.a
libscan.so
*.cbp
CMakeFiles
CMakeCache.txt
CMakeCache.txt
scan_test

6
.gitmodules vendored
View File

@ -4,3 +4,9 @@
[submodule "third-party/utf8.h"]
path = third-party/utf8.h
url = https://github.com/sheredom/utf8.h
[submodule "third-party/libarchive"]
path = third-party/libarchive
url = https://github.com/libarchive/libarchive
[submodule "third-party/zlib"]
path = third-party/zlib
url = https://github.com/madler/zlib

View File

@ -1,33 +1,45 @@
cmake_minimum_required(VERSION 3.15)
project(scan C)
project(scan C)
set(CMAKE_C_STANDARD 11)
find_package(LibArchive REQUIRED)
add_library(
scan
src/text.c src/text.h
src/util.c src/util.h
src/types.h
src/macros.h
libscan/util.c libscan/util.h
libscan/scan.c libscan/scan.h
libscan/macros.h
libscan/text/text.c libscan/text/text.h
libscan/arc/arc.c libscan/arc/arc.h
third-party/utf8.h
)
# Third party
add_subdirectory(third-party/uuid/)
target_include_directories(
scan PRIVATE
third-party/uuid/src/
)
target_compile_options(
scan PRIVATE
scan
PRIVATE
-Werror
-g
)
target_link_libraries(
scan
uuid
)
-static
${LibArchive_LIBRARIES}
)
# test executable
add_executable(
scan_test
test/main.c
)
target_link_libraries(
scan_test
scan
)

4
README.md Normal file
View File

@ -0,0 +1,4 @@
```bash
vcpkg install libarchive
```

167
libscan/arc/arc.c Normal file
View File

@ -0,0 +1,167 @@
#include "arc.h"
#include "../scan.h"
#include "../util.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
/**
 * Decide whether a compressed ("filtered") file wraps a tar archive, i.e.
 * whether the extension that precedes the final one is ".tar"
 * (for example "backup.tar.gz").
 *
 * @param filepath Path of the file. Must be readable up to index ext - 1.
 * @param ext      Offset of the first character AFTER the final '.', so
 *                 filepath[ext - 1] is expected to be that '.'.
 * @return 1 when the characters right before the final '.' are ".tar",
 *         0 otherwise (including NULL paths and offsets too small to hold
 *         ".tar" plus the final '.').
 *
 * The comparison is done in place: nothing is copied, so neither a negative
 * offset nor a very long path can overrun a temporary buffer.
 */
int should_parse_filtered_file(const char *filepath, int ext) {
    static const char inner_ext[] = ".tar";
    const int inner_len = (int) (sizeof(inner_ext) - 1);

    // Need at least ".tar" followed by the '.' of the outer extension.
    if (filepath == NULL || ext < inner_len + 1) {
        return 0;
    }

    // The stem ends at ext - 1 (exclusive); check that it ends with ".tar".
    // "tar" contains no '.', so this is the same as locating the stem's last
    // '.' and comparing what follows it.
    const char *candidate = filepath + (ext - 1 - inner_len);
    return memcmp(candidate, inner_ext, (size_t) inner_len) == 0;
}
/**
 * vfile read hook for files that live inside an archive: pulls the next
 * chunk of the current entry's data from the libarchive handle stored in
 * f->arc.
 *
 * @param f    Virtual file whose `arc` member is the archive being read.
 * @param buf  Destination buffer.
 * @param size Capacity of buf in bytes.
 * @return Whatever archive_read_data() reports, converted to int.
 */
int arc_read(struct vfile *f, void *buf, size_t size) {
    int bytes_read = archive_read_data(f->arc, buf, size);
    return bytes_read;
}
/*
 * Per-archive state passed as `user_data` to the libarchive open/read/close
 * callbacks defined in this file.
 */
typedef struct arc_data {
// Virtual file the archive bytes are read from.
vfile_t *f;
// Scratch buffer that vfile_read_callback fills and hands back to libarchive.
char buf[ARC_BUF_SIZE];
} arc_data_f;
int vfile_open_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->is_fs_file && data->f->fd == -1) {
data->f->fd = open(data->f->filepath, O_RDONLY);
}
return ARCHIVE_OK;
}
long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
arc_data_f *data = user_data;
*buf = data->buf;
return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
}
int vfile_close_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->close != NULL) {
data->f->close(data->f);
}
return ARCHIVE_OK;
}
/**
 * Open an archive and either list its regular-file entries or walk them to
 * prepare one sub-job per entry.
 *
 * Source selection:
 *  - filesystem files are opened by name (doc->filepath);
 *  - other vfiles are only opened in ARC_MODE_RECURSE, streaming through the
 *    vfile_*_callback functions; in any other mode the call is a no-op.
 *
 * After a successful open:
 *  - ARC_MODE_LIST appends one MetaContent entry to doc containing the
 *    newline-separated paths of all regular files in the archive;
 *  - every other mode iterates the regular-file entries and fills a
 *    parse_job_t for each (dispatching the job is still a TODO).
 *
 * @param ctx Archive scan settings (mode).
 * @param f   Virtual file holding the archive data.
 * @param doc Document that receives metadata and provides the parent uuid.
 * @return SCAN_OK on success or when there is nothing to do; SCAN_ERR_READ
 *         when the archive cannot be opened or a required allocation fails
 *         (no dedicated out-of-memory code is visible in this file).
 */
scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) {
    struct archive *a;
    struct archive_entry *entry;

    // `data` must outlive the archive handle: libarchive keeps the pointer
    // and passes it to the callbacks until archive_read_free().
    arc_data_f data;
    data.f = f;

    int ret = 0;
    if (data.f->is_fs_file) {
        a = archive_read_new();
        archive_read_support_filter_all(a);
        archive_read_support_format_all(a);

        ret = archive_read_open_filename(a, doc->filepath, ARC_BUF_SIZE);
    } else if (ctx->mode == ARC_MODE_RECURSE) {
        a = archive_read_new();
        archive_read_support_filter_all(a);
        archive_read_support_format_all(a);

        ret = archive_read_open(
                a, &data,
                vfile_open_callback,
                vfile_read_callback,
                vfile_close_callback
        );
    } else {
        return SCAN_OK;
    }

    if (ret != ARCHIVE_OK) {
        //TODO: log
        // LOG_ERRORF(doc->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
        archive_read_free(a);
        return SCAN_ERR_READ;
    }

    scan_code_t status = SCAN_OK;

    if (ctx->mode == ARC_MODE_LIST) {
        dyn_buffer_t buf = dyn_buffer_create();

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            const struct stat *st = archive_entry_stat(entry);
            const char *path = archive_entry_pathname(entry);

            // Skip entries without usable metadata or a decodable name.
            if (st == NULL || path == NULL || !S_ISREG(st->st_mode)) {
                continue;
            }

            dyn_buffer_append_string(&buf, (char *) path);
            dyn_buffer_write_char(&buf, '\n');
        }
        dyn_buffer_write_char(&buf, '\0');

        // buf.cur counts the terminating NUL written just above.
        meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
        if (meta_list == NULL) {
            status = SCAN_ERR_READ;
        } else {
            meta_list->key = MetaContent;
            strcpy(meta_list->str_val, buf.buf);
            APPEND_META(doc, meta_list);
        }
        dyn_buffer_destroy(&buf);

    } else {
        // Capacity reserved for the path stored right after the job struct.
        const size_t path_cap = PATH_MAX * 2;

        parse_job_t *sub_job = malloc(sizeof(parse_job_t) + path_cap);
        if (sub_job == NULL) {
            archive_read_free(a);
            return SCAN_ERR_READ;
        }

        sub_job->vfile.close = NULL;
        sub_job->vfile.read = arc_read;
        sub_job->vfile.arc = a;
        sub_job->vfile.filepath = sub_job->filepath;
        sub_job->vfile.is_fs_file = FALSE;
        memcpy(sub_job->parent, doc->uuid, sizeof(uuid_t));

        while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
            const struct stat *st = archive_entry_stat(entry);
            const char *entry_path = archive_entry_pathname(entry);
            if (st == NULL || entry_path == NULL) {
                continue;
            }

            sub_job->info = *st;
            if (!S_ISREG(sub_job->info.st_mode)) {
                continue;
            }

            // Entry names come from the archive and may be arbitrarily long:
            // bound the write and skip entries that do not fit.
            int written = snprintf(sub_job->filepath, path_cap, "%s#/%s",
                                   f->filepath, entry_path);
            if (written < 0 || (size_t) written >= path_cap) {
                continue;
            }

            // The "#/" separator guarantees at least one '/' in the path.
            sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;

            char *p = strrchr(sub_job->filepath, '.');
            if (p != NULL) {
                sub_job->ext = (int) (p - sub_job->filepath + 1);
            } else {
                sub_job->ext = (int) strlen(sub_job->filepath);
            }

            //TODO:
            // parse(sub_job);
        }

        free(sub_job);
    }

    archive_read_free(a);
    return status;
}

26
libscan/arc/arc.h Normal file
View File

@ -0,0 +1,26 @@
#ifndef SCAN_ARC_H
#define SCAN_ARC_H
#include <archive.h>
#include <archive_entry.h>
#include "../scan.h"
#define ARC_MODE_SKIP 0
#define ARC_MODE_LIST 1
#define ARC_MODE_SHALLOW 2
#define ARC_MODE_RECURSE 3
typedef int archive_mode_t;
typedef struct {
archive_mode_t mode;
} scan_arc_ctx_t;
#define ARC_BUF_SIZE 8192
int should_parse_filtered_file(const char *filepath, int ext);
scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc);
int arc_read(struct vfile * f, void *buf, size_t size);
#endif

View File

@ -11,3 +11,7 @@
#undef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#ifndef PATH_MAX
#define PATH_MAX 4096
#endif

24
libscan/scan.c Normal file
View File

@ -0,0 +1,24 @@
#include "scan.h"
#include <fcntl.h>
#include <unistd.h>
/**
 * vfile read hook for regular filesystem files. The file named by
 * f->filepath is opened read-only on first use (when f->fd is -1) and the
 * descriptor is kept in f->fd for later calls.
 *
 * @param f    Virtual file to read from.
 * @param buf  Destination buffer.
 * @param size Maximum number of bytes to read.
 * @return The result of read(), or -1 when the file could not be opened.
 */
int fs_read(struct vfile *f, void *buf, size_t size) {
    // Fast path: descriptor already available.
    if (f->fd != -1) {
        return read(f->fd, buf, size);
    }

    f->fd = open(f->filepath, O_RDONLY);
    if (f->fd == -1) {
        //TODO: log
        // LOG_ERRORF(f->filepath, "open(): [%d] %s", errno, strerror(errno))
        return -1;
    }

    return read(f->fd, buf, size);
}
/**
 * vfile close hook for regular filesystem files. Closes f->fd when it is
 * open and marks the vfile as closed by resetting f->fd to -1, so calling
 * this twice does not close an unrelated descriptor that reused the number.
 *
 * @param f Virtual file to close.
 */
void fs_close(struct vfile *f) {
    if (f->fd != -1) {
        close(f->fd);
        f->fd = -1;
    }
}

View File

@ -1,18 +1,11 @@
#ifndef SCAN_TYPES_H
#define SCAN_TYPES_H
#ifndef SCAN_SCAN_H
#define SCAN_SCAN_H
#include <stdio.h>
#include <sys/stat.h>
#include "uuid.h"
#include "../third-party/uuid/src/uuid.h"
// TODO
#define ARC_MODE_SKIP 0
#define ARC_MODE_LIST 1
#define ARC_MODE_SHALLOW 2
#define ARC_MODE_RECURSE 3
typedef int archive_mode_t;
#define META_INT_MASK 0x80
#define META_STR_MASK 0x40
#define META_LONG_MASK 0x20
@ -121,4 +114,11 @@ typedef struct parse_job_t {
doc->meta_tail = meta;\
}
#endif
void fs_close(struct vfile *f);
/*
 * Call the close hook of vfile `f` (an lvalue of struct type, not a pointer)
 * when one is installed. The argument is parenthesized so expressions such
 * as `*ptr` expand correctly. The macro expands to a bare `if` statement and
 * may be written with or without a trailing semicolon; do not use it as the
 * unbraced body of an if/else.
 */
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));}
int fs_read(struct vfile *f, void *buf, size_t size);

View File

@ -21,6 +21,8 @@ scan_code_t parse_text(scan_text_ctx_t *ctx, struct vfile *f, document_t *doc) {
APPEND_META(doc, meta)
printf("%s", meta->str_val);
free(buf);
text_buffer_destroy(&tex);

View File

@ -1,8 +1,8 @@
#ifndef SCAN_TEXT_H
#define SCAN_TEXT_H
#include "types.h"
#include "util.h"
#include "../scan.h"
#include "../util.h"
typedef struct {
long content_size;

22
test/main.c Normal file
View File

@ -0,0 +1,22 @@
#include "../libscan/text/text.h"
#include <fcntl.h>
#include "../libscan/arc/arc.h"
int main() {
scan_text_ctx_t ctx;
ctx.content_size = 100;
vfile_t file;
file.is_fs_file = TRUE;
file.filepath = "/home/simon/Downloads/libscan/CMakeLists.txt";
file.fd = open("/home/simon/Downloads/libscan/CMakeLists.txt", O_RDONLY);
file.read = fs_read;
document_t doc;
doc.meta_head = NULL;
doc.meta_tail = NULL;
doc.size = 200;
parse_text(&ctx, &file, &doc);
}