From 3990b9207da4dd8736a7e574582fedf9d3c86700 Mon Sep 17 00:00:00 2001
From: simon
Date: Sat, 11 Jan 2020 15:04:25 -0500
Subject: [PATCH] wip

---
Review notes (ignored by git am):
- This patch was re-flowed from a copy whose line breaks and repeated spaces
  had been collapsed. Whitespace of the context and removed lines in the
  CMakeLists.txt and main.c hunks is reconstructed: run `git apply --check`
  (optionally with --ignore-whitespace) and adjust blank context lines if
  it does not apply.
- The <...> targets of the #include lines were missing and have been restored
  from the symbols each file uses.
- The blob ids on the "index" lines predate the review fixes.

 .gitmodules         |   3 +
 CMakeLists.txt      |  27 ++-
 extract.c           | 472 ++++++++++++++++++++++++++++++++++++++++++++
 extract.h           |  21 ++
 main.c              |  71 ++++++-
 thirdparty/argparse |   1 +
 tpool.c             | 219 ++++++++++++++++++++
 tpool.h             |  19 ++
 8 files changed, 825 insertions(+), 8 deletions(-)
 create mode 100644 .gitmodules
 create mode 100644 extract.c
 create mode 100644 extract.h
 create mode 160000 thirdparty/argparse
 create mode 100644 tpool.c
 create mode 100644 tpool.h

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..a21ae9d
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "thirdparty/argparse"]
+	path = thirdparty/argparse
+	url = https://github.com/cofyc/argparse
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4477b08..a9bd7c7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,11 +2,16 @@ cmake_minimum_required(VERSION 3.7)
 
 project(deepextract C)
 
-set(CMAKE_C_STANDARD 99)
+set(CMAKE_C_STANDARD 11)
 option(STATIC_BUILD "Static build" off)
 
 add_executable(
-        deepextract main.c
+        deepextract
+        main.c
+        thirdparty/argparse/argparse.c
+        thirdparty/argparse/argparse.h
+        extract.c extract.h
+        tpool.c tpool.h
 )
 
 if (STATIC_BUILD)
@@ -15,10 +20,28 @@ if (STATIC_BUILD)
             -static
             archive
             acl
+            pthread
+
+            lzma
+            lz4
+            zstd
+            bz2
+            z
+            crypto
     )
 else ()
     target_link_libraries(
             deepextract
             archive
+            pthread
     )
 endif ()
+
+target_compile_options(
+        deepextract
+        PRIVATE
+        -Ofast
+        -fno-stack-protector
+        -fomit-frame-pointer
+)
+
diff --git a/extract.c b/extract.c
new file mode 100644
index 0000000..8694103
--- /dev/null
+++ b/extract.c
@@ -0,0 +1,472 @@
+/* Feature-test macro required for nftw(); must precede every system header. */
+#define _XOPEN_SOURCE 500
+
+#include "extract.h"
+
+#include <archive.h>
+#include <archive_entry.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <ftw.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#define ARC_BUF_SIZE 8192
+#define PATH_BUF_SIZE 8192
+
+static tpool_t *pool;
+
+/* Run-time settings; extract() writes them before any worker thread starts. */
+static int Verbose;
+static int DryRun;
+static int Flatten;
+static size_t RootLen;
+static const char *DstPath;
+
+typedef struct vfile vfile_t;
+
+/* File name suffixes that mark a file as an archive to descend into. */
+static const char *const archive_extensions[] = {
+        ".iso",
+        ".zip",
+        ".rar",
+        ".ar",
+        ".arc",
+        ".warc",
+        ".7z",
+        ".tgz",
+        ".tar.gz",
+        ".tar.zst",
+        ".tar.zstd",
+        ".tar.xz",
+        ".tar.bz",
+        ".tar.bz2",
+        ".tar.lz4",
+        ".tar.lzma",
+        ".docx",
+        ".pptx",
+        ".xlsx",
+        ".epub",
+        ".cbz",
+        ".jar",
+        ".deb",
+        ".rpm",
+        ".xpi",
+};
+
+typedef int (*read_func_t)(struct vfile *, void *buf, size_t size);
+
+typedef void (*close_func_t)(struct vfile *);
+
+/*
+ * A readable file: either a regular file on disk (fd) or the current entry
+ * of an open libarchive handle (arc).
+ */
+typedef struct vfile {
+    union {
+        int fd;
+        struct archive *arc;
+    };
+
+    int is_fs_file;
+    char *filepath;
+    struct stat info;
+    read_func_t read;
+    close_func_t close;
+} vfile_t;
+
+/* Unit of work for handle_file(); the path is stored inline after the struct. */
+typedef struct {
+    struct vfile vfile;
+    int base;
+    char filepath[];
+} job_t;
+
+/* Client data handed to the libarchive read callbacks. */
+typedef struct arc_data {
+    vfile_t *f;
+    char buf[ARC_BUF_SIZE];
+} arc_data_f;
+
+void handle_file(void *arg);
+
+/* Return non-zero when str ends with suffix. */
+int ends_with(const char *str, const char *suffix) {
+    size_t str_len = strlen(str);
+    size_t suffix_len = strlen(suffix);
+    return (str_len >= suffix_len) && (memcmp(str + str_len - suffix_len, suffix, suffix_len) == 0);
+}
+
+/* Return TRUE when the file name at filepath + base has a known archive suffix. */
+int is_archive(char *filepath, int base) {
+    size_t count = sizeof(archive_extensions) / sizeof(archive_extensions[0]);
+
+    for (size_t i = 0; i < count; i++) {
+        if (ends_with(filepath + base, archive_extensions[i])) {
+            return TRUE;
+        }
+    }
+    return FALSE;
+}
+
+/* libarchive open callback: lazily open the backing file of an on-disk vfile. */
+int vfile_open_callback(struct archive *a, void *user_data) {
+    arc_data_f *data = user_data;
+    (void) a;
+
+    if (data->f->is_fs_file && data->f->fd == -1) {
+        data->f->fd = open(data->f->filepath, O_RDONLY);
+        if (data->f->fd == -1) {
+            return ARCHIVE_FATAL;
+        }
+    }
+
+    return ARCHIVE_OK;
+}
+
+#define IS_EEXIST_ERR(ret) ((ret) < 0 && errno == EEXIST)
+
+/*
+ * Format "<dst>.<i>" into out. Returns 0 on success; on truncation returns -1
+ * with errno set to ENAMETOOLONG, which also ends an IS_EEXIST_ERR() retry loop.
+ */
+static int numbered_name(char *out, size_t size, const char *dst, int i) {
+    int n = snprintf(out, size, "%s.%d", dst, i);
+
+    if (n < 0 || (size_t) n >= size) {
+        errno = ENAMETOOLONG;
+        return -1;
+    }
+    return 0;
+}
+
+/* Write all size bytes of buf to fd, retrying on short writes and EINTR. */
+static int write_all(int fd, const char *buf, size_t size) {
+    while (size > 0) {
+        ssize_t n = write(fd, buf, size);
+
+        if (n < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+            return -1;
+        }
+        buf += n;
+        size -= (size_t) n;
+    }
+    return 0;
+}
+
+/*
+ * Materialise src at dst: on-disk files are hard-linked, archive entries are
+ * streamed into a new file. An existing dst is never overwritten; "<dst>.1",
+ * "<dst>.2", ... are tried instead.
+ */
+void copy_or_link(vfile_t *src, const char *dst) {
+    char new_name[PATH_BUF_SIZE];
+
+    if (src->is_fs_file) {
+        int ret = link(src->filepath, dst);
+
+        for (int i = 1; IS_EEXIST_ERR(ret); i++) {
+            if (numbered_name(new_name, sizeof(new_name), dst, i) < 0) {
+                break;
+            }
+            ret = link(src->filepath, new_name);
+        }
+
+        if (ret < 0) {
+            fprintf(stderr, "link %s -> %s: %s\n", src->filepath, dst, strerror(errno));
+        }
+        return;
+    }
+
+    /* Keep permission bits only (archive metadata is untrusted); 0 means unknown. */
+    mode_t mode = src->info.st_mode & 0777;
+    if (mode == 0) {
+        mode = 0644;
+    }
+
+    int fd = open(dst, O_WRONLY | O_CREAT | O_EXCL, mode);
+
+    for (int i = 1; IS_EEXIST_ERR(fd); i++) {
+        if (numbered_name(new_name, sizeof(new_name), dst, i) < 0) {
+            break;
+        }
+        fd = open(new_name, O_WRONLY | O_CREAT | O_EXCL, mode);
+    }
+
+    if (fd < 0) {
+        fprintf(stderr, "open %s: %s\n", dst, strerror(errno));
+        return;
+    }
+
+    char buf[ARC_BUF_SIZE];
+    int n;
+
+    /* Write exactly the bytes read; stop at end of data (0) or on error (< 0). */
+    while ((n = src->read(src, buf, sizeof(buf))) > 0) {
+        if (write_all(fd, buf, (size_t) n) < 0) {
+            fprintf(stderr, "write %s: %s\n", dst, strerror(errno));
+            break;
+        }
+    }
+
+    if (n < 0) {
+        fprintf(stderr, "read %s: failed\n", src->filepath);
+    }
+
+    close(fd);
+}
+
+/* libarchive read callback: refill the staging buffer from the vfile. */
+ssize_t vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
+    arc_data_f *data = user_data;
+    (void) a;
+
+    *buf = data->buf;
+    return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
+}
+
+/* libarchive close callback: run the vfile's close hook, if it has one. */
+int vfile_close_callback(struct archive *a, void *user_data) {
+    arc_data_f *data = user_data;
+    (void) a;
+
+    if (data->f->close != NULL) {
+        data->f->close(data->f);
+    }
+
+    return ARCHIVE_OK;
+}
+
+/* read_func_t for archive entries: read from the current entry of f->arc. */
+int arc_read(struct vfile *f, void *buf, size_t size) {
+    return (int) archive_read_data(f->arc, buf, size);
+}
+
+/* Open the job's file as an archive and pass each regular entry to handle_file(). */
+static void handle_archive(job_t *job) {
+    arc_data_f data;
+    data.f = &job->vfile;
+
+    struct archive *a = archive_read_new();
+    if (a == NULL) {
+        fprintf(stderr, "(arc.c) %s: out of memory\n", job->filepath);
+        return;
+    }
+    archive_read_support_filter_all(a);
+    archive_read_support_format_all(a);
+
+    int ret;
+    if (data.f->is_fs_file) {
+        ret = archive_read_open_filename(a, job->filepath, ARC_BUF_SIZE);
+    } else {
+        /* Nested archive: its bytes come from the parent archive's current entry. */
+        ret = archive_read_open(
+                a, &data,
+                vfile_open_callback,
+                vfile_read_callback,
+                vfile_close_callback
+        );
+    }
+
+    if (ret != ARCHIVE_OK) {
+        fprintf(stderr, "(arc.c) %s [%d] %s\n", job->filepath, ret, archive_error_string(a));
+        archive_read_free(a);
+        return;
+    }
+
+    job_t *sub_job = malloc(sizeof(job_t) + PATH_BUF_SIZE);
+    if (sub_job == NULL) {
+        fprintf(stderr, "(arc.c) %s: out of memory\n", job->filepath);
+        archive_read_free(a);
+        return;
+    }
+
+    sub_job->vfile.close = NULL;
+    sub_job->vfile.read = arc_read;
+    sub_job->vfile.arc = a;
+    sub_job->vfile.filepath = sub_job->filepath;
+    sub_job->vfile.is_fs_file = FALSE;
+
+    struct archive_entry *entry;
+    while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
+        const char *name = archive_entry_pathname(entry);
+        if (name == NULL || !S_ISREG(archive_entry_filetype(entry))) {
+            continue;
+        }
+
+        int n = snprintf(sub_job->filepath, PATH_BUF_SIZE, "%s#/%s", job->vfile.filepath, name);
+        if (n < 0 || n >= PATH_BUF_SIZE) {
+            fprintf(stderr, "(arc.c) %s: entry path too long, skipped\n", job->filepath);
+            continue;
+        }
+
+        sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;
+        sub_job->vfile.info = *archive_entry_stat(entry);
+        handle_file(sub_job);
+    }
+
+    free(sub_job);
+    archive_read_free(a);
+}
+
+/* Link or copy a non-archive file into the destination directory. */
+static void handle_regular(job_t *job) {
+    const char *name = job->filepath + job->base;
+    /* The walk root itself has nothing after RootLen; fall back to its name. */
+    const char *relpath = strlen(job->filepath) > RootLen ? job->filepath + RootLen : name;
+    char dstpath[PATH_BUF_SIZE];
+
+    int n = snprintf(dstpath, sizeof(dstpath), "%s%s", DstPath, Flatten ? name : relpath);
+    if (n < 0 || (size_t) n >= sizeof(dstpath)) {
+        fprintf(stderr, "%s: destination path too long, skipped\n", job->filepath);
+        return;
+    }
+
+    if (Verbose) {
+        printf("%s -> %s\n", relpath, dstpath);
+    }
+
+    if (!DryRun) {
+        copy_or_link(&job->vfile, dstpath);
+    }
+}
+
+/*
+ * Thread pool entry point; arg is a job_t *. Archives are expanded
+ * recursively, any other file is linked or copied to the destination.
+ * Does not free arg.
+ */
+void handle_file(void *arg) {
+    job_t *job = arg;
+
+    if (is_archive(job->filepath, job->base)) {
+        handle_archive(job);
+    } else {
+        handle_regular(job);
+    }
+}
+
+/* read_func_t for on-disk files: open on first use, then read(2). */
+int fs_read(struct vfile *f, void *buf, size_t size) {
+    if (f->fd == -1) {
+        f->fd = open(f->filepath, O_RDONLY);
+        if (f->fd == -1) {
+            return -1;
+        }
+    }
+
+    return (int) read(f->fd, buf, size);
+}
+
+/* close_func_t for on-disk files; safe to call more than once. */
+void fs_close(struct vfile *f) {
+    if (f->fd != -1) {
+        close(f->fd);
+        f->fd = -1;
+    }
+}
+
+/* Allocate a job for an on-disk file. Returns NULL when out of memory. */
+job_t *create_fs_job(const char *filepath, int base, struct stat info) {
+    size_t len = strlen(filepath);
+    job_t *job = malloc(sizeof(job_t) + len + 1);
+    if (job == NULL) {
+        return NULL;
+    }
+
+    memcpy(job->filepath, filepath, len + 1);
+    job->base = base;
+
+    job->vfile.filepath = job->filepath;
+    job->vfile.read = fs_read;
+    job->vfile.close = fs_close;
+    job->vfile.info = info;
+    job->vfile.is_fs_file = TRUE;
+    job->vfile.fd = -1;
+
+    return job;
+}
+
+/* nftw() callback: queue every regular file; a non-zero return stops the walk. */
+int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
+    if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
+        job_t *job = create_fs_job(filepath, ftw->base, *info);
+
+        /* On success the pool owns the job and frees it after handle_file(). */
+        if (job == NULL || !tpool_add_work(pool, handle_file, job)) {
+            free(job);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+int walk_directory_tree(const char *dirpath) {
+    return nftw(dirpath, handle_entry, 15, FTW_PHYS);
+}
+
+/*
+ * Walk args->source and link or copy every regular file, including files
+ * nested inside archives, into args->destination (or only list them when
+ * args->dry_run is set). Returns 0 on success, -1 on error.
+ */
+int extract(args_t *args) {
+    static char src_buf[PATH_BUF_SIZE];
+    static char dst_buf[PATH_BUF_SIZE];
+
+    if (args->source == NULL || args->source[0] == '\0'
+        || args->destination == NULL || args->destination[0] == '\0') {
+        fprintf(stderr, "Missing SOURCE or DESTINATION\n");
+        return -1;
+    }
+
+    /* Walk a copy without trailing slashes so every path is "<source>/<rel>". */
+    size_t src_len = strlen(args->source);
+    while (src_len > 1 && args->source[src_len - 1] == '/') {
+        src_len--;
+    }
+
+    /* DstPath is used as a plain prefix, so it has to end with a slash. */
+    size_t dst_len = strlen(args->destination);
+    const char *sep = args->destination[dst_len - 1] == '/' ? "" : "/";
+    int n = snprintf(dst_buf, sizeof(dst_buf), "%s%s", args->destination, sep);
+
+    if (src_len >= sizeof(src_buf) || n < 0 || (size_t) n >= sizeof(dst_buf)) {
+        fprintf(stderr, "SOURCE or DESTINATION path too long\n");
+        return -1;
+    }
+    memcpy(src_buf, args->source, src_len);
+    src_buf[src_len] = '\0';
+
+    Verbose = args->verbose;
+    DryRun = args->dry_run;
+    DstPath = dst_buf;
+    /* Skip "<source>/"; a source of "/" already ends with its separator. */
+    RootLen = (src_len == 1 && src_buf[0] == '/') ? 1 : src_len + 1;
+    /* TODO: make this an option; non-flat output needs parent directories created. */
+    Flatten = 1;
+
+    pool = tpool_create(args->threads);
+    if (pool == NULL) {
+        fprintf(stderr, "Could not create thread pool\n");
+        return -1;
+    }
+    tpool_start(pool);
+
+    int ret = walk_directory_tree(src_buf);
+    if (ret != 0) {
+        fprintf(stderr, "Could not walk %s\n", src_buf);
+    }
+
+    tpool_wait(pool);
+    tpool_destroy(pool);
+
+    return ret == 0 ? 0 : -1;
+}
diff --git a/extract.h b/extract.h
new file mode 100644
index 0000000..2e0cdcf
--- /dev/null
+++ b/extract.h
@@ -0,0 +1,21 @@
+#ifndef DEEPEXTRACT_EXTRACT_H
+#define DEEPEXTRACT_EXTRACT_H
+
+#include "tpool.h"
+
+#define TRUE 1
+#define FALSE 0
+
+typedef struct args {
+    int version;
+    int verbose;
+    int dry_run;
+    int threads;
+    /* Positional arguments; appended so existing initialisers stay valid. */
+    const char *source;
+    const char *destination;
+} args_t;
+
+int extract(args_t *args);
+
+#endif
diff --git a/main.c b/main.c
index ce76064..bda3922 100644
--- a/main.c
+++ b/main.c
@@ -1,9 +1,68 @@
 #include <stdio.h>
-#include <archive.h>
+#include "thirdparty/argparse/argparse.h"
+#include "extract.h"
 
-int main() {
-    struct archive *a = archive_read_new();
-    archive_read_disk_open(a, "");
-    printf("Hello, World!\n");
-    return 0;
+#define DESCRIPTION ""
+#define EPILOG "Made by simon987 . Released under GPL-3.0"
+
+
+static const char *const Version = "1.0";
+static const char *const usage[] = {
+        "deepextract [OPTION]... SOURCE DESTINATION",
+        NULL,
+};
+
+int validate_args(args_t *args) {
+
+    if (args->dry_run != 0) {
+        args->verbose = TRUE;
+    }
+
+    if (args->threads <= 0) {
+        fprintf(stderr, "Invalid thread count\n");
+        return FALSE;
+    }
+    return TRUE;
+}
+
+int main(int argc, const char **argv) {
+
+    args_t args = {.threads = 1};
+
+    struct argparse_option options[] = {
+            OPT_HELP(),
+
+            OPT_BOOLEAN('v', "version", &args.version, "Show version and exit"),
+            OPT_BOOLEAN(0, "verbose", &args.verbose, "Turn on logging"),
+            OPT_BOOLEAN(0, "dry-run", &args.dry_run, "Don't modify filesystem (implies --verbose)"),
+
+            OPT_GROUP("Options"),
+            OPT_INTEGER('t', "threads", &args.threads, "Thread count"),
+
+            OPT_END(),
+    };
+
+    struct argparse argparse;
+    argparse_init(&argparse, options, usage, 0);
+    argparse_describe(&argparse, DESCRIPTION, EPILOG);
+    argc = argparse_parse(&argparse, argc, argv);
+
+    if (args.version) {
+        printf("%s\n", Version);
+        return 0;
+    }
+
+    if (!validate_args(&args)) {
+        return -1;
+    }
+
+    /* argparse_parse() leaves the positional arguments in argv[0..argc-1]. */
+    if (argc != 2) {
+        argparse_usage(&argparse);
+        return -1;
+    }
+    args.source = argv[0];
+    args.destination = argv[1];
+
+    return extract(&args);
 }
diff --git a/thirdparty/argparse b/thirdparty/argparse
new file mode 160000
index 0000000..fafc503
--- /dev/null
+++ b/thirdparty/argparse
@@ -0,0 +1 @@
+Subproject commit fafc503d23d077bda40c29e8a20ea74707452721
diff --git a/tpool.c b/tpool.c
new file mode 100644
index 0000000..82b4039
--- /dev/null
+++ b/tpool.c
@@ -0,0 +1,219 @@
+#include "tpool.h"
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef struct tpool_work {
+    void *arg;
+    thread_func_t func;
+    struct tpool_work *next;
+} tpool_work_t;
+
+struct tpool {
+    tpool_work_t *work_head;
+    tpool_work_t *work_tail;
+
+    /* Guards the work list, work_cnt, done_cnt and stop. */
+    pthread_mutex_t work_mutex;
+
+    pthread_cond_t has_work_cond;
+    pthread_cond_t working_cond;
+
+    pthread_t *threads;
+
+    int thread_cnt;
+    int started_cnt;
+    int work_cnt;
+    int done_cnt;
+
+    int stop;
+};
+
+/**
+ * Create a work object. Returns NULL when func is NULL or memory runs out.
+ */
+static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) {
+    if (func == NULL) {
+        return NULL;
+    }
+
+    tpool_work_t *work = malloc(sizeof(*work));
+    if (work == NULL) {
+        return NULL;
+    }
+
+    work->func = func;
+    work->arg = arg;
+    work->next = NULL;
+
+    return work;
+}
+
+/**
+ * Pop work object from thread pool. The caller must hold work_mutex.
+ */
+static tpool_work_t *tpool_work_get(tpool_t *pool) {
+    tpool_work_t *work = pool->work_head;
+    if (work == NULL) {
+        return NULL;
+    }
+
+    pool->work_head = work->next;
+    if (pool->work_head == NULL) {
+        pool->work_tail = NULL;
+    }
+
+    return work;
+}
+
+/**
+ * Push work object to thread pool. Returns 1 on success, after which the pool
+ * owns arg and free()s it once func has run; returns 0 on failure, leaving
+ * arg with the caller.
+ */
+int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
+    tpool_work_t *work = tpool_work_create(func, arg);
+    if (work == NULL) {
+        return 0;
+    }
+
+    pthread_mutex_lock(&(pool->work_mutex));
+    if (pool->work_head == NULL) {
+        pool->work_head = work;
+    } else {
+        pool->work_tail->next = work;
+    }
+    pool->work_tail = work;
+    pool->work_cnt++;
+
+    pthread_cond_broadcast(&(pool->has_work_cond));
+    pthread_mutex_unlock(&(pool->work_mutex));
+
+    return 1;
+}
+
+/**
+ * Thread worker function
+ */
+static void *tpool_worker(void *arg) {
+    tpool_t *pool = arg;
+
+    pthread_mutex_lock(&(pool->work_mutex));
+    while (1) {
+        /* Re-check in a loop: condition waits may wake up spuriously. */
+        while (!pool->stop && pool->work_head == NULL) {
+            pthread_cond_wait(&(pool->has_work_cond), &(pool->work_mutex));
+        }
+        if (pool->stop) {
+            break;
+        }
+
+        tpool_work_t *work = tpool_work_get(pool);
+        pthread_mutex_unlock(&(pool->work_mutex));
+
+        work->func(work->arg);
+        free(work->arg);
+        free(work);
+
+        pthread_mutex_lock(&(pool->work_mutex));
+        pool->done_cnt++;
+        if (pool->done_cnt == pool->work_cnt) {
+            pthread_cond_broadcast(&(pool->working_cond));
+        }
+    }
+    pthread_mutex_unlock(&(pool->work_mutex));
+
+    return NULL;
+}
+
+/**
+ * Block until every work object added so far has been processed.
+ */
+void tpool_wait(tpool_t *pool) {
+    pthread_mutex_lock(&(pool->work_mutex));
+    /* Without a running worker nothing would ever signal working_cond. */
+    while (pool->started_cnt > 0 && pool->done_cnt < pool->work_cnt) {
+        pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
+    }
+    pthread_mutex_unlock(&(pool->work_mutex));
+}
+
+/**
+ * Stop the workers, discard work that has not started and free the pool.
+ * Call tpool_wait() first to let queued work finish.
+ */
+void tpool_destroy(tpool_t *pool) {
+    if (pool == NULL) {
+        return;
+    }
+
+    pthread_mutex_lock(&(pool->work_mutex));
+    pool->stop = 1;
+
+    tpool_work_t *work = pool->work_head;
+    while (work != NULL) {
+        tpool_work_t *next = work->next;
+        free(work->arg);
+        free(work);
+        work = next;
+    }
+    pool->work_head = NULL;
+    pool->work_tail = NULL;
+
+    pthread_cond_broadcast(&(pool->has_work_cond));
+    pthread_mutex_unlock(&(pool->work_mutex));
+
+    for (int i = 0; i < pool->started_cnt; i++) {
+        pthread_join(pool->threads[i], NULL);
+    }
+
+    pthread_mutex_destroy(&(pool->work_mutex));
+    pthread_cond_destroy(&(pool->has_work_cond));
+    pthread_cond_destroy(&(pool->working_cond));
+
+    free(pool->threads);
+    free(pool);
+}
+
+/**
+ * Allocate a pool for thread_cnt workers; tpool_start() launches them.
+ * Returns NULL when thread_cnt is not positive or memory runs out.
+ */
+tpool_t *tpool_create(int thread_cnt) {
+    if (thread_cnt <= 0) {
+        return NULL;
+    }
+
+    /* calloc: empty work list, zero counters, stop cleared. */
+    tpool_t *pool = calloc(1, sizeof(*pool));
+    if (pool == NULL) {
+        return NULL;
+    }
+
+    pool->threads = calloc((size_t) thread_cnt, sizeof(pthread_t));
+    if (pool->threads == NULL) {
+        free(pool);
+        return NULL;
+    }
+    pool->thread_cnt = thread_cnt;
+
+    pthread_mutex_init(&(pool->work_mutex), NULL);
+    pthread_cond_init(&(pool->has_work_cond), NULL);
+    pthread_cond_init(&(pool->working_cond), NULL);
+
+    return pool;
+}
+
+/**
+ * Launch the worker threads that have not been started yet.
+ */
+void tpool_start(tpool_t *pool) {
+    for (int i = pool->started_cnt; i < pool->thread_cnt; i++) {
+        if (pthread_create(&pool->threads[i], NULL, tpool_worker, pool) != 0) {
+            fprintf(stderr, "Could not start worker thread %d\n", i);
+            break;
+        }
+        pool->started_cnt++;
+    }
+}
diff --git a/tpool.h b/tpool.h
new file mode 100644
index 0000000..2c0bc4a
--- /dev/null
+++ b/tpool.h
@@ -0,0 +1,19 @@
+#ifndef DEEPEXTRACT_TPOOL_H
+#define DEEPEXTRACT_TPOOL_H
+
+struct tpool;
+typedef struct tpool tpool_t;
+
+typedef void (*thread_func_t)(void *arg);
+
+tpool_t *tpool_create(int num);
+
+void tpool_start(tpool_t *pool);
+
+void tpool_destroy(tpool_t *tm);
+
+int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);
+
+void tpool_wait(tpool_t *tm);
+
+#endif