This commit is contained in:
simon 2020-01-11 15:04:25 -05:00
parent 1cb8d6525b
commit 3990b9207d
8 changed files with 659 additions and 8 deletions

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "thirdparty/argparse"]
path = thirdparty/argparse
url = https://github.com/cofyc/argparse

View File

@ -2,11 +2,16 @@ cmake_minimum_required(VERSION 3.7)
# Project definition for the deepextract executable (C only).
project(deepextract C)
# NOTE(review): CMAKE_C_STANDARD is set twice in this listing; if both lines
# are really present, the later value (11) is the one in effect.
set(CMAKE_C_STANDARD 99)
set(CMAKE_C_STANDARD 11)
# Opt-in switch tested by the if (STATIC_BUILD) branch further down.
option(STATIC_BUILD "Static build" off)
# Sources of the executable, including the vendored argparse submodule.
# NOTE(review): "deepextract main.c" followed by "deepextract" / "main.c"
# looks like the old and new lines of a diff shown together - confirm
# against the actual CMakeLists.txt.
add_executable(
deepextract main.c
deepextract
main.c
thirdparty/argparse/argparse.c
thirdparty/argparse/argparse.h
extract.c extract.h
tpool.c tpool.h
)
if (STATIC_BUILD)
@ -15,10 +20,28 @@ if (STATIC_BUILD)
-static
archive
acl
pthread
lzma
lz4
zstd
bz2
z
crypto
)
else ()
target_link_libraries(
deepextract
archive
pthread
)
endif ()
# Compiler flags applied only to the deepextract target.
# NOTE(review): -Ofast relaxes strict standards conformance and
# -fno-stack-protector disables stack-smashing detection; confirm both are
# intended for a program that formats paths into fixed-size stack buffers.
target_compile_options(
deepextract
PRIVATE
-Ofast
-fno-stack-protector
-fomit-frame-pointer
)

318
extract.c Normal file
View File

@ -0,0 +1,318 @@
#include "extract.h"
#include <string.h>
#include <ftw.h>
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <archive.h>
#include <archive_entry.h>
#include <errno.h>
/* Size in bytes of the scratch buffers used when streaming file data. */
#define ARC_BUF_SIZE 8192
/* Worker pool that runs handle_file() jobs; created and destroyed in extract(). */
tpool_t *pool;
/* Non-zero makes handle_file() print each "relative path -> destination" line. */
int Verbose;
/* Number of leading characters skipped in a job path to obtain its relative path. */
int RootLen;
/* Non-zero: destination name is DstPath + base name; zero: DstPath + relative path. */
int Flatten;
/* Destination prefix; destination paths are formed by appending directly to it. */
char *DstPath;
/* Forward declaration of the virtual-file type defined further down. */
typedef struct vfile vfile_t;
/*
 * File-name suffixes that is_archive() treats as archives, i.e. files that
 * handle_file() opens and walks instead of copying. Matching is an exact,
 * case-sensitive suffix comparison (see ends_with()).
 * NOTE(review): ".tar.zstd" is listed but ".tar.zst" is not - confirm intended.
 */
const char *archive_extensions[] = {
".iso",
".zip",
".rar",
".ar",
".arc",
".warc",
".7z",
".tgz",
".tar.gz",
".tar.zstd",
".tar.xz",
".tar.bz",
".tar.bz2",
".tar.lz4",
".tar.lzma",
".docx",
".pptx",
".xlsx",
".epub",
".cbz",
".jar",
".deb",
".rpm",
".xpi",
};
/* Reads up to `size` bytes from a virtual file into `buf`; returns an int result. */
typedef int (*read_func_t)(struct vfile *, void *buf, size_t size);
/* Releases whatever the virtual file holds open. */
typedef void (*close_func_t)(struct vfile *);
/*
 * A "virtual file": either a regular file on disk or the current entry of an
 * open libarchive reader. `is_fs_file` tells which member of the union is valid.
 */
typedef struct vfile {
union {
/* Valid when is_fs_file is non-zero; -1 means "not opened yet". */
int fd;
/* Valid when is_fs_file is zero: the archive reader the entry is read from. */
struct archive *arc;
};
int is_fs_file;
/* Path of the file; points at storage owned by the enclosing job_t. */
char *filepath;
/* Stat data of the file or archive entry (st_mode is used when copying). */
struct stat info;
/* Read implementation: fs_read or arc_read in this file. */
read_func_t read;
/* Close implementation; may be NULL (callers check before calling). */
close_func_t close;
} vfile_t;
/*
 * One unit of work for handle_file(): a virtual file plus its path.
 * The struct is over-allocated so that `filepath` can hold the whole path;
 * the allocation sizes used elsewhere in this file rely on the array being
 * declared with one element.
 */
typedef struct {
struct vfile vfile;
/* Offset within filepath at which the base name starts. */
int base;
/* Start of the NUL-terminated path stored inline after the struct. */
char filepath[1];
} job_t;
/*
 * User data handed to the libarchive open/read/close callbacks: the virtual
 * file to read from and the buffer the read callback fills and exposes.
 */
typedef struct arc_data {
vfile_t *f;
char buf[ARC_BUF_SIZE];
} arc_data_f;
/**
 * Tell whether `str` ends with `suffix` (exact byte comparison).
 *
 * @return 1 when the last strlen(suffix) bytes of `str` equal `suffix`,
 *         0 otherwise. An empty suffix matches every string.
 */
int ends_with(const char *str, const char *suffix) {
    const size_t total = strlen(str);
    const size_t tail = strlen(suffix);

    if (tail > total) {
        return 0;
    }

    const char *candidate = str + (total - tail);
    return memcmp(candidate, suffix, tail) == 0;
}
/**
 * Decide whether a path names an archive, judged only by its suffix.
 *
 * @param filepath full path of the file
 * @param base     offset in `filepath` where the base name begins
 * @return TRUE when the base name ends with one of archive_extensions,
 *         FALSE otherwise
 */
int is_archive(char *filepath, int base) {
    const char *name = filepath + base;
    const int count = (int) (sizeof(archive_extensions) / sizeof(archive_extensions[0]));
    int idx = 0;

    while (idx < count) {
        if (ends_with(name, archive_extensions[idx])) {
            return TRUE;
        }
        idx++;
    }
    return FALSE;
}
int vfile_open_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->is_fs_file && data->f->fd == -1) {
data->f->fd = open(data->f->filepath, O_RDONLY);
}
return ARCHIVE_OK;
}
/* True when a call failed because the target name already exists. */
#define IS_EEXIST_ERR(ret) ((ret) < 0 && errno == EEXIST)
/**
 * Materialise `src` at path `dst`.
 *
 * Filesystem files are hard-linked; archive entries are streamed into a newly
 * created file that gets the entry's mode. If `dst` already exists, the names
 * "dst.1", "dst.2", ... are tried until one is free. Failures are reported on
 * stderr and the file is skipped.
 *
 * @param src virtual file to copy from
 * @param dst destination path
 */
void copy_or_link(vfile_t *src, const char *dst) {
    char new_name[8192];

    if (src->is_fs_file) {
        int ret = link(src->filepath, dst);
        for (int i = 1; IS_EEXIST_ERR(ret); i++) {
            int n = snprintf(new_name, sizeof(new_name), "%s.%d", dst, i);
            if (n < 0 || (size_t) n >= sizeof(new_name)) {
                fprintf(stderr, "%s: destination name too long\n", dst);
                return;
            }
            ret = link(src->filepath, new_name);
        }
        if (ret < 0) {
            perror(dst);
        }
        return;
    }

    int fd = open(dst, O_WRONLY | O_CREAT | O_EXCL, src->info.st_mode);
    for (int i = 1; IS_EEXIST_ERR(fd); i++) {
        int n = snprintf(new_name, sizeof(new_name), "%s.%d", dst, i);
        if (n < 0 || (size_t) n >= sizeof(new_name)) {
            fprintf(stderr, "%s: destination name too long\n", dst);
            return;
        }
        fd = open(new_name, O_WRONLY | O_CREAT | O_EXCL, src->info.st_mode);
    }
    /* 0 is a valid descriptor; only negative values signal failure. */
    if (fd < 0) {
        perror(dst);
        return;
    }

    char buf[ARC_BUF_SIZE];
    int failed = 0;
    while (!failed) {
        int got = src->read(src, buf, ARC_BUF_SIZE);
        if (got <= 0) {
            /* 0 is end of data, negative is a read error: stop either way. */
            break;
        }
        /* Write exactly the bytes that were read, retrying on short writes. */
        int off = 0;
        while (off < got) {
            ssize_t written = write(fd, buf + off, (size_t) (got - off));
            if (written < 0) {
                if (errno == EINTR) {
                    continue;
                }
                perror(dst);
                failed = 1;
                break;
            }
            off += (int) written;
        }
    }
    close(fd);
}
long vfile_read_callback(struct archive *a, void *user_data, const void **buf) {
arc_data_f *data = user_data;
*buf = data->buf;
return data->f->read(data->f, data->buf, ARC_BUF_SIZE);
}
int vfile_close_callback(struct archive *a, void *user_data) {
arc_data_f *data = user_data;
if (data->f->close != NULL) {
data->f->close(data->f);
}
return ARCHIVE_OK;
}
/**
 * vfile read function for archive entries: reads data of the current entry
 * of the archive reader stored in `f`.
 *
 * @return the result of archive_read_data(), converted to int
 */
int arc_read(struct vfile *f, void *buf, size_t size) {
    struct archive *reader = f->arc;

    return archive_read_data(reader, buf, size);
}
void handle_file(void *arg) {
job_t *job = arg;
if (is_archive(job->filepath, job->base)) {
struct archive *a;
struct archive_entry *entry;
arc_data_f data;
data.f = &job->vfile;
int ret = 0;
if (data.f->is_fs_file) {
a = archive_read_new();
archive_read_support_filter_all(a);
archive_read_support_format_all(a);
ret = archive_read_open_filename(a, job->filepath, ARC_BUF_SIZE);
} else {
a = archive_read_new();
archive_read_support_filter_all(a);
archive_read_support_format_all(a);
ret = archive_read_open(
a, &data,
vfile_open_callback,
vfile_read_callback,
vfile_close_callback
);
}
if (ret != ARCHIVE_OK) {
fprintf(stderr, "(arc.c) %s [%d] %s", job->filepath, ret, archive_error_string(a));
archive_read_free(a);
return;
}
job_t *sub_job = malloc(sizeof(job_t) + 8192);
sub_job->vfile.close = NULL;
sub_job->vfile.read = arc_read;
sub_job->vfile.arc = a;
sub_job->vfile.filepath = sub_job->filepath;
sub_job->vfile.is_fs_file = FALSE;
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
if (S_ISREG(archive_entry_filetype(entry))) {
sprintf(sub_job->filepath, "%s#/%s", job->vfile.filepath, archive_entry_pathname(entry));
sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;
sub_job->vfile.info = *archive_entry_stat(entry);
handle_file(sub_job);
}
}
free(sub_job);
} else {
char *relpath = job->filepath + RootLen;
char dstpath[8192];
strcpy(dstpath, DstPath);
if (Flatten) {
strcat(dstpath, job->filepath + job->base);
} else {
strcat(dstpath, relpath);
}
if (Verbose) {
printf("%s -> %s\n", relpath, dstpath);
}
copy_or_link(&job->vfile, dstpath);
}
}
/**
 * vfile read function for filesystem files. Opens the file read-only on
 * first use (fd == -1), then reads from it.
 *
 * @return -1 if the file cannot be opened, otherwise the result of read()
 */
int fs_read(struct vfile *f, void *buf, size_t size) {
    const int needs_open = (f->fd == -1);

    if (needs_open) {
        int handle = open(f->filepath, O_RDONLY);
        f->fd = handle;
        if (handle == -1) {
            return -1;
        }
    }
    return read(f->fd, buf, size);
}
/* Calls the close function of vfile `f` (an lvalue, not a pointer) if it has one. */
#define CLOSE_FILE(f) if (f.close != NULL) {f.close(&f);};
/**
 * vfile close function for filesystem files. Closes the descriptor if one is
 * open and marks the vfile as closed, so a repeated call is harmless and a
 * later fs_read() reopens the file.
 */
void fs_close(struct vfile *f) {
    if (f->fd != -1) {
        close(f->fd);
        f->fd = -1;
    }
}
/**
 * Allocate a job describing a regular file on disk.
 *
 * @param filepath path of the file; copied into the job
 * @param base     offset in `filepath` where the base name begins
 * @param info     stat data of the file
 * @return a heap-allocated job to be released with free(), or NULL if the
 *         allocation fails. The file is not opened here (fd is set to -1).
 */
job_t *create_fs_job(const char *filepath, int base, struct stat info) {
    size_t len = strlen(filepath);

    /* job_t already holds one byte of filepath, which covers the terminator. */
    job_t *job = malloc(sizeof(job_t) + len);
    if (job == NULL) {
        return NULL;
    }

    memcpy(job->filepath, filepath, len + 1);
    job->base = base;
    job->vfile.filepath = job->filepath;
    job->vfile.read = fs_read;
    job->vfile.close = fs_close;
    job->vfile.info = info;
    job->vfile.is_fs_file = 1;
    job->vfile.fd = -1;
    return job;
}
/**
 * nftw() callback: queue every regular file on the worker pool.
 *
 * Files that cannot be queued are reported on stderr and skipped.
 *
 * @return always 0, so the walk continues
 */
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
    if (typeflag != FTW_F || !S_ISREG(info->st_mode)) {
        return 0;
    }

    job_t *job = create_fs_job(filepath, ftw->base, *info);
    if (job == NULL) {
        fprintf(stderr, "%s: could not allocate job, skipping\n", filepath);
        return 0;
    }

    /* The pool takes ownership only when the job was actually queued. */
    if (!tpool_add_work(pool, handle_file, job)) {
        fprintf(stderr, "%s: could not queue job, skipping\n", filepath);
        free(job);
    }
    return 0;
}
/**
 * Walk the tree rooted at `dirpath`, passing every entry to handle_entry().
 *
 * @return the result of nftw()
 */
int walk_directory_tree(const char *dirpath) {
    const int depth_arg = 15;        /* third argument handed to nftw() */
    const int walk_flags = FTW_PHYS;

    return nftw(dirpath, handle_entry, depth_arg, walk_flags);
}
/**
 * Run the extraction: walk the source tree and process every regular file
 * on a pool of args->threads worker threads.
 *
 * NOTE(review): source and destination directories are hard-coded here and
 * Flatten is always on; only `verbose` and `threads` are read from `args`.
 *
 * @return 0 on success, or the non-zero result of the directory walk
 */
int extract(args_t *args) {
    /* Both paths end with '/' because other paths are appended to them directly. */
    static const char src_root[] = "/home/drone/Downloads/";

    Verbose = args->verbose;
    DstPath = "/home/drone/Documents/test/";
    /* Derived from the source root so the two cannot drift apart. */
    RootLen = (int) strlen(src_root);
    Flatten = 1;

    pool = tpool_create(args->threads);
    tpool_start(pool);

    int walk_result = walk_directory_tree(src_root);
    if (walk_result != 0) {
        fprintf(stderr, "Could not walk %s\n", src_root);
    }

    /* Let queued jobs finish even if the walk stopped early. */
    tpool_wait(pool);
    tpool_destroy(pool);
    return walk_result;
}

19
extract.h Normal file
View File

@ -0,0 +1,19 @@
/* Public interface of the extraction module. */
#ifndef DEEPEXTRACT_EXTRACT_H
#define DEEPEXTRACT_EXTRACT_H
#include "tpool.h"
/* Boolean constants used across the project. */
#define TRUE 1
#define FALSE 0
/*
 * Feature-test macro.
 * NOTE(review): such macros normally have to be defined before the first
 * system header is included; main.c includes <stdio.h> before this header -
 * confirm it still has the intended effect there.
 */
#define _XOPEN_SOURCE 500
/* Command-line options as parsed by main(). */
typedef struct args {
/* Non-zero: print the version and exit. */
int version;
/* Non-zero: turn on logging. */
int verbose;
/* Non-zero: dry run; main.c forces `verbose` on when this is set. */
int dry_run;
/* Number of worker threads; main.c rejects values <= 0. */
int threads;
} args_t;
/* Runs the extraction with the given options and returns an int status. */
int extract(args_t *args);
#endif

63
main.c
View File

@ -1,9 +1,60 @@
#include <stdio.h>
#include <archive.h>
#include "thirdparty/argparse/argparse.h"
#include "extract.h"
int main() {
struct archive *a = archive_read_new();
archive_read_disk_open(a, "");
printf("Hello, World!\n");
return 0;
/* Description text passed to argparse_describe(); currently empty. */
#define DESCRIPTION ""
/* Epilog text passed to argparse_describe(). */
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
/* Version string printed when the version option is given. */
static const char *const Version = "1.0";
/* NULL-terminated list of usage lines passed to argparse_init(). */
static const char *const usage[] = {
"deepextract [OPTION]... SOURCE DESTINATION",
NULL,
};
/**
 * Check and normalise the parsed command-line options.
 *
 * A dry run implies verbose output, so `verbose` is switched on when
 * `dry_run` is set.
 *
 * @return TRUE when the options are usable, FALSE (after printing a message
 *         to stderr) when the thread count is not positive
 */
int validate_args(args_t *args) {
    if (args->dry_run != 0) {
        args->verbose = TRUE;
    }

    if (args->threads <= 0) {
        /* Terminate the line so the shell prompt does not follow the message. */
        fprintf(stderr, "Invalid thread count\n");
        return FALSE;
    }

    return TRUE;
}
/**
 * Program entry point: parse the command line, handle the version option,
 * validate the options and run the extraction.
 *
 * @return 0 after printing the version, -1 when the options are invalid,
 *         otherwise the result of extract()
 */
int main(int argc, const char **argv) {
    /* Defaults: version=0, verbose=0, dry_run=0, threads=1. */
    args_t args = {0, 0, 0, 1};

    struct argparse_option options[] = {
            OPT_HELP(),
            OPT_BOOLEAN('v', "version", &args.version, "Show version and exit"),
            OPT_BOOLEAN(0, "verbose", &args.verbose, "Turn on logging"),
            OPT_BOOLEAN(0, "dry-run", &args.dry_run, "Don't modify filesystem (implies --verbose)"),
            OPT_GROUP("Options"),
            OPT_INTEGER('t', "threads", &args.threads, "Thread count"),
            OPT_END(),
    };

    struct argparse argparse;
    argparse_init(&argparse, options, usage, 0);
    argparse_describe(&argparse, DESCRIPTION, EPILOG);
    /* NOTE(review): the remaining positional arguments are not used yet. */
    argc = argparse_parse(&argparse, argc, argv);

    if (args.version) {
        /* Never pass a variable as the format string. */
        printf("%s\n", Version);
        return 0;
    }

    if (!validate_args(&args)) {
        return -1;
    }

    return extract(&args);
}

1
thirdparty/argparse vendored Submodule

@ -0,0 +1 @@
Subproject commit fafc503d23d077bda40c29e8a20ea74707452721

217
tpool.c Normal file
View File

@ -0,0 +1,217 @@
#include "tpool.h"
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>
/* Signature of a work function run by the pool. */
typedef void (*thread_func_t)(void *arg);
/* One queued work item; items form a singly linked FIFO list. */
typedef struct tpool_work {
/* Argument passed to func; freed by the worker after func returns. */
void *arg;
thread_func_t func;
struct tpool_work *next;
} tpool_work_t;
/* Thread pool state. The queue, counters and stop flag are accessed under work_mutex. */
typedef struct tpool {
/* Oldest queued item, NULL when the queue is empty. */
tpool_work_t *work_head;
/* Newest queued item. */
tpool_work_t *work_tail;
pthread_mutex_t work_mutex;
/* Signalled when work is added and when the pool is destroyed. */
pthread_cond_t has_work_cond;
/* Signalled by workers; tpool_wait() waits on it. */
pthread_cond_t working_cond;
/* Array of thread_cnt thread handles. */
pthread_t *threads;
int thread_cnt;
/* Number of items ever queued. */
int work_cnt;
/* Number of items whose function has returned. */
int done_cnt;
/* Non-zero tells the workers to exit. */
int stop;
} tpool_t;
/**
 * Create a work object
 *
 * @param func function to run; must not be NULL
 * @param arg  argument passed to func
 * @return the new, unlinked work item, or NULL if func is NULL or the
 *         allocation fails
 */
static tpool_work_t *tpool_work_create(thread_func_t func, void *arg) {
    if (func == NULL) {
        return NULL;
    }

    tpool_work_t *work = malloc(sizeof(*work));
    if (work == NULL) {
        return NULL;
    }

    work->func = func;
    work->arg = arg;
    work->next = NULL;
    return work;
}
/**
 * Pop work object from thread pool
 *
 * Detaches the oldest queued item and returns it, or returns NULL when the
 * queue is empty. Does no locking itself.
 */
static tpool_work_t *tpool_work_get(tpool_t *pool) {
    tpool_work_t *first = pool->work_head;

    if (first != NULL) {
        tpool_work_t *rest = first->next;
        pool->work_head = rest;
        if (rest == NULL) {
            /* That was the last item: the queue is empty again. */
            pool->work_tail = NULL;
        }
    }
    return first;
}
/**
 * Push work object to thread pool
 *
 * Appends a new item to the end of the queue, bumps the queued-work counter
 * and wakes the workers.
 *
 * @return 1 when the item was queued, 0 when it could not be created
 */
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
    tpool_work_t *item = tpool_work_create(func, arg);
    if (item == NULL) {
        return 0;
    }

    pthread_mutex_lock(&pool->work_mutex);

    if (pool->work_head != NULL) {
        pool->work_tail->next = item;
    } else {
        pool->work_head = item;
    }
    pool->work_tail = item;
    pool->work_cnt += 1;

    pthread_cond_broadcast(&pool->has_work_cond);
    pthread_mutex_unlock(&pool->work_mutex);
    return 1;
}
/**
 * Thread worker function
 *
 * Repeatedly takes the oldest work item, runs it without holding the mutex,
 * then frees the item and its argument and counts it as done. Sleeps on
 * has_work_cond while the queue is empty and exits once the stop flag is
 * set. working_cond is signalled whenever an item finishes and the queue is
 * empty, and once more on exit.
 *
 * @param arg the tpool_t this worker belongs to
 * @return always NULL
 */
static void *tpool_worker(void *arg) {
    tpool_t *pool = arg;

    /* The mutex is held at the top of every iteration and when the loop ends. */
    pthread_mutex_lock(&(pool->work_mutex));
    while (!pool->stop) {
        if (pool->work_head == NULL) {
            pthread_cond_wait(&(pool->has_work_cond), &(pool->work_mutex));
            /* Re-check stop and the queue: the wake-up may be spurious. */
            continue;
        }

        tpool_work_t *work = tpool_work_get(pool);
        pthread_mutex_unlock(&(pool->work_mutex));

        work->func(work->arg);
        free(work->arg);
        free(work);

        pthread_mutex_lock(&(pool->work_mutex));
        pool->done_cnt++;
        if (pool->work_head == NULL) {
            pthread_cond_signal(&(pool->working_cond));
        }
    }

    pthread_cond_signal(&(pool->working_cond));
    pthread_mutex_unlock(&(pool->work_mutex));
    return NULL;
}
/**
 * Block until every queued work item has finished, then tell the workers to
 * stop. The pool accepts no useful work afterwards; call tpool_destroy() next.
 */
void tpool_wait(tpool_t *pool) {
    pthread_mutex_lock(&(pool->work_mutex));

    /* Loop to absorb spurious wake-ups and signals sent while work remains. */
    while (pool->done_cnt < pool->work_cnt) {
        pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
    }

    /* No sleeping here: the counters cannot change while the mutex is held. */
    pool->stop = 1;
    /* Wake idle workers so they notice the stop flag. */
    pthread_cond_broadcast(&(pool->has_work_cond));

    pthread_mutex_unlock(&(pool->work_mutex));
}
/**
 * Stop the workers, discard work that never ran and release the pool.
 *
 * Work items still queued are freed together with their arguments, which
 * the pool owns once they are queued. Passing NULL is a no-op.
 */
void tpool_destroy(tpool_t *pool) {
    if (pool == NULL) {
        return;
    }

    pthread_mutex_lock(&(pool->work_mutex));

    /* Detach the queue first so no worker can pick up a freed item. */
    tpool_work_t *work = pool->work_head;
    pool->work_head = NULL;
    pool->work_tail = NULL;
    while (work != NULL) {
        tpool_work_t *next = work->next;
        free(work->arg);
        free(work);
        work = next;
    }

    /* Make sure the workers exit even if tpool_wait() was never called. */
    pool->stop = 1;
    pthread_cond_broadcast(&(pool->has_work_cond));
    pthread_mutex_unlock(&(pool->work_mutex));

    for (int i = 0; i < pool->thread_cnt; i++) {
        pthread_t thread = pool->threads[i];
        /* Slots are zero-initialised; zero means the thread was never started. */
        if (thread != 0) {
            pthread_join(thread, NULL);
        }
    }

    pthread_mutex_destroy(&(pool->work_mutex));
    pthread_cond_destroy(&(pool->has_work_cond));
    pthread_cond_destroy(&(pool->working_cond));

    free(pool->threads);
    free(pool);
}
/**
 * Allocate and initialise a thread pool. No threads are started here; call
 * tpool_start() for that.
 *
 * @param thread_cnt number of worker threads the pool will run
 * @return the new pool, or NULL if thread_cnt is negative or memory cannot
 *         be allocated
 */
tpool_t *tpool_create(int thread_cnt) {
    if (thread_cnt < 0) {
        return NULL;
    }

    tpool_t *pool = malloc(sizeof(*pool));
    if (pool == NULL) {
        return NULL;
    }

    /* Zeroed slots mark threads that have not been started. */
    pool->threads = calloc((size_t) thread_cnt, sizeof(pthread_t));
    if (pool->threads == NULL && thread_cnt > 0) {
        free(pool);
        return NULL;
    }

    pool->thread_cnt = thread_cnt;
    pool->work_cnt = 0;
    pool->done_cnt = 0;
    pool->stop = 0;
    pool->work_head = NULL;
    pool->work_tail = NULL;

    pthread_mutex_init(&(pool->work_mutex), NULL);
    pthread_cond_init(&(pool->has_work_cond), NULL);
    pthread_cond_init(&(pool->working_cond), NULL);

    return pool;
}
/**
 * Start the pool's worker threads.
 *
 * A thread that fails to start leaves its slot zeroed, which
 * tpool_destroy() treats as "never started" and does not join.
 */
void tpool_start(tpool_t *pool) {
    for (int i = 0; i < pool->thread_cnt; i++) {
        if (pthread_create(&pool->threads[i], NULL, tpool_worker, pool) != 0) {
            /* The slot contents are unspecified after a failed create. */
            pool->threads[i] = 0;
        }
    }
}

19
tpool.h Normal file
View File

@ -0,0 +1,19 @@
/* Public interface of the thread pool. */
#ifndef DEEPEXTRACT_TPOOL_H
#define DEEPEXTRACT_TPOOL_H
/* Opaque pool type; the definition lives in tpool.c. */
struct tpool;
typedef struct tpool tpool_t;
/* Signature of a work function; `arg` is the pointer given to tpool_add_work(). */
typedef void (*thread_func_t)(void *arg);
/* Allocates a pool for `num` worker threads; threads are started by tpool_start(). */
tpool_t *tpool_create(int num);
/* Starts the pool's worker threads. */
void tpool_start(tpool_t *pool);
/* Frees queued work, joins the workers and frees the pool; NULL is a no-op. */
void tpool_destroy(tpool_t *tm);
/*
 * Queues func(arg). Returns 1 on success, 0 if the work item could not be
 * created. The worker calls free() on `arg` after func returns.
 */
int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg);
/* Blocks until all queued work is done, then sets the pool's stop flag. */
void tpool_wait(tpool_t *tm);
#endif