mirror of
https://github.com/simon987/libscan.git
synced 2025-04-05 04:22:58 +00:00
245 lines
7.3 KiB
C
245 lines
7.3 KiB
C
#include "arc.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <fcntl.h>
|
|
#include <openssl/evp.h>
|
|
#include <pcre.h>
|
|
|
|
|
|
int should_parse_filtered_file(const char *filepath, int ext) {
|
|
char tmp[PATH_MAX * 2];
|
|
|
|
if (ext == 0) {
|
|
return FALSE;
|
|
}
|
|
|
|
if (strncmp(filepath + ext, "tgz", 3) == 0) {
|
|
return TRUE;
|
|
}
|
|
|
|
memcpy(tmp, filepath, ext - 1);
|
|
*(tmp + ext - 1) = '\0';
|
|
|
|
char *idx = strrchr(tmp, '.');
|
|
|
|
if (idx == NULL) {
|
|
return FALSE;
|
|
}
|
|
|
|
if (strcmp(idx, ".tar") == 0) {
|
|
return TRUE;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
void arc_close(struct vfile *f) {
|
|
SHA1_Final(f->sha1_digest, &f->sha1_ctx);
|
|
|
|
if (f->rewind_buffer != NULL) {
|
|
free(f->rewind_buffer);
|
|
f->rewind_buffer = NULL;
|
|
f->rewind_buffer_size = 0;
|
|
f->rewind_buffer_cursor = 0;
|
|
}
|
|
}
|
|
|
|
|
|
int arc_read(struct vfile *f, void *buf, size_t size) {
|
|
|
|
int bytes_copied = 0;
|
|
|
|
if (f->rewind_buffer_size != 0) {
|
|
if (size > f->rewind_buffer_size) {
|
|
memcpy(buf, f->rewind_buffer + f->rewind_buffer_cursor, f->rewind_buffer_size);
|
|
|
|
bytes_copied = f->rewind_buffer_size;
|
|
size -= f->rewind_buffer_size;
|
|
buf += f->rewind_buffer_size;
|
|
f->rewind_buffer_size = 0;
|
|
} else {
|
|
memcpy(buf, f->rewind_buffer + f->rewind_buffer_cursor, size);
|
|
f->rewind_buffer_size -= (int) size;
|
|
f->rewind_buffer_cursor += (int) size;
|
|
|
|
return (int) size;
|
|
}
|
|
}
|
|
|
|
size_t bytes_read = archive_read_data(f->arc, buf, size);
|
|
|
|
if (bytes_read != 0 && bytes_read <= size && f->calculate_checksum) {
|
|
f->has_checksum = TRUE;
|
|
|
|
safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, bytes_read);
|
|
}
|
|
|
|
if (bytes_read != size && archive_errno(f->arc) != 0) {
|
|
const char *error_str = archive_error_string(f->arc);
|
|
if (error_str != NULL) {
|
|
f->logf(f->filepath, LEVEL_ERROR, "Error reading archive file: %s", error_str);
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
return (int) bytes_read + bytes_copied;
|
|
}
|
|
|
|
int arc_read_rewindable(struct vfile *f, void *buf, size_t size) {
|
|
|
|
if (f->rewind_buffer != NULL) {
|
|
fprintf(stderr, "Allocated rewind buffer more than once for %s", f->filepath);
|
|
exit(-1);
|
|
}
|
|
|
|
size_t bytes_read = archive_read_data(f->arc, buf, size);
|
|
|
|
if (bytes_read != size && archive_errno(f->arc) != 0) {
|
|
const char *error_str = archive_error_string(f->arc);
|
|
if (error_str != NULL) {
|
|
f->logf(f->filepath, LEVEL_ERROR, "Error reading archive file: %s", error_str);
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
f->rewind_buffer = malloc(size);
|
|
f->rewind_buffer_size = (int) size;
|
|
f->rewind_buffer_cursor = 0;
|
|
memcpy(f->rewind_buffer, buf, size);
|
|
|
|
return (int) bytes_read;
|
|
}
|
|
|
|
int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse) {
|
|
arc_data->f = f;
|
|
|
|
if (f->is_fs_file) {
|
|
*a = archive_read_new();
|
|
archive_read_support_filter_all(*a);
|
|
archive_read_support_format_all(*a);
|
|
if (ctx->passphrase[0] != 0) {
|
|
archive_read_add_passphrase(*a, ctx->passphrase);
|
|
}
|
|
|
|
return archive_read_open_filename(*a, f->filepath, ARC_BUF_SIZE);
|
|
} else if (allow_recurse) {
|
|
*a = archive_read_new();
|
|
archive_read_support_filter_all(*a);
|
|
archive_read_support_format_all(*a);
|
|
if (ctx->passphrase[0] != 0) {
|
|
archive_read_add_passphrase(*a, ctx->passphrase);
|
|
}
|
|
|
|
return archive_read_open(
|
|
*a, arc_data,
|
|
vfile_open_callback,
|
|
vfile_read_callback,
|
|
vfile_close_callback
|
|
);
|
|
} else {
|
|
return ARC_SKIPPED;
|
|
}
|
|
}
|
|
|
|
static __thread int sub_strings[30];
|
|
#define EXCLUDED(str) (pcre_exec(exclude, exclude_extra, str, strlen(str), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
|
|
|
|
scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre *exclude, pcre_extra *exclude_extra) {
|
|
|
|
struct archive *a = NULL;
|
|
struct archive_entry *entry = NULL;
|
|
|
|
arc_data_t arc_data;
|
|
arc_data.f = f;
|
|
|
|
int ret = arc_open(ctx, f, &a, &arc_data, ctx->mode == ARC_MODE_RECURSE);
|
|
if (ret == ARC_SKIPPED) {
|
|
return SCAN_OK;
|
|
}
|
|
|
|
if (ret != ARCHIVE_OK) {
|
|
CTX_LOG_ERRORF(f->filepath, "(arc.c) [%d] %s", ret, archive_error_string(a))
|
|
archive_read_free(a);
|
|
return SCAN_ERR_READ;
|
|
}
|
|
|
|
if (ctx->mode == ARC_MODE_LIST) {
|
|
dyn_buffer_t buf = dyn_buffer_create();
|
|
|
|
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
|
|
if (S_ISREG(archive_entry_stat(entry)->st_mode)) {
|
|
const char *utf8_name = archive_entry_pathname_utf8(entry);
|
|
const char *file_path = utf8_name == NULL ? archive_entry_pathname(entry) : utf8_name;
|
|
|
|
dyn_buffer_append_string(&buf, file_path);
|
|
dyn_buffer_write_char(&buf, ' ');
|
|
}
|
|
}
|
|
dyn_buffer_write_char(&buf, '\0');
|
|
|
|
meta_line_t *meta_list = malloc(sizeof(meta_line_t) + buf.cur);
|
|
meta_list->key = MetaContent;
|
|
strcpy(meta_list->str_val, buf.buf);
|
|
APPEND_META(doc, meta_list)
|
|
dyn_buffer_destroy(&buf);
|
|
|
|
} else {
|
|
|
|
parse_job_t *sub_job = malloc(sizeof(parse_job_t) + PATH_MAX * 2);
|
|
|
|
sub_job->vfile.close = arc_close;
|
|
sub_job->vfile.read = arc_read;
|
|
sub_job->vfile.read_rewindable = arc_read_rewindable;
|
|
sub_job->vfile.reset = NULL;
|
|
sub_job->vfile.arc = a;
|
|
sub_job->vfile.filepath = sub_job->filepath;
|
|
sub_job->vfile.is_fs_file = FALSE;
|
|
sub_job->vfile.rewind_buffer_size = 0;
|
|
sub_job->vfile.rewind_buffer = NULL;
|
|
sub_job->vfile.log = ctx->log;
|
|
sub_job->vfile.logf = ctx->logf;
|
|
sub_job->vfile.has_checksum = FALSE;
|
|
sub_job->vfile.calculate_checksum = f->calculate_checksum;
|
|
memcpy(sub_job->parent, doc->path_md5, MD5_DIGEST_LENGTH);
|
|
|
|
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
|
|
sub_job->vfile.info = *archive_entry_stat(entry);
|
|
if (S_ISREG(sub_job->vfile.info.st_mode)) {
|
|
|
|
const char *utf8_name = archive_entry_pathname_utf8(entry);
|
|
|
|
if (utf8_name == NULL) {
|
|
sprintf(sub_job->filepath, "%s#/%s", f->filepath, archive_entry_pathname(entry));
|
|
} else {
|
|
sprintf(sub_job->filepath, "%s#/%s", f->filepath, utf8_name);
|
|
}
|
|
sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;
|
|
|
|
// Handle excludes
|
|
if (exclude != NULL && EXCLUDED(sub_job->filepath)) {
|
|
CTX_LOG_DEBUGF("arc.c", "Excluded: %s", sub_job->filepath)
|
|
continue;
|
|
}
|
|
|
|
char *p = strrchr(sub_job->filepath, '.');
|
|
if (p != NULL && (p - sub_job->filepath) > strlen(f->filepath)) {
|
|
sub_job->ext = (int) (p - sub_job->filepath + 1);
|
|
} else {
|
|
sub_job->ext = (int) strlen(sub_job->filepath);
|
|
}
|
|
|
|
SHA1_Init(&sub_job->vfile.sha1_ctx);
|
|
|
|
ctx->parse(sub_job);
|
|
}
|
|
}
|
|
|
|
free(sub_job);
|
|
}
|
|
|
|
archive_read_free(a);
|
|
return SCAN_OK;
|
|
}
|