From 1d95be534b75850a22ffd95dabfd3fd2a5ab180f Mon Sep 17 00:00:00 2001
From: simon987
Date: Mon, 20 Sep 2021 20:15:35 -0400
Subject: [PATCH] Handle excludes in archive files

parse_archive() now takes the compiled exclude pattern (exclude,
exclude_extra). When exclude is not NULL, every archive entry whose
sub_job->filepath matches the pattern is logged at debug level and
skipped before it is handed to ctx->parse().

The extension offset is only taken from the last '.' when that dot's
offset is greater than strlen(f->filepath); otherwise the entry is
treated as having no extension (ext = length of the path).

The memset() of sha1_ctx that preceded SHA1_Init() is dropped.

Notes on EXCLUDED():
- It expects `exclude` and `exclude_extra` to be in scope at the call
  site and evaluates its argument twice, so pass a side-effect-free
  expression.
- pcre_exec() takes the ovector length as a number of ints, not bytes,
  so the macro passes the element count of sub_strings.
---
 libscan/arc/arc.c | 14 +++++++++++---
 libscan/arc/arc.h |  3 ++-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/libscan/arc/arc.c b/libscan/arc/arc.c
index 6830b7e..c87d965 100644
--- a/libscan/arc/arc.c
+++ b/libscan/arc/arc.c
@@ -142,7 +142,10 @@ int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *ar
     }
 }
 
-scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) {
+static __thread int sub_strings[30]; /* per-thread pcre_exec() output vector; length must be a multiple of 3 */
+#define EXCLUDED(str) (pcre_exec(exclude, exclude_extra, (str), (int) strlen(str), 0, 0, sub_strings, (int) (sizeof(sub_strings) / sizeof(sub_strings[0]))) >= 0)
+
+scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre *exclude, pcre_extra *exclude_extra) {
 
     struct archive *a = NULL;
     struct archive_entry *entry = NULL;
@@ -213,14 +216,19 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) {
                 }
                 sub_job->base = (int) (strrchr(sub_job->filepath, '/') - sub_job->filepath) + 1;
 
+                // Handle excludes: skip entries whose full path matches the exclude pattern
+                if (exclude != NULL && EXCLUDED(sub_job->filepath)) {
+                    CTX_LOG_DEBUGF("arc.c", "Excluded: %s", sub_job->filepath)
+                    continue;
+                }
+
                 char *p = strrchr(sub_job->filepath, '.');
-                if (p != NULL) {
+                if (p != NULL && (p - sub_job->filepath) > strlen(f->filepath)) {
                     sub_job->ext = (int) (p - sub_job->filepath + 1);
                 } else {
                     sub_job->ext = (int) strlen(sub_job->filepath);
                 }
 
-                memset(&sub_job->vfile.sha1_ctx, 0, sizeof(sub_job->vfile.sha1_ctx));
                 SHA1_Init(&sub_job->vfile.sha1_ctx);
                 ctx->parse(sub_job);
 
diff --git a/libscan/arc/arc.h b/libscan/arc/arc.h
index b38ce98..ea9d570 100644
--- a/libscan/arc/arc.h
+++ b/libscan/arc/arc.h
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include <pcre.h>
 #include "../scan.h"
 
 # define ARC_SKIPPED (-1)
@@ -68,7 +69,7 @@ int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *ar
 
 int should_parse_filtered_file(const char *filepath, int ext);
 
-scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc);
+scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre *exclude, pcre_extra *exclude_extra);
 
 int arc_read(struct vfile *f, void *buf, size_t size);
 