diff --git a/third-party/libscan/libscan/arc/arc.c b/third-party/libscan/libscan/arc/arc.c index 20cb2dc..e547231 100644 --- a/third-party/libscan/libscan/arc/arc.c +++ b/third-party/libscan/libscan/arc/arc.c @@ -7,6 +7,7 @@ #include #include +#define MAX_DECOMPRESSED_SIZE_RATIO 40.0 int should_parse_filtered_file(const char *filepath, int ext) { char tmp[PATH_MAX * 2]; @@ -206,6 +207,13 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { sub_job->vfile.info = *archive_entry_stat(entry); + + double decompressed_size_ratio = f->info.st_size > 0 ? (double) sub_job->vfile.info.st_size / (double) f->info.st_size : 0.0; /* no meaningful ratio when the parent size is zero: never divide by zero */ + if (decompressed_size_ratio > MAX_DECOMPRESSED_SIZE_RATIO) { + CTX_LOG_DEBUGF("arc.c", "Skipped %s, possible zip bomb (decompressed_size_ratio=%f)", sub_job->filepath, decompressed_size_ratio) + continue; + } + + if (S_ISREG(sub_job->vfile.info.st_mode)) { const char *utf8_name = archive_entry_pathname_utf8(entry); diff --git a/third-party/libscan/test/main.cpp b/third-party/libscan/test/main.cpp index f9b561e..0693bce 100644 --- a/third-party/libscan/test/main.cpp +++ b/third-party/libscan/test/main.cpp @@ -19,6 +19,7 @@ extern "C" { static scan_arc_ctx_t arc_recurse_media_ctx; static scan_arc_ctx_t arc_list_ctx; static scan_arc_ctx_t arc_recurse_ooxml_ctx; +static scan_arc_ctx_t arc_recurse_noop_ctx; static scan_text_ctx_t text_500_ctx; @@ -58,6 +59,12 @@ void _parse_ooxml(parse_job_t *job) { parse_ooxml(&ooxml_500_ctx, &job->vfile, &LastSubDoc); } +void _parse_noop(parse_job_t *job) { + char buf[1024]; + + while (job->vfile.read(&job->vfile, buf, sizeof(buf)) > 0) {} /* drain the entry; also stops if read reports an error as a negative value (assumed convention, confirm) */ +} + /* Text */ @@ -752,6 +759,16 @@ TEST(Mobi, Azw3) { } /* Arc */ +TEST(Arc, ZipBomp) { + vfile_t f; + document_t doc; + load_doc_file("libscan-test-files/test_files/arc/bomb.zip", &f, &doc); + + parse_archive(&arc_recurse_noop_ctx, &f, &doc, nullptr, nullptr); + + cleanup(&doc, &f); +} + TEST(Arc, Utf8) { vfile_t f; 
document_t doc; @@ -1096,6 +1113,12 @@ int main(int argc, char **argv) { arc_recurse_ooxml_ctx.mode = ARC_MODE_RECURSE; arc_recurse_ooxml_ctx.parse = _parse_ooxml; + arc_recurse_noop_ctx.log = noop_log; + arc_recurse_noop_ctx.logf = noop_logf; + arc_recurse_noop_ctx.store = counter_store; + arc_recurse_noop_ctx.mode = ARC_MODE_RECURSE; + arc_recurse_noop_ctx.parse = _parse_noop; + arc_list_ctx.log = noop_log; arc_list_ctx.logf = noop_logf; arc_list_ctx.store = counter_store;