Compare commits

..

2 Commits

Author SHA1 Message Date
8212dd4b23 wip 2021-02-27 15:23:56 -05:00
49d4f1ae48 Change encoding for antiword PDF 2021-01-16 12:17:43 -05:00
5 changed files with 32 additions and 10 deletions

View File

@ -47,19 +47,25 @@ int arc_read(struct vfile *f, void *buf, size_t size) {
return read; return read;
} }
int arc_open(vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse) { int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse) {
arc_data->f = f; arc_data->f = f;
if (f->is_fs_file) { if (f->is_fs_file) {
*a = archive_read_new(); *a = archive_read_new();
archive_read_support_filter_all(*a); archive_read_support_filter_all(*a);
archive_read_support_format_all(*a); archive_read_support_format_all(*a);
if (ctx->passphrase[0] != 0) {
archive_read_add_passphrase(*a, ctx->passphrase);
}
return archive_read_open_filename(*a, f->filepath, ARC_BUF_SIZE); return archive_read_open_filename(*a, f->filepath, ARC_BUF_SIZE);
} else if (allow_recurse) { } else if (allow_recurse) {
*a = archive_read_new(); *a = archive_read_new();
archive_read_support_filter_all(*a); archive_read_support_filter_all(*a);
archive_read_support_format_all(*a); archive_read_support_format_all(*a);
if (ctx->passphrase[0] != 0) {
archive_read_add_passphrase(*a, ctx->passphrase);
}
return archive_read_open( return archive_read_open(
*a, arc_data, *a, arc_data,
@ -80,7 +86,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc) {
arc_data_t arc_data; arc_data_t arc_data;
arc_data.f = f; arc_data.f = f;
int ret = arc_open(f, &a, &arc_data, ctx->mode == ARC_MODE_RECURSE); int ret = arc_open(ctx, f, &a, &arc_data, ctx->mode == ARC_MODE_RECURSE);
if (ret == ARC_SKIPPED) { if (ret == ARC_SKIPPED) {
return SCAN_OK; return SCAN_OK;
} }

View File

@ -20,6 +20,7 @@ typedef struct {
log_callback_t log; log_callback_t log;
logf_callback_t logf; logf_callback_t logf;
store_callback_t store; store_callback_t store;
char passphrase[1024];
} scan_arc_ctx_t; } scan_arc_ctx_t;
#define ARC_BUF_SIZE 8192 #define ARC_BUF_SIZE 8192
@ -56,7 +57,7 @@ static int vfile_close_callback(struct archive *a, void *user_data) {
return ARCHIVE_OK; return ARCHIVE_OK;
} }
int arc_open(vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse); int arc_open(scan_arc_ctx_t *ctx, vfile_t *f, struct archive **a, arc_data_t *arc_data, int allow_recurse);
int should_parse_filtered_file(const char *filepath, int ext); int should_parse_filtered_file(const char *filepath, int ext);

View File

@ -5,13 +5,14 @@
#include <stdlib.h> #include <stdlib.h>
#include <archive.h> #include <archive.h>
static scan_arc_ctx_t arc_ctx = (scan_arc_ctx_t) {.passphrase = {0,}};
void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) { void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
struct archive *a = NULL; struct archive *a = NULL;
struct archive_entry *entry = NULL; struct archive_entry *entry = NULL;
arc_data_t arc_data; arc_data_t arc_data;
int ret = arc_open(f, &a, &arc_data, TRUE); int ret = arc_open(&arc_ctx, f, &a, &arc_data, TRUE);
if (ret != ARCHIVE_OK) { if (ret != ARCHIVE_OK) {
CTX_LOG_ERRORF(f->filepath, "(cbr.c) [%d] %s", ret, archive_error_string(a)) CTX_LOG_ERRORF(f->filepath, "(cbr.c) [%d] %s", ret, archive_error_string(a))
archive_read_free(a); archive_read_free(a);

View File

@ -88,7 +88,7 @@ void parse_msdoc_pdf(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file, void* b
opts->bHideHiddenText = 1; opts->bHideHiddenText = 1;
opts->bRemoveRemovedText = 1; opts->bRemoveRemovedText = 1;
opts->bUseLandscape = 0; opts->bUseLandscape = 0;
opts->eEncoding = encoding_latin_2; opts->eEncoding = encoding_latin_1;
opts->iPageHeight = 842; // A4 opts->iPageHeight = 842; // A4
opts->iPageWidth = 595; opts->iPageWidth = 595;
opts->eImageLevel = level_ps_3; opts->eImageLevel = level_ps_3;

View File

@ -595,6 +595,20 @@ TEST(Arc, Utf8) {
cleanup(&doc, &f); cleanup(&doc, &f);
} }
// Parses the encrypted.zip fixture with the recursive media archive context
// and expects the global store_size to differ from its value before parsing.
// NOTE(review): presumably arc_recurse_media_ctx carries the passphrase for
// this fixture -- confirm in the test setup.
TEST(Arc, EncryptedZip) {
    document_t archive_doc;
    vfile_t archive_file;

    load_doc_file("libscan-test-files/test_files/arc/encrypted.zip", &archive_file, &archive_doc);

    // Snapshot the store size so a change made by the parser can be detected.
    size_t stored_before = store_size;

    parse_archive(&arc_recurse_media_ctx, &archive_file, &archive_doc);

    ASSERT_NE(stored_before, store_size);

    cleanup(&archive_doc, &archive_file);
}
/* RAW */ /* RAW */
TEST(RAW, Panasonic) { TEST(RAW, Panasonic) {
vfile_t f; vfile_t f;