Add --list-file argument

This commit is contained in:
2021-12-29 18:54:13 -05:00
parent 52466d5d8a
commit 81008d8936
10 changed files with 113 additions and 24 deletions

View File

@@ -15,18 +15,18 @@ static int should_read_part(const char *part) {
}
if ( // Word
STR_STARTS_WITH(part, "word/document.xml")
|| STR_STARTS_WITH(part, "word/footnotes.xml")
|| STR_STARTS_WITH(part, "word/endnotes.xml")
|| STR_STARTS_WITH(part, "word/footer")
|| STR_STARTS_WITH(part, "word/header")
STR_STARTS_WITH_CONSTANT(part, "word/document.xml")
|| STR_STARTS_WITH_CONSTANT(part, "word/footnotes.xml")
|| STR_STARTS_WITH_CONSTANT(part, "word/endnotes.xml")
|| STR_STARTS_WITH_CONSTANT(part, "word/footer")
|| STR_STARTS_WITH_CONSTANT(part, "word/header")
// PowerPoint
|| STR_STARTS_WITH(part, "ppt/slides/slide")
|| STR_STARTS_WITH(part, "ppt/notesSlides/slide")
|| STR_STARTS_WITH_CONSTANT(part, "ppt/slides/slide")
|| STR_STARTS_WITH_CONSTANT(part, "ppt/notesSlides/slide")
// Excel
|| STR_STARTS_WITH(part, "xl/worksheets/sheet")
|| STR_STARTS_WITH(part, "xl/sharedStrings.xml")
|| STR_STARTS_WITH(part, "xl/workbook.xml")
|| STR_STARTS_WITH_CONSTANT(part, "xl/worksheets/sheet")
|| STR_STARTS_WITH_CONSTANT(part, "xl/sharedStrings.xml")
|| STR_STARTS_WITH_CONSTANT(part, "xl/workbook.xml")
) {
return TRUE;
}

View File

@@ -7,7 +7,7 @@
#include "../third-party/utf8.h/utf8.h"
#include "macros.h"
// Evaluates to 1 when string x begins with y, 0 otherwise.
// The number of bytes compared is sizeof(y) - 1, so y is expected to be a
// string literal; if y were a pointer, sizeof would yield the pointer size
// rather than the string length.
#define STR_STARTS_WITH(x, y) \
    (0 == strncmp((y), (x), sizeof(y) - 1))
// Prefix test against a compile-time constant: evaluates to 1 when string x
// begins with y, 0 otherwise. The prefix length is taken from
// sizeof(y) - 1, so y is expected to be a string literal; a pointer
// argument would make sizeof report the pointer size instead.
#define STR_STARTS_WITH_CONSTANT(x, y) \
    (0 == strncmp((y), (x), sizeof(y) - 1))
#define TEXT_BUF_FULL (-1)
#define INITIAL_BUF_SIZE (1024 * 16)

View File

@@ -227,7 +227,7 @@ TEST(Ebook, Utf8Pdf) {
parse_ebook(&ebook_500_ctx, &f, "application/pdf", &doc);
ASSERT_TRUE(STR_STARTS_WITH(get_meta(&doc, MetaContent)->str_val, "最後測試 "));
ASSERT_TRUE(STR_STARTS_WITH_CONSTANT(get_meta(&doc, MetaContent)->str_val, "最後測試 "));
cleanup(&doc, &f);
}
@@ -245,7 +245,7 @@ TEST(Ebook, Utf8PdfInvalidChars) {
// It should say "HART is a group of highly qualified ..." but the PDF
// text has been intentionally fucked with by the authors
// We can at least filter out the non-printable/invalid characters like '<27>' etc
ASSERT_TRUE(STR_STARTS_WITH(get_meta(&doc, MetaContent)->str_val, "HART i a g f highl alified "));
ASSERT_TRUE(STR_STARTS_WITH_CONSTANT(get_meta(&doc, MetaContent)->str_val, "HART i a g f highl alified "));
cleanup(&doc, &f);
}