mirror of
https://github.com/simon987/libscan.git
synced 2025-12-15 15:29:03 +00:00
Support for sha1sum
This commit is contained in:
@@ -46,7 +46,7 @@ static scan_wpd_ctx_t wpd_ctx;
|
||||
static scan_json_ctx_t json_ctx;
|
||||
|
||||
|
||||
document_t LastSubDoc;
|
||||
static document_t LastSubDoc;
|
||||
|
||||
void _parse_media(parse_job_t *job) {
|
||||
parse_media(&media_ctx, &job->vfile, &LastSubDoc);
|
||||
@@ -225,6 +225,24 @@ TEST(Ebook, Utf8Pdf) {
|
||||
cleanup(&doc, &f);
|
||||
}
|
||||
|
||||
TEST(Ebook, Utf8PdfInvalidChars) {
|
||||
vfile_t f;
|
||||
document_t doc;
|
||||
load_doc_file("libscan-test-files/test_files/ebook/invalid_chars.pdf", &f, &doc);
|
||||
|
||||
ebook_ctx.tesseract_lang = nullptr;
|
||||
|
||||
parse_ebook(&ebook_ctx, &f, "application/pdf", &doc);
|
||||
|
||||
ebook_ctx.tesseract_lang = "eng";
|
||||
|
||||
// It should say "HART is a group of highly qualified ..." but the PDF
|
||||
// text has been intentionally mangled by the authors
|
||||
// We can at least filter out the non-printable/invalid characters like '<27>' etc
|
||||
ASSERT_TRUE(STR_STARTS_WITH(get_meta(&doc, MetaContent)->str_val, "HART i a g f highl alified "));
|
||||
cleanup(&doc, &f);
|
||||
}
|
||||
|
||||
TEST(Ebook, Pdf2) {
|
||||
vfile_t f;
|
||||
document_t doc;
|
||||
@@ -418,6 +436,20 @@ TEST(MediaImage, Mem1) {
|
||||
cleanup(&doc, &f);
|
||||
}
|
||||
|
||||
TEST(MediaImage, AsIsFs) {
|
||||
vfile_t f;
|
||||
document_t doc;
|
||||
load_doc_file("libscan-test-files/test_files/media/9555.jpg", &f, &doc);
|
||||
|
||||
size_t size_before = store_size;
|
||||
|
||||
parse_media(&media_ctx, &f, &doc);
|
||||
|
||||
ASSERT_EQ(size_before + 14098, store_size);
|
||||
|
||||
cleanup(&doc, &f);
|
||||
}
|
||||
|
||||
TEST(MediaImage, Mem2AsIs) {
|
||||
vfile_t f;
|
||||
document_t doc;
|
||||
|
||||
@@ -61,6 +61,8 @@ void load_file(const char *filepath, vfile_t *f) {
|
||||
f->read = fs_read;
|
||||
f->close = fs_close;
|
||||
f->is_fs_file = TRUE;
|
||||
f->calculate_checksum = TRUE;
|
||||
f->has_checksum = FALSE;
|
||||
}
|
||||
|
||||
void load_mem(void *mem, size_t size, vfile_t *f) {
|
||||
|
||||
Reference in New Issue
Block a user