mirror of
https://github.com/simon987/libscan.git
synced 2025-04-05 12:23:00 +00:00
Read embedded thumbnail simon987/sist2#74
This commit is contained in:
parent
00d5680217
commit
9a240f039b
@ -53,7 +53,6 @@ find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/5
|
||||
target_compile_options(
|
||||
scan
|
||||
PRIVATE
|
||||
-Werror
|
||||
-g
|
||||
)
|
||||
|
||||
|
@ -145,6 +145,23 @@ static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define MAX_TN_SIZE 1024 * 1024 * 15
|
||||
|
||||
void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, struct archive_entry *entry) {
|
||||
size_t entry_size = archive_entry_size(entry);
|
||||
|
||||
if (entry_size <= 0 || entry_size > MAX_TN_SIZE) {
|
||||
return;
|
||||
}
|
||||
|
||||
char* buf = malloc(entry_size);
|
||||
archive_read_data(a, buf, entry_size);
|
||||
|
||||
APPEND_TN_META(doc, 1, 1) // Size unknown
|
||||
ctx->store((char *) doc->uuid, sizeof(doc->uuid), buf, entry_size);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
|
||||
size_t buf_len;
|
||||
@ -180,6 +197,8 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
if (read_doc_props(ctx, a, &tex, doc) != 0) {
|
||||
break;
|
||||
}
|
||||
} else if (strcmp(path, "docProps/thumbnail.jpeg") == 0) {
|
||||
read_thumbnail(ctx, doc, a, entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ typedef struct {
|
||||
long content_size;
|
||||
log_callback_t log;
|
||||
logf_callback_t logf;
|
||||
store_callback_t store;
|
||||
} scan_ooxml_ctx_t;
|
||||
|
||||
void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||
|
@ -388,6 +388,21 @@ TEST(Ooxml, Docx1) {
|
||||
cleanup(&doc, &f);
|
||||
}
|
||||
|
||||
// A .docx carrying an embedded thumbnail must yield ~500 characters of
// content and must cause store_size to change while it is being parsed.
TEST(Ooxml, Docx2Thumbnail) {
    vfile_t f;
    document_t doc;
    load_doc_file("libscan-test-files/test_files/ooxml/embed_tn.docx", &f, &doc);

    // Snapshot of the store counter taken before parsing.
    const size_t stored_before = store_size;

    parse_ooxml(&ooxml_500_ctx, &f, &doc);

    const size_t content_len = strlen(get_meta(&doc, MetaContent)->str_val);
    ASSERT_NEAR(content_len, 500, 4);
    ASSERT_NE(stored_before, store_size);

    cleanup(&doc, &f);
}
|
||||
|
||||
TEST(Ooxml, Xlsx1) {
|
||||
vfile_t f;
|
||||
document_t doc;
|
||||
@ -550,13 +565,13 @@ int main(int argc, char **argv) {
|
||||
|
||||
arc_recurse_media_ctx.log = noop_log;
|
||||
arc_recurse_media_ctx.logf = noop_logf;
|
||||
arc_recurse_media_ctx.store = noop_store;
|
||||
arc_recurse_media_ctx.store = counter_store;
|
||||
arc_recurse_media_ctx.mode = ARC_MODE_RECURSE;
|
||||
arc_recurse_media_ctx.parse = _parse_media;
|
||||
|
||||
arc_list_ctx.log = noop_log;
|
||||
arc_list_ctx.logf = noop_logf;
|
||||
arc_list_ctx.store = noop_store;
|
||||
arc_list_ctx.store = counter_store;
|
||||
arc_list_ctx.mode = ARC_MODE_LIST;
|
||||
|
||||
text_500_ctx.content_size = 500;
|
||||
@ -564,7 +579,7 @@ int main(int argc, char **argv) {
|
||||
text_500_ctx.logf = noop_logf;
|
||||
|
||||
ebook_ctx.content_size = 999999999999;
|
||||
ebook_ctx.store = noop_store;
|
||||
ebook_ctx.store = counter_store;
|
||||
ebook_ctx.tesseract_lang = "eng";
|
||||
ebook_ctx.tesseract_path = "./tessdata";
|
||||
ebook_ctx.tn_size = 500;
|
||||
@ -576,7 +591,7 @@ int main(int argc, char **argv) {
|
||||
|
||||
media_ctx.log = noop_log;
|
||||
media_ctx.logf = noop_logf;
|
||||
media_ctx.store = noop_store;
|
||||
media_ctx.store = counter_store;
|
||||
media_ctx.tn_size = 500;
|
||||
media_ctx.tn_qscale = 1.0;
|
||||
media_ctx.max_media_buffer = (long)2000 * 1024 * 1024;
|
||||
@ -584,6 +599,7 @@ int main(int argc, char **argv) {
|
||||
ooxml_500_ctx.content_size = 500;
|
||||
ooxml_500_ctx.log = noop_log;
|
||||
ooxml_500_ctx.logf = noop_logf;
|
||||
ooxml_500_ctx.store = counter_store;
|
||||
|
||||
mobi_500_ctx.content_size = 500;
|
||||
mobi_500_ctx.log = noop_log;
|
||||
@ -591,7 +607,7 @@ int main(int argc, char **argv) {
|
||||
|
||||
raw_ctx.log = noop_log;
|
||||
raw_ctx.logf = noop_logf;
|
||||
raw_ctx.store = noop_store;
|
||||
raw_ctx.store = counter_store;
|
||||
raw_ctx.tn_size = 500;
|
||||
raw_ctx.tn_qscale = 5.0;
|
||||
|
||||
|
@ -17,8 +17,10 @@ static void noop_log(const char *filepath, int level, char *str) {
|
||||
// noop
|
||||
}
|
||||
|
||||
static void noop_store(char* key, size_t key_len, char *value, size_t value_len) {
|
||||
// noop
|
||||
/* Running total of value bytes passed to counter_store(). */
static size_t store_size = 0;

/*
 * Store callback that keeps nothing: it only adds value_len to store_size.
 * The key, its length and the value pointer are not read.
 */
static void counter_store(char* key, size_t key_len, char *value, size_t value_len) {
    (void) key;
    (void) key_len;
    (void) value;

    store_size = store_size + value_len;
}
|
||||
|
||||
meta_line_t *get_meta(document_t *doc, metakey key);
|
||||
|
Loading…
x
Reference in New Issue
Block a user