mirror of
				https://github.com/simon987/libscan.git
				synced 2025-11-04 11:06:52 +00:00 
			
		
		
		
	Read embedded thumbnail simon987/sist2#74
This commit is contained in:
		
							parent
							
								
									00d5680217
								
							
						
					
					
						commit
						9a240f039b
					
				@ -53,7 +53,6 @@ find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/5
 | 
			
		||||
target_compile_options(
 | 
			
		||||
        scan
 | 
			
		||||
        PRIVATE
 | 
			
		||||
        -Werror
 | 
			
		||||
        -g
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -145,6 +145,23 @@ static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Largest embedded thumbnail, in bytes (15 MiB), that read_thumbnail() will load.
// The expansion is parenthesized so it behaves as a single operand next to
// any operator (e.g. x % MAX_TN_SIZE, x / MAX_TN_SIZE).
#define MAX_TN_SIZE (1024 * 1024 * 15)
 | 
			
		||||
 | 
			
		||||
/**
 * Read the data of an archive entry into memory and pass the raw bytes to
 * ctx->store, keyed by doc->uuid.
 *
 * Nothing is stored when the entry's reported size is outside
 * (0, MAX_TN_SIZE], when the buffer cannot be allocated, or when the entry
 * cannot be read in full.
 *
 * @param ctx   provides the store callback
 * @param doc   document whose uuid is used as the store key; also passed to
 *              APPEND_TN_META
 * @param a     archive reader the entry data is pulled from
 * @param entry header of the entry to read; only its size is consulted here
 */
void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, struct archive_entry *entry) {
    // archive_entry_size() yields a signed 64-bit value; keep it signed so the
    // lower-bound check below actually rejects negative sizes.
    long long entry_size = archive_entry_size(entry);

    if (entry_size <= 0 || entry_size > MAX_TN_SIZE) {
        return;
    }

    size_t buf_len = (size_t) entry_size;
    char *buf = malloc(buf_len);
    if (buf == NULL) {
        return;
    }

    // archive_read_data() returns the number of bytes read or a negative
    // error code. On a short read the tail of buf would be uninitialized,
    // so only a complete read is stored.
    long long bytes_read = archive_read_data(a, buf, buf_len);
    if (bytes_read != entry_size) {
        free(buf);
        return;
    }

    APPEND_TN_META(doc, 1, 1) // Size unknown
    ctx->store((char *) doc->uuid, sizeof(doc->uuid), buf, buf_len);
    free(buf);
}
 | 
			
		||||
 | 
			
		||||
void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
 | 
			
		||||
 | 
			
		||||
    size_t buf_len;
 | 
			
		||||
@ -180,6 +197,8 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
 | 
			
		||||
                if (read_doc_props(ctx, a, &tex, doc) != 0) {
 | 
			
		||||
                    break;
 | 
			
		||||
                }
 | 
			
		||||
            } else if (strcmp(path, "docProps/thumbnail.jpeg") == 0) {
 | 
			
		||||
                read_thumbnail(ctx, doc, a, entry);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -8,6 +8,7 @@ typedef struct {
 | 
			
		||||
    long content_size;
 | 
			
		||||
    log_callback_t log;
 | 
			
		||||
    logf_callback_t logf;
 | 
			
		||||
    store_callback_t store;
 | 
			
		||||
} scan_ooxml_ctx_t;
 | 
			
		||||
 | 
			
		||||
void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc);
 | 
			
		||||
 | 
			
		||||
@ -388,6 +388,21 @@ TEST(Ooxml, Docx1) {
 | 
			
		||||
    cleanup(&doc, &f);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Parses embed_tn.docx with the 500-character OOXML context and checks that
// text content was extracted and that the store_size counter moved.
TEST(Ooxml, Docx2Thumbnail) {
    vfile_t f;
    document_t doc;
    load_doc_file("libscan-test-files/test_files/ooxml/embed_tn.docx", &f, &doc);

    // Snapshot the counter first; only the change across parse_ooxml() matters.
    const size_t stored_before = store_size;

    parse_ooxml(&ooxml_500_ctx, &f, &doc);

    // Extracted content length must be within 4 of 500.
    const size_t content_len = strlen(get_meta(&doc, MetaContent)->str_val);
    ASSERT_NEAR(content_len, 500, 4);

    // The counter must differ from its value before parsing.
    ASSERT_NE(stored_before, store_size);

    cleanup(&doc, &f);
}
 | 
			
		||||
 | 
			
		||||
TEST(Ooxml, Xlsx1) {
 | 
			
		||||
    vfile_t f;
 | 
			
		||||
    document_t doc;
 | 
			
		||||
@ -550,13 +565,13 @@ int main(int argc, char **argv) {
 | 
			
		||||
 | 
			
		||||
    arc_recurse_media_ctx.log = noop_log;
 | 
			
		||||
    arc_recurse_media_ctx.logf = noop_logf;
 | 
			
		||||
    arc_recurse_media_ctx.store = noop_store;
 | 
			
		||||
    arc_recurse_media_ctx.store = counter_store;
 | 
			
		||||
    arc_recurse_media_ctx.mode = ARC_MODE_RECURSE;
 | 
			
		||||
    arc_recurse_media_ctx.parse = _parse_media;
 | 
			
		||||
 | 
			
		||||
    arc_list_ctx.log = noop_log;
 | 
			
		||||
    arc_list_ctx.logf = noop_logf;
 | 
			
		||||
    arc_list_ctx.store = noop_store;
 | 
			
		||||
    arc_list_ctx.store = counter_store;
 | 
			
		||||
    arc_list_ctx.mode = ARC_MODE_LIST;
 | 
			
		||||
 | 
			
		||||
    text_500_ctx.content_size = 500;
 | 
			
		||||
@ -564,7 +579,7 @@ int main(int argc, char **argv) {
 | 
			
		||||
    text_500_ctx.logf = noop_logf;
 | 
			
		||||
 | 
			
		||||
    ebook_ctx.content_size = 999999999999;
 | 
			
		||||
    ebook_ctx.store = noop_store;
 | 
			
		||||
    ebook_ctx.store = counter_store;
 | 
			
		||||
    ebook_ctx.tesseract_lang = "eng";
 | 
			
		||||
    ebook_ctx.tesseract_path = "./tessdata";
 | 
			
		||||
    ebook_ctx.tn_size = 500;
 | 
			
		||||
@ -576,7 +591,7 @@ int main(int argc, char **argv) {
 | 
			
		||||
 | 
			
		||||
    media_ctx.log = noop_log;
 | 
			
		||||
    media_ctx.logf = noop_logf;
 | 
			
		||||
    media_ctx.store = noop_store;
 | 
			
		||||
    media_ctx.store = counter_store;
 | 
			
		||||
    media_ctx.tn_size = 500;
 | 
			
		||||
    media_ctx.tn_qscale = 1.0;
 | 
			
		||||
    media_ctx.max_media_buffer = (long)2000 * 1024 * 1024;
 | 
			
		||||
@ -584,6 +599,7 @@ int main(int argc, char **argv) {
 | 
			
		||||
    ooxml_500_ctx.content_size = 500;
 | 
			
		||||
    ooxml_500_ctx.log = noop_log;
 | 
			
		||||
    ooxml_500_ctx.logf = noop_logf;
 | 
			
		||||
    ooxml_500_ctx.store = counter_store;
 | 
			
		||||
 | 
			
		||||
    mobi_500_ctx.content_size = 500;
 | 
			
		||||
    mobi_500_ctx.log = noop_log;
 | 
			
		||||
@ -591,7 +607,7 @@ int main(int argc, char **argv) {
 | 
			
		||||
 | 
			
		||||
    raw_ctx.log = noop_log;
 | 
			
		||||
    raw_ctx.logf = noop_logf;
 | 
			
		||||
    raw_ctx.store = noop_store;
 | 
			
		||||
    raw_ctx.store = counter_store;
 | 
			
		||||
    raw_ctx.tn_size = 500;
 | 
			
		||||
    raw_ctx.tn_qscale = 5.0;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -17,8 +17,10 @@ static void noop_log(const char *filepath, int level, char *str) {
 | 
			
		||||
    // noop
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void noop_store(char* key, size_t key_len, char *value, size_t value_len) {
 | 
			
		||||
    // noop
 | 
			
		||||
// Running total of the value lengths passed to counter_store().
static size_t store_size = 0;

/**
 * Store callback that keeps no data: it only adds value_len to store_size.
 * The key, key_len and value arguments are not used.
 */
static void counter_store(char* key, size_t key_len, char *value, size_t value_len) {
    (void) key;
    (void) key_len;
    (void) value;

    store_size = store_size + value_len;
}
 | 
			
		||||
 | 
			
		||||
meta_line_t *get_meta(document_t *doc, metakey key);
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user