mirror of
				https://github.com/simon987/sist2.git
				synced 2025-11-04 01:36:51 +00:00 
			
		
		
		
	#8 Skip PDF scan when content-size is 0
This commit is contained in:
		
							parent
							
								
									d089601dc5
								
							
						
					
					
						commit
						4ab2ba1a02
					
				@ -1,3 +1,4 @@
 | 
				
			|||||||
 | 
					#include <src/ctx.h>
 | 
				
			||||||
#include "pdf.h"
 | 
					#include "pdf.h"
 | 
				
			||||||
#include "src/ctx.h"
 | 
					#include "src/ctx.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -185,79 +186,81 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
 | 
				
			|||||||
        return;
 | 
					        return;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fz_stext_options opts = {0};
 | 
					    if (ScanCtx.content_size > 0) {
 | 
				
			||||||
    text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
 | 
					        fz_stext_options opts = {0};
 | 
				
			||||||
 | 
					        text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (int current_page = 0; current_page < page_count; current_page++) {
 | 
				
			||||||
 | 
					            fz_page *page = NULL;
 | 
				
			||||||
 | 
					            if (current_page == 0) {
 | 
				
			||||||
 | 
					                page = cover;
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                fz_var(err);
 | 
				
			||||||
 | 
					                fz_try(ctx)
 | 
				
			||||||
 | 
					                            page = fz_load_page(ctx, fzdoc, current_page);
 | 
				
			||||||
 | 
					                fz_catch(ctx)
 | 
				
			||||||
 | 
					                    err = ctx->error.errcode;
 | 
				
			||||||
 | 
					                if (err != 0) {
 | 
				
			||||||
 | 
					                    text_buffer_destroy(&text_buf);
 | 
				
			||||||
 | 
					                    fz_drop_page(ctx, page);
 | 
				
			||||||
 | 
					                    fz_drop_stream(ctx, stream);
 | 
				
			||||||
 | 
					                    fz_drop_document(ctx, fzdoc);
 | 
				
			||||||
 | 
					                    fz_drop_context(ctx);
 | 
				
			||||||
 | 
					                    return;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
 | 
				
			||||||
 | 
					            fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for (int current_page = 0; current_page < page_count; current_page++) {
 | 
					 | 
				
			||||||
        fz_page *page = NULL;
 | 
					 | 
				
			||||||
        if (current_page == 0) {
 | 
					 | 
				
			||||||
            page = cover;
 | 
					 | 
				
			||||||
        } else {
 | 
					 | 
				
			||||||
            fz_var(err);
 | 
					            fz_var(err);
 | 
				
			||||||
            fz_try(ctx)
 | 
					            fz_try(ctx)
 | 
				
			||||||
                page = fz_load_page(ctx, fzdoc, current_page);
 | 
					                        fz_run_page(ctx, page, dev, fz_identity, NULL);
 | 
				
			||||||
 | 
					            fz_always(ctx)
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    fz_close_device(ctx, dev);
 | 
				
			||||||
 | 
					                    fz_drop_device(ctx, dev);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
            fz_catch(ctx)
 | 
					            fz_catch(ctx)
 | 
				
			||||||
                err = ctx->error.errcode;
 | 
					                err = ctx->error.errcode;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if (err != 0) {
 | 
					            if (err != 0) {
 | 
				
			||||||
                text_buffer_destroy(&text_buf);
 | 
					                text_buffer_destroy(&text_buf);
 | 
				
			||||||
                fz_drop_page(ctx, page);
 | 
					                fz_drop_page(ctx, page);
 | 
				
			||||||
 | 
					                fz_drop_stext_page(ctx, stext);
 | 
				
			||||||
                fz_drop_stream(ctx, stream);
 | 
					                fz_drop_stream(ctx, stream);
 | 
				
			||||||
                fz_drop_document(ctx, fzdoc);
 | 
					                fz_drop_document(ctx, fzdoc);
 | 
				
			||||||
                fz_drop_context(ctx);
 | 
					                fz_drop_context(ctx);
 | 
				
			||||||
                return;
 | 
					                return;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
 | 
					            fz_stext_block *block = stext->first_block;
 | 
				
			||||||
        fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
 | 
					            while (block != NULL) {
 | 
				
			||||||
 | 
					                int ret = read_stext_block(block, &text_buf);
 | 
				
			||||||
        fz_var(err);
 | 
					                if (ret == TEXT_BUF_FULL) {
 | 
				
			||||||
        fz_try(ctx)
 | 
					                    break;
 | 
				
			||||||
            fz_run_page(ctx, page, dev, fz_identity, NULL);
 | 
					                }
 | 
				
			||||||
        fz_always(ctx)
 | 
					                block = block->next;
 | 
				
			||||||
        {
 | 
					            }
 | 
				
			||||||
            fz_close_device(ctx, dev);
 | 
					 | 
				
			||||||
            fz_drop_device(ctx, dev);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        fz_catch(ctx)
 | 
					 | 
				
			||||||
            err = ctx->error.errcode;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (err != 0) {
 | 
					 | 
				
			||||||
            text_buffer_destroy(&text_buf);
 | 
					 | 
				
			||||||
            fz_drop_page(ctx, page);
 | 
					 | 
				
			||||||
            fz_drop_stext_page(ctx, stext);
 | 
					            fz_drop_stext_page(ctx, stext);
 | 
				
			||||||
            fz_drop_stream(ctx, stream);
 | 
					            fz_drop_page(ctx, page);
 | 
				
			||||||
            fz_drop_document(ctx, fzdoc);
 | 
					 | 
				
			||||||
            fz_drop_context(ctx);
 | 
					 | 
				
			||||||
            return;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        fz_stext_block *block = stext->first_block;
 | 
					            if (text_buf.dyn_buffer.cur >= text_buf.dyn_buffer.size) {
 | 
				
			||||||
        while (block != NULL) {
 | 
					 | 
				
			||||||
            int ret = read_stext_block(block, &text_buf);
 | 
					 | 
				
			||||||
            if (ret == TEXT_BUF_FULL) {
 | 
					 | 
				
			||||||
                break;
 | 
					                break;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            block = block->next;
 | 
					 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        fz_drop_stext_page(ctx, stext);
 | 
					        text_buffer_terminate_string(&text_buf);
 | 
				
			||||||
        fz_drop_page(ctx, page);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (text_buf.dyn_buffer.cur >= text_buf.dyn_buffer.size) {
 | 
					        meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
 | 
				
			||||||
            break;
 | 
					        meta_content->key = MetaContent;
 | 
				
			||||||
        }
 | 
					        memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
 | 
				
			||||||
 | 
					        APPEND_META(doc, meta_content)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        text_buffer_destroy(&text_buf);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    text_buffer_terminate_string(&text_buf);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
 | 
					 | 
				
			||||||
    meta_content->key = MetaContent;
 | 
					 | 
				
			||||||
    memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
 | 
					 | 
				
			||||||
    APPEND_META(doc, meta_content)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fz_drop_stream(ctx, stream);
 | 
					    fz_drop_stream(ctx, stream);
 | 
				
			||||||
    fz_drop_document(ctx, fzdoc);
 | 
					    fz_drop_document(ctx, fzdoc);
 | 
				
			||||||
    fz_drop_context(ctx);
 | 
					    fz_drop_context(ctx);
 | 
				
			||||||
 | 
					 | 
				
			||||||
    text_buffer_destroy(&text_buf);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user