From ea4fb7fa0d71718fdb8d2038fd470046a5f4051c Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 9 Nov 2019 12:00:07 -0500 Subject: [PATCH] Bug fixes --- src/main.c | 2 +- src/parsing/font.c | 3 +++ src/parsing/parse.c | 6 +++--- src/parsing/pdf.c | 4 ++++ src/util.c | 16 +++++++++++++--- 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/main.c b/src/main.c index 490ed06..632a3b4 100644 --- a/src/main.c +++ b/src/main.c @@ -10,7 +10,7 @@ #define EPILOG "Made by simon987 . Released under GPL-3.0" -static const char *const Version = "1.1.1"; +static const char *const Version = "1.1.2"; static const char *const usage[] = { "sist2 scan [OPTION]... PATH", "sist2 index [OPTION]... INDEX", diff --git a/src/parsing/font.c b/src/parsing/font.c index 7049831..d2f61dc 100644 --- a/src/parsing/font.c +++ b/src/parsing/font.c @@ -142,6 +142,9 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) { if (library == NULL) { FT_Init_FreeType(&library); } + if (buf == NULL) { + return; + } FT_Face face; FT_Error err = FT_New_Memory_Face(library, (unsigned char *) buf, buf_len, 0, &face); diff --git a/src/parsing/parse.c b/src/parsing/parse.c index 315993a..571cb81 100644 --- a/src/parsing/parse.c +++ b/src/parsing/parse.c @@ -16,7 +16,6 @@ void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) { if (*fd == -1) { perror("open"); printf("%s\n", job->filepath); - free(job); return NULL; } } @@ -25,6 +24,7 @@ void *read_all(parse_job_t *job, const char *buf, int bytes_read, int *fd) { int ret = read(*fd, full_buf + bytes_read, job->info.st_size - bytes_read); if (ret == -1) { perror("read"); + return NULL; } } @@ -108,7 +108,7 @@ void parse(void *arg) { void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd); parse_pdf(pdf_buf, doc.size, &doc); - if (pdf_buf != buf) { + if (pdf_buf != buf && pdf_buf != NULL) { free(pdf_buf); } @@ -119,7 +119,7 @@ void parse(void *arg) { void *font_buf = read_all(job, (char *) buf, bytes_read, &fd); parse_font(font_buf, doc.size, &doc); - if (font_buf != buf) { + if (font_buf != buf && font_buf != NULL) { free(font_buf); } } diff --git a/src/parsing/pdf.c b/src/parsing/pdf.c index 4d0e2a1..7b3b112 100644 --- a/src/parsing/pdf.c +++ b/src/parsing/pdf.c @@ -114,6 +114,10 @@ int read_stext_block(fz_stext_block *block, text_buffer_t *tex) { void parse_pdf(void *buf, size_t buf_len, document_t *doc) { + if (buf == NULL) { + return; + } + static int mu_is_initialized = 0; if (!mu_is_initialized) { pthread_mutex_init(&ScanCtx.mupdf_mu, NULL); diff --git a/src/util.c b/src/util.c index 0197656..5749309 100644 --- a/src/util.c +++ b/src/util.c @@ -90,7 +90,7 @@ void text_buffer_terminate_string(text_buffer_t *buf) { } __always_inline -int utf8_validchr(const char* s) { +int utf8_validchr(const char *s) { if (0x00 == (0x80 & *s)) { return TRUE; } else if (0xf0 == (0xf8 & *s)) { @@ -130,7 +130,7 @@ int utf8_validchr(const char* s) { if (0 == (0x1e & s[0])) { return FALSE; } - } else { + } else { return FALSE; } @@ -140,12 +140,22 @@ int utf8_validchr(const char* s) { int text_buffer_append_string(text_buffer_t *buf, char *str, size_t len) { utf8_int32_t c; - for (void *v = utf8codepoint(str, &c); c != '\0' && ((char*)v - str + 4) < len; v = utf8codepoint(v, &c)) { + if (str == NULL || len < 1 || + (0xf0 == (0xf8 & str[0]) && len < 4) || + (0xe0 == (0xf0 & str[0]) && len < 3) || + (0xc0 == (0xe0 & str[0]) && len == 1) || + *(str) == 0) { + text_buffer_terminate_string(buf); + return 0; + } + + for (void *v = utf8codepoint(str, &c); c != '\0' && ((char *) v - str + 4) < len; v = utf8codepoint(v, &c)) { if (utf8_validchr(v)) { text_buffer_append_char(buf, c); } } text_buffer_terminate_string(buf); + return 0; } int text_buffer_append_string0(text_buffer_t *buf, char *str) {