mirror of
https://github.com/simon987/libscan.git
synced 2025-04-05 12:23:00 +00:00
65 lines
1.7 KiB
C
65 lines
1.7 KiB
C
#include "text.h"
|
|
|
|
/**
 * Read the beginning of a plain-text file and store it as the document's
 * MetaContent metadata.
 *
 * At most MIN(ctx->content_size, file size) bytes are read. Files for which
 * that amount is two bytes or fewer are skipped and reported as SCAN_OK.
 * The first two bytes are compared against 0xFFFE / 0xFEFF to choose between
 * the UTF-16 append routines and the plain one.
 *
 * @param ctx  Text scan context; content_size caps the number of bytes read
 *             and sizes the text buffer.
 * @param f    Virtual file to read from (f->read, f->info.st_size).
 * @param doc  Document that receives the MetaContent metadata.
 * @return SCAN_OK on success or when the file is skipped, SCAN_ERR_READ when
 *         the read buffer cannot be allocated or f->read() reports an error.
 */
scan_code_t parse_text(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {

    int to_read = MIN(ctx->content_size, f->info.st_size);

    if (to_read <= 2) {
        return SCAN_OK;
    }

    // Zero-filled on purpose: if read() delivers fewer bytes than requested,
    // the tail handed to the append routines is NUL instead of indeterminate.
    char *buf = calloc((size_t) to_read, 1);
    if (buf == NULL) {
        CTX_LOG_ERRORF(doc->filepath, "Could not allocate read buffer of [%d] bytes", to_read)
        return SCAN_ERR_READ;
    }

    int ret = f->read(f, buf, to_read);
    if (ret < 0) {
        CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret)
        free(buf);
        return SCAN_ERR_READ;
    }

    text_buffer_t tex = text_buffer_create(ctx->content_size);

    // NOTE(review): this is a host-endian 16-bit load, so which byte sequence
    // matches each constant depends on the machine's byte order -- confirm the
    // le/be routing is what is intended on every supported target.
    // NOTE(review): the lengths below use to_read, not ret; presumably read()
    // returns the byte count on success -- verify before switching to ret.
    if ((*(int16_t*)buf) == (int16_t)0xFFFE) {
        // Skip the two-byte mark itself
        text_buffer_append_string16_le(&tex, buf + 2, to_read - 2);
    } else if((*(int16_t*)buf) == (int16_t)0xFEFF) {
        text_buffer_append_string16_be(&tex, buf + 2, to_read - 2);
    } else {
        text_buffer_append_string(&tex, buf, to_read);
    }
    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

    free(buf);
    text_buffer_destroy(&tex);

    return SCAN_OK;
}
|
|
|
|
/* Upper bound, in bytes, on how much of a markup file is read (1 MiB).
 * Parenthesized so the macro behaves as a single value inside larger
 * expressions such as division or modulo. */
#define MAX_MARKUP_SIZE (1024 * 1024)
|
|
|
|
/**
 * Read the beginning of a markup file, pass it through
 * text_buffer_append_markup() and store the result as the document's
 * MetaContent metadata.
 *
 * At most MIN(MAX_MARKUP_SIZE, file size) bytes are read into a
 * NUL-terminated buffer; the text buffer that receives the output is created
 * with ctx->content_size.
 *
 * @param ctx  Text scan context; content_size sizes the text buffer.
 * @param f    Virtual file to read from (f->read, f->info.st_size).
 * @param doc  Document that receives the MetaContent metadata.
 * @return SCAN_OK on success, SCAN_ERR_READ when the read buffer cannot be
 *         allocated or f->read() reports an error.
 */
scan_code_t parse_markup(scan_text_ctx_t *ctx, vfile_t *f, document_t *doc) {

    int to_read = MIN(MAX_MARKUP_SIZE, f->info.st_size);

    // One extra byte for the terminator. Zero-filled so that the buffer is a
    // valid C string even if read() delivers fewer than to_read bytes.
    char *buf = calloc((size_t) to_read + 1, 1);
    if (buf == NULL) {
        CTX_LOG_ERRORF(doc->filepath, "Could not allocate read buffer of [%d] bytes", to_read + 1)
        return SCAN_ERR_READ;
    }

    int ret = f->read(f, buf, to_read);
    if (ret < 0) {
        CTX_LOG_ERRORF(doc->filepath, "read() returned error code: [%d]", ret)
        free(buf);
        return SCAN_ERR_READ;
    }

    // text_buffer_append_markup() takes no length, so the data must be
    // NUL-terminated.
    buf[to_read] = '\0';

    text_buffer_t tex = text_buffer_create(ctx->content_size);
    text_buffer_append_markup(&tex, buf);
    text_buffer_terminate_string(&tex);

    APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf);

    free(buf);
    text_buffer_destroy(&tex);

    return SCAN_OK;
}
|