mirror of
https://github.com/simon987/libscan.git
synced 2025-04-09 05:46:42 +00:00
support for mobi files simon987/sist2#41
This commit is contained in:
parent
90c4ca3d6e
commit
b7a565a1c4
@ -18,7 +18,7 @@ add_library(
|
||||
libscan/font/font.c libscan/font/font.h
|
||||
|
||||
third-party/utf8.h
|
||||
)
|
||||
libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h)
|
||||
set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
|
||||
@ -49,7 +49,28 @@ target_compile_options(
|
||||
-g
|
||||
)
|
||||
|
||||
#SET(CMAKE_C_LINK_EXECUTABLE "g++ <FLAGS> <CMAKE_C_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
|
||||
include(ExternalProject)
|
||||
find_program(MAKE_EXE NAMES gmake nmake make)
|
||||
# Fetch and build libmobi in-tree with its own autotools build system.
# Nothing is installed: the static library is consumed straight from the
# build tree (see MOBI_LIB_DIR / MOBI_INCLUDE_DIR).
ExternalProject_Add(
        libmobi
        GIT_REPOSITORY https://github.com/bfabiszewski/libmobi.git
        GIT_TAG "public"

        UPDATE_COMMAND ""
        PATCH_COMMAND ""
        TEST_COMMAND ""
        CONFIGURE_COMMAND ./autogen.sh && ./configure
        INSTALL_COMMAND ""

        PREFIX "third-party/ext_libmobi"
        SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
        # Same directory as SOURCE_DIR: the autotools build runs in-source.
        BINARY_DIR "third-party/ext_libmobi/src/libmobi"

        BUILD_COMMAND make -j 3 --silent
        # Declare the archive this step produces so that generators which
        # need explicit outputs (e.g. Ninja) know where libmobi.a comes from.
        BUILD_BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/libmobi.a
)
|
||||
|
||||
SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
|
||||
SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
|
||||
|
||||
string(REGEX REPLACE "-lvdpau" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}")
|
||||
string(REGEX REPLACE "-lX11" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}")
|
||||
@ -69,6 +90,8 @@ target_link_libraries(
|
||||
${HARFBUZZ_LIBRARIES}
|
||||
libmupdf
|
||||
|
||||
${MOBI_LIB_DIR}/libmobi.a
|
||||
|
||||
freetype
|
||||
${HARFBUZZ_LIBRARIES}
|
||||
${JBIG2DEC_LIB}
|
||||
@ -106,6 +129,7 @@ target_include_directories(
|
||||
${JPEG_INCLUDE_DIR}
|
||||
${LIBXML2_INCLUDE_DIR}
|
||||
${FFMPEG_INCLUDE_DIR}
|
||||
${MOBI_INCLUDE_DIR}
|
||||
)
|
||||
|
||||
# Testing
|
||||
|
@ -19,3 +19,9 @@
|
||||
|
||||
#undef ABS
|
||||
#define ABS(a) (((a) < 0) ? -(a) : (a))
|
||||
|
||||
/*
 * Allocate a string meta line holding a copy of `value`, tag it with
 * `keyname` and hand it to APPEND_META(doc, ...).
 *
 * The expansion is a self-contained brace block, so:
 *   - it can be used with or without a trailing semicolon,
 *   - it can be used several times in the same scope (the temporary
 *     `meta_str` no longer leaks into the caller's scope),
 *   - nothing is appended when the allocation fails.
 *
 * `value` must be a non-NULL, NUL-terminated string; it is evaluated twice.
 * NOTE(review): the allocation size relies on meta_line_t already reserving
 * room for the terminating NUL of str_val - confirm against its definition.
 */
#define APPEND_STR_META(doc, keyname, value) \
    { \
        meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \
        if (meta_str != NULL) { \
            meta_str->key = keyname; \
            strcpy(meta_str->str_val, value); \
            APPEND_META(doc, meta_str) \
        } \
    }
|
||||
|
73
libscan/mobi/scan_mobi.c
Normal file
73
libscan/mobi/scan_mobi.c
Normal file
@ -0,0 +1,73 @@
|
||||
#include "scan_mobi.h"

#include <mobi.h>
#include <errno.h>
#include "stdlib.h"
|
||||
|
||||
/**
 * Parse a MOBI-family ebook and attach its metadata to `doc`.
 *
 * The whole file is read into memory with read_all(), exposed to libmobi
 * through fmemopen(), and then:
 *   - the author and title (when libmobi returns them) are appended as
 *     MetaAuthor / MetaTitle,
 *   - the raw markup is extracted, passed through
 *     text_buffer_append_markup() into a text buffer created with
 *     ctx->content_size, and appended as MetaContent.
 *
 * Failures are reported through the CTX_LOG_* macros and the function
 * returns early; every resource acquired so far is released on all paths.
 *
 * @param ctx scan context (content size limit and log callbacks)
 * @param f   virtual file to read the ebook from
 * @param doc document receiving the extracted meta lines
 */
void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {

    MOBIData *m = mobi_init();
    if (m == NULL) {
        CTX_LOG_ERROR(f->filepath, "mobi_init() failed")
        return;
    }

    /* Declared up front so that every error path can share one cleanup label. */
    char *buf = NULL;
    char *content_str = NULL;
    FILE *file = NULL;
    MOBI_RET mobi_ret;
    size_t buf_len = 0;
    size_t maxlen;
    size_t length;

    buf = read_all(f, &buf_len);
    if (buf == NULL) {
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        goto cleanup;
    }

    file = fmemopen(buf, buf_len, "rb");
    if (file == NULL) {
        /* There is no stream to query here: report errno, not ferror(NULL). */
        const int fmemopen_errno = errno;
        CTX_LOG_ERRORF(f->filepath, "fmemopen() failed: %d", fmemopen_errno)
        goto cleanup;
    }

    mobi_ret = mobi_load_file(m, file);
    fclose(file);
    if (mobi_ret != MOBI_SUCCESS) {
        CTX_LOG_ERRORF(f->filepath, "mobi_load_file() returned error code [%d]", mobi_ret)
        goto cleanup;
    }

    /* The getters return heap copies that the caller must free. */
    {
        char *author = mobi_meta_get_author(m);
        if (author != NULL) {
            APPEND_STR_META(doc, MetaAuthor, author)
            free(author);
        }
    }
    {
        char *title = mobi_meta_get_title(m);
        if (title != NULL) {
            APPEND_STR_META(doc, MetaTitle, title)
            free(title);
        }
    }

    maxlen = mobi_get_text_maxsize(m);
    if (maxlen == MOBI_NOTSET) {
        CTX_LOG_DEBUGF(f->filepath, "Invalid text maxsize: %zu", maxlen)
        goto cleanup;
    }

    content_str = malloc(maxlen + 1);
    if (content_str == NULL) {
        CTX_LOG_ERROR(f->filepath, "malloc() failed")
        goto cleanup;
    }

    length = maxlen;
    mobi_ret = mobi_get_rawml(m, content_str, &length);
    if (mobi_ret != MOBI_SUCCESS) {
        CTX_LOG_ERRORF(f->filepath, "mobi_get_rawml() returned error code [%d]", mobi_ret)
        goto cleanup;
    }

    {
        text_buffer_t tex = text_buffer_create(ctx->content_size);
        text_buffer_append_markup(&tex, content_str);
        text_buffer_terminate_string(&tex);

        APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)

        text_buffer_destroy(&tex);
    }

cleanup:
    free(content_str);
    free(buf);
    mobi_free(m);
}
|
14
libscan/mobi/scan_mobi.h
Normal file
14
libscan/mobi/scan_mobi.h
Normal file
@ -0,0 +1,14 @@
|
||||
#ifndef SCAN_SCAN_MOBI_H
|
||||
#define SCAN_SCAN_MOBI_H
|
||||
|
||||
#include "../scan.h"
|
||||
|
||||
/* Per-scan settings handed to parse_mobi(). */
typedef struct {
|
||||
/* Passed to text_buffer_create() when building the MetaContent text. */
long content_size;
|
||||
/* Plain-message log callback. NOTE(review): presumably invoked through the CTX_LOG_* macros - confirm. */
log_callback_t log;
|
||||
/* printf-style log callback. NOTE(review): presumably invoked through the CTX_LOG_*F macros - confirm. */
logf_callback_t logf;
|
||||
} scan_mobi_ctx_t;
|
||||
|
||||
void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||
|
||||
#endif
|
@ -105,12 +105,6 @@ static int read_part(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *bu
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define APPEND_STR_META(doc, keyname, value) \
|
||||
meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \
|
||||
meta_str->key = keyname; \
|
||||
strcpy(meta_str->str_val, value); \
|
||||
APPEND_META(doc, meta_str)
|
||||
|
||||
__always_inline
|
||||
static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *buf, document_t *doc) {
|
||||
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL,
|
||||
|
@ -7,12 +7,6 @@
|
||||
|
||||
#include "macros.h"
|
||||
|
||||
// TODO: global init:
|
||||
/*
|
||||
* av_log_set_level(AV_LOG_QUIET);
|
||||
*/
|
||||
|
||||
|
||||
#define META_INT_MASK 0x80
|
||||
#define META_STR_MASK 0x40
|
||||
#define META_LONG_MASK 0x20
|
||||
@ -151,8 +145,8 @@ typedef struct parse_job_t {
|
||||
#include "util.h"
|
||||
|
||||
typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len);
|
||||
typedef void (*logf_callback_t)(char *filepath, int level, char *format, ...);
|
||||
typedef void (*log_callback_t)(char *filepath, int level, char *str);
|
||||
typedef void (*logf_callback_t)(const char *filepath, int level, char *format, ...);
|
||||
typedef void (*log_callback_t)(const char *filepath, int level, char *str);
|
||||
typedef void (*parse_callback_t)(parse_job_t *job);
|
||||
|
||||
#endif
|
||||
|
@ -81,7 +81,7 @@ static dyn_buffer_t dyn_buffer_create() {
|
||||
|
||||
buf.size = INITIAL_BUF_SIZE;
|
||||
buf.cur = 0;
|
||||
buf.buf = (char*)malloc(INITIAL_BUF_SIZE);
|
||||
buf.buf = (char *) malloc(INITIAL_BUF_SIZE);
|
||||
|
||||
return buf;
|
||||
}
|
||||
@ -92,14 +92,14 @@ static void grow_buffer(dyn_buffer_t *buf, size_t size) {
|
||||
buf->size *= 2;
|
||||
} while (buf->cur + size > buf->size);
|
||||
|
||||
buf->buf = (char*)realloc(buf->buf, buf->size);
|
||||
buf->buf = (char *) realloc(buf->buf, buf->size);
|
||||
}
|
||||
}
|
||||
|
||||
static void grow_buffer_small(dyn_buffer_t *buf) {
|
||||
if (buf->cur + sizeof(long) > buf->size) {
|
||||
buf->size *= 2;
|
||||
buf->buf = (char*)realloc(buf->buf, buf->size);
|
||||
buf->buf = (char *) realloc(buf->buf, buf->size);
|
||||
}
|
||||
}
|
||||
|
||||
@ -230,7 +230,7 @@ static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t
|
||||
|
||||
if (len <= 4) {
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (((utf8_int32_t)0xffffff80 & str[i]) == 0) {
|
||||
if (((utf8_int32_t) 0xffffff80 & str[i]) == 0) {
|
||||
dyn_buffer_write_char(&buf->dyn_buffer, str[i]);
|
||||
}
|
||||
}
|
||||
@ -241,7 +241,7 @@ static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t
|
||||
char tmp[16];
|
||||
|
||||
do {
|
||||
ptr = (char*)utf8codepoint(ptr, &c);
|
||||
ptr = (char *) utf8codepoint(ptr, &c);
|
||||
*(int *) tmp = 0x00000000;
|
||||
memcpy(tmp, oldPtr, ptr - oldPtr);
|
||||
oldPtr = ptr;
|
||||
@ -264,8 +264,39 @@ static int text_buffer_append_string0(text_buffer_t *buf, char *str) {
|
||||
return text_buffer_append_string(buf, str, strlen(str));
|
||||
}
|
||||
|
||||
static void* read_all(vfile_t *f, size_t *size) {
|
||||
void* buf = malloc(f->info.st_size);
|
||||
/**
 * Append the text located between markup tags to `buf`.
 *
 * The scan starts in the "inside a tag" state, so anything before the first
 * '>' is ignored. Each non-empty run of characters between a '>' and the
 * next '<' is appended with text_buffer_append_string(), followed by a
 * single space. A trailing run that is not followed by a '<' is not
 * appended.
 *
 * @param buf    destination text buffer
 * @param markup NUL-terminated markup string
 * @return TEXT_BUF_FULL as soon as one of the append calls reports it,
 *         0 otherwise
 */
static int text_buffer_append_markup(text_buffer_t *buf, const char *markup) {

    int inside_tag = TRUE;
    const char *text_begin = markup;

    for (const char *cur = markup; *cur != '\0'; cur++) {

        if (inside_tag) {
            /* Skip tag contents; text starts right after the closing '>'. */
            if (*cur == '>') {
                inside_tag = FALSE;
                text_begin = cur + 1;
            }
            continue;
        }

        if (*cur != '<') {
            continue;
        }

        /* A new tag opens here: flush the text collected since the last '>'. */
        inside_tag = TRUE;
        if (cur == text_begin) {
            continue;
        }

        if (text_buffer_append_string(buf, text_begin, (cur - text_begin)) == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
        if (text_buffer_append_char(buf, ' ') == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
    }

    return 0;
}
|
||||
|
||||
static void *read_all(vfile_t *f, size_t *size) {
|
||||
void *buf = malloc(f->info.st_size);
|
||||
*size = f->read(f, buf, f->info.st_size);
|
||||
|
||||
//TODO: log
|
||||
|
@ -7,6 +7,7 @@ extern "C" {
|
||||
#include "../libscan/ebook/ebook.h"
|
||||
#include "../libscan/media/media.h"
|
||||
#include "../libscan/ooxml/ooxml.h"
|
||||
#include "../libscan/mobi/scan_mobi.h"
|
||||
#include <libavutil/avutil.h>
|
||||
}
|
||||
|
||||
@ -22,6 +23,7 @@ static scan_media_ctx_t media_ctx;
|
||||
|
||||
static scan_ooxml_ctx_t ooxml_500_ctx;
|
||||
|
||||
static scan_mobi_ctx_t mobi_500_ctx;
|
||||
|
||||
|
||||
/* Text */
|
||||
@ -298,6 +300,49 @@ TEST(Ooxml, Xlsx1) {
|
||||
cleanup(&doc, &f);
|
||||
}
|
||||
|
||||
/* Mobi */
|
||||
TEST(Mobi, Mobi1) {
|
||||
vfile_t f;
|
||||
document_t doc;
|
||||
load_doc_file("libscan-test-files/test_files/mobi/Norse Mythology - Neil Gaiman.mobi", &f, &doc);
|
||||
|
||||
parse_mobi(&mobi_500_ctx, &f, &doc);
|
||||
|
||||
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "Gaiman, Neil");
|
||||
ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "Norse Mythology");
|
||||
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), 500, 1);
|
||||
|
||||
cleanup(&doc, &f);
|
||||
}
|
||||
|
||||
TEST(Mobi, Azw) {
|
||||
vfile_t f;
|
||||
document_t doc;
|
||||
load_doc_file("libscan-test-files/test_files/mobi/sample.azw", &f, &doc);
|
||||
|
||||
parse_mobi(&mobi_500_ctx, &f, &doc);
|
||||
|
||||
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "Nietzsche, Friedrich");
|
||||
ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "On the Genealogy of Morality (Hackett Classics)");
|
||||
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), 500, 1);
|
||||
|
||||
cleanup(&doc, &f);
|
||||
}
|
||||
|
||||
TEST(Mobi, Azw3) {
|
||||
vfile_t f;
|
||||
document_t doc;
|
||||
load_doc_file("libscan-test-files/test_files/mobi/sample.azw3", &f, &doc);
|
||||
|
||||
parse_mobi(&mobi_500_ctx, &f, &doc);
|
||||
|
||||
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "George Orwell; Amélie Audiberti");
|
||||
ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "1984");
|
||||
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), 500, 1);
|
||||
|
||||
cleanup(&doc, &f);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
arc_recurse_ctx.log = noop_log;
|
||||
arc_recurse_ctx.logf = noop_logf;
|
||||
@ -335,6 +380,10 @@ int main(int argc, char **argv) {
|
||||
ooxml_500_ctx.log = noop_log;
|
||||
ooxml_500_ctx.logf = noop_logf;
|
||||
|
||||
mobi_500_ctx.content_size = 500;
|
||||
mobi_500_ctx.log = noop_log;
|
||||
mobi_500_ctx.logf = noop_logf;
|
||||
|
||||
av_log_set_level(AV_LOG_QUIET);
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
|
@ -9,11 +9,11 @@ void load_doc_mem(void *mem, size_t mem_len, vfile_t *f, document_t *doc);
|
||||
void load_doc_file(const char *filepath, vfile_t *f, document_t *doc);
|
||||
void cleanup(document_t *doc, vfile_t *f);
|
||||
|
||||
static void noop_logf(char *filepath, int level, char *format, ...) {
|
||||
static void noop_logf(const char *filepath, int level, char *format, ...) {
|
||||
// noop
|
||||
}
|
||||
|
||||
static void noop_log(char *filepath, int level, char *str) {
|
||||
static void noop_log(const char *filepath, int level, char *str) {
|
||||
// noop
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user