1
0
mirror of https://github.com/simon987/libscan.git synced 2025-04-09 05:46:42 +00:00

support for mobi files

This commit is contained in:
simon987 2020-04-09 16:16:01 -04:00
parent 90c4ca3d6e
commit b7a565a1c4
9 changed files with 210 additions and 25 deletions

@ -18,7 +18,7 @@ add_library(
libscan/font/font.c libscan/font/font.h
third-party/utf8.h
)
libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h)
set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
@ -49,7 +49,28 @@ target_compile_options(
-g
)
#SET(CMAKE_C_LINK_EXECUTABLE "g++ <FLAGS> <CMAKE_C_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
include(ExternalProject)

# libmobi is built with autotools, so a make program is required.
find_program(MAKE_EXE NAMES gmake nmake make)
if(NOT MAKE_EXE)
    message(FATAL_ERROR "A make program (gmake/make) is required to build libmobi")
endif()

# Where the in-source libmobi build leaves its static library and public headers.
# ExternalProject resolves the relative PREFIX/SOURCE_DIR/BINARY_DIR below
# against CMAKE_CURRENT_BINARY_DIR, hence the same root here.
set(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
set(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)

# Fetch and build libmobi in place (BINARY_DIR == SOURCE_DIR); nothing is installed,
# the archive is linked straight out of the build tree.
# NOTE(review): "public" is a branch name, so the fetched revision is not pinned;
# consider a tag or commit hash for reproducible builds.
ExternalProject_Add(
        libmobi
        GIT_REPOSITORY https://github.com/bfabiszewski/libmobi.git
        GIT_TAG "public"

        UPDATE_COMMAND ""
        PATCH_COMMAND ""
        TEST_COMMAND ""
        # Chain with COMMAND instead of "&&": shell operators are not defined
        # behaviour inside ExternalProject command lists.
        CONFIGURE_COMMAND ./autogen.sh
        COMMAND ./configure
        INSTALL_COMMAND ""

        PREFIX "third-party/ext_libmobi"
        SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
        BINARY_DIR "third-party/ext_libmobi/src/libmobi"

        # Use the make program located above rather than a hard-coded "make".
        BUILD_COMMAND ${MAKE_EXE} -j 3 --silent
        # Declare the archive so generators such as Ninja know which step produces
        # the file handed to target_link_libraries().
        BUILD_BYPRODUCTS ${MOBI_LIB_DIR}/libmobi.a
)
# Remove the -lvdpau and -lX11 entries from FFMPEG_LIBRARIES.
string(REGEX REPLACE "-lvdpau" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}")
string(REGEX REPLACE "-lX11" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}")
@ -69,6 +90,8 @@ target_link_libraries(
${HARFBUZZ_LIBRARIES}
libmupdf
${MOBI_LIB_DIR}/libmobi.a
freetype
${HARFBUZZ_LIBRARIES}
${JBIG2DEC_LIB}
@ -106,6 +129,7 @@ target_include_directories(
${JPEG_INCLUDE_DIR}
${LIBXML2_INCLUDE_DIR}
${FFMPEG_INCLUDE_DIR}
${MOBI_INCLUDE_DIR}
)
# Testing

@ -19,3 +19,9 @@
/* Absolute value. The argument is evaluated twice, so avoid side effects. */
#undef ABS
#define ABS(a) (((a) < 0) ? -(a) : (a))
/*
 * Allocate a meta_line_t with room for a copy of `value`, set its key to
 * `keyname`, copy the string into str_val and hand it to APPEND_META().
 *
 * Caveats:
 *  - Expands to several statements and declares a local named `meta_str`:
 *    use it at most once per scope and never as the body of an unbraced if/else.
 *  - `value` is evaluated twice (strlen, then strcpy).
 *  - The malloc() result is not checked.
 *  - NOTE(review): the allocation adds strlen(value) without +1, so the
 *    terminating NUL must fit inside sizeof(meta_line_t) itself; confirm
 *    against the meta_line_t definition.
 */
#define APPEND_STR_META(doc, keyname, value) \
meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \
meta_str->key = keyname; \
strcpy(meta_str->str_val, value); \
APPEND_META(doc, meta_str)

73
libscan/mobi/scan_mobi.c Normal file

@ -0,0 +1,73 @@
#include "scan_mobi.h"

#include <errno.h>
#include <mobi.h>
#include "stdlib.h"
/**
 * Parse a MOBI/AZW e-book and attach its author, title and text content to doc.
 *
 * The file is read fully into memory with read_all() and handed to libmobi
 * through an fmemopen() stream. Author and title are appended as MetaAuthor /
 * MetaTitle when libmobi returns them. The raw markup is passed through
 * text_buffer_append_markup() into a text buffer created with
 * ctx->content_size, and the result is appended as MetaContent.
 *
 * On failure the problem is logged, everything acquired so far is released,
 * and the function returns; metadata appended before the failure stays on doc.
 *
 * @param ctx scanner context; supplies content_size (presumably also used
 *            implicitly by the CTX_LOG_* macros)
 * @param f   file to parse
 * @param doc document that receives the metadata
 */
void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {

    char *buf = NULL;
    char *content_str = NULL;
    size_t buf_len = 0;
    size_t maxlen;
    size_t length;
    FILE *file;
    MOBI_RET mobi_ret;

    MOBIData *m = mobi_init();
    if (m == NULL) {
        CTX_LOG_ERROR(f->filepath, "mobi_init() failed")
        return;
    }

    buf = read_all(f, &buf_len);
    if (buf == NULL) {
        /* fmemopen(NULL, ...) would silently allocate its own empty buffer. */
        CTX_LOG_ERROR(f->filepath, "read_all() failed")
        goto cleanup;
    }

    file = fmemopen(buf, buf_len, "rb");
    if (file == NULL) {
        /* The stream does not exist, so ferror() cannot be used; report errno. */
        CTX_LOG_ERRORF(f->filepath, "fmemopen() failed: %d", errno)
        goto cleanup;
    }

    mobi_ret = mobi_load_file(m, file);
    fclose(file);
    if (mobi_ret != MOBI_SUCCESS) {
        CTX_LOG_ERRORF(f->filepath, "mobi_load_file() returned error code [%d]", mobi_ret)
        goto cleanup;
    }

    {
        char *author = mobi_meta_get_author(m);
        if (author != NULL) {
            APPEND_STR_META(doc, MetaAuthor, author)
            free(author);
        }
    }

    {
        char *title = mobi_meta_get_title(m);
        if (title != NULL) {
            APPEND_STR_META(doc, MetaTitle, title)
            free(title);
        }
    }

    maxlen = mobi_get_text_maxsize(m);
    if (maxlen == MOBI_NOTSET) {
        CTX_LOG_DEBUGF(f->filepath, "Invalid text maxsize: %zu", maxlen)
        goto cleanup;
    }

    content_str = malloc(maxlen + 1);
    if (content_str == NULL) {
        CTX_LOG_ERROR(f->filepath, "malloc() failed")
        goto cleanup;
    }

    length = maxlen;
    mobi_ret = mobi_get_rawml(m, content_str, &length);
    if (mobi_ret != MOBI_SUCCESS) {
        CTX_LOG_ERRORF(f->filepath, "mobi_get_rawml() returned error code [%d]", mobi_ret)
        goto cleanup;
    }

    /*
     * text_buffer_append_markup() scans until '\0'. Terminate explicitly rather
     * than relying on libmobi to do so; the buffer holds maxlen + 1 bytes, so
     * the clamped index is always in range.
     */
    content_str[length < maxlen ? length : maxlen] = '\0';

    {
        text_buffer_t tex = text_buffer_create(ctx->content_size);
        text_buffer_append_markup(&tex, content_str);
        text_buffer_terminate_string(&tex);

        APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)

        text_buffer_destroy(&tex);
    }

cleanup:
    /* free(NULL) is a no-op, so one exit path covers every failure point. */
    free(content_str);
    free(buf);
    mobi_free(m);
}

14
libscan/mobi/scan_mobi.h Normal file

@ -0,0 +1,14 @@
#ifndef SCAN_SCAN_MOBI_H
#define SCAN_SCAN_MOBI_H
#include "../scan.h"
/* Per-scanner settings and logging hooks passed to parse_mobi(). */
typedef struct {
/* Passed to text_buffer_create() when extracting the book text. */
long content_size;
/* Logging callbacks; presumably invoked through the CTX_LOG_* macros (confirm in macros.h). */
log_callback_t log;
logf_callback_t logf;
} scan_mobi_ctx_t;
/*
 * Parse the MOBI file `f` and append its author, title and text content
 * to `doc` as string metadata.
 */
void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc);
#endif

@ -105,12 +105,6 @@ static int read_part(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *bu
return ret;
}
#define APPEND_STR_META(doc, keyname, value) \
meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \
meta_str->key = keyname; \
strcpy(meta_str->str_val, value); \
APPEND_META(doc, meta_str)
__always_inline
static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *buf, document_t *doc) {
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL,

@ -7,12 +7,6 @@
#include "macros.h"
// TODO: global init:
/*
* av_log_set_level(AV_LOG_QUIET);
*/
#define META_INT_MASK 0x80
#define META_STR_MASK 0x40
#define META_LONG_MASK 0x20
@ -151,8 +145,8 @@ typedef struct parse_job_t {
#include "util.h"
typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len);
typedef void (*logf_callback_t)(char *filepath, int level, char *format, ...);
typedef void (*log_callback_t)(char *filepath, int level, char *str);
typedef void (*logf_callback_t)(const char *filepath, int level, char *format, ...);
typedef void (*log_callback_t)(const char *filepath, int level, char *str);
typedef void (*parse_callback_t)(parse_job_t *job);
#endif

@ -81,7 +81,7 @@ static dyn_buffer_t dyn_buffer_create() {
buf.size = INITIAL_BUF_SIZE;
buf.cur = 0;
buf.buf = (char*)malloc(INITIAL_BUF_SIZE);
buf.buf = (char *) malloc(INITIAL_BUF_SIZE);
return buf;
}
@ -92,14 +92,14 @@ static void grow_buffer(dyn_buffer_t *buf, size_t size) {
buf->size *= 2;
} while (buf->cur + size > buf->size);
buf->buf = (char*)realloc(buf->buf, buf->size);
buf->buf = (char *) realloc(buf->buf, buf->size);
}
}
static void grow_buffer_small(dyn_buffer_t *buf) {
if (buf->cur + sizeof(long) > buf->size) {
buf->size *= 2;
buf->buf = (char*)realloc(buf->buf, buf->size);
buf->buf = (char *) realloc(buf->buf, buf->size);
}
}
@ -230,7 +230,7 @@ static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t
if (len <= 4) {
for (int i = 0; i < len; i++) {
if (((utf8_int32_t)0xffffff80 & str[i]) == 0) {
if (((utf8_int32_t) 0xffffff80 & str[i]) == 0) {
dyn_buffer_write_char(&buf->dyn_buffer, str[i]);
}
}
@ -241,7 +241,7 @@ static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t
char tmp[16];
do {
ptr = (char*)utf8codepoint(ptr, &c);
ptr = (char *) utf8codepoint(ptr, &c);
*(int *) tmp = 0x00000000;
memcpy(tmp, oldPtr, ptr - oldPtr);
oldPtr = ptr;
@ -264,8 +264,39 @@ static int text_buffer_append_string0(text_buffer_t *buf, char *str) {
return text_buffer_append_string(buf, str, strlen(str));
}
static void* read_all(vfile_t *f, size_t *size) {
void* buf = malloc(f->info.st_size);
/**
 * Append the text found between markup tags to buf, each run followed by a
 * single space.
 *
 * Scanning starts in the "inside a tag" state, so anything before the first
 * '>' is ignored. A text run is emitted only when the '<' that ends it is
 * reached, so text after the last tag is not emitted either. Empty runs
 * (a '<' immediately after a '>') are skipped.
 *
 * @param buf    destination text buffer
 * @param markup NUL-terminated markup string
 * @return TEXT_BUF_FULL as soon as an append reports it, 0 otherwise
 */
static int text_buffer_append_markup(text_buffer_t *buf, const char *markup) {

    int inside_tag = TRUE;
    const char *run_begin = markup;

    for (const char *cur = markup; *cur != '\0'; cur++) {

        if (inside_tag) {
            /* Wait for the tag to close; the text run starts right after it. */
            if (*cur == '>') {
                inside_tag = FALSE;
                run_begin = cur + 1;
            }
            continue;
        }

        if (*cur != '<') {
            continue;
        }

        /* A new tag opens here, which ends the current text run. */
        inside_tag = TRUE;

        if (cur == run_begin) {
            continue;
        }

        if (text_buffer_append_string(buf, run_begin, (cur - run_begin)) == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
        if (text_buffer_append_char(buf, ' ') == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
    }

    return 0;
}
static void *read_all(vfile_t *f, size_t *size) {
void *buf = malloc(f->info.st_size);
*size = f->read(f, buf, f->info.st_size);
//TODO: log

@ -7,6 +7,7 @@ extern "C" {
#include "../libscan/ebook/ebook.h"
#include "../libscan/media/media.h"
#include "../libscan/ooxml/ooxml.h"
#include "../libscan/mobi/scan_mobi.h"
#include <libavutil/avutil.h>
}
@ -22,6 +23,7 @@ static scan_media_ctx_t media_ctx;
static scan_ooxml_ctx_t ooxml_500_ctx;
static scan_mobi_ctx_t mobi_500_ctx;
/* Text */
@ -298,6 +300,49 @@ TEST(Ooxml, Xlsx1) {
cleanup(&doc, &f);
}
/* Mobi */
// .mobi sample: author, title and ~500 characters of content are extracted.
TEST(Mobi, Mobi1) {
    vfile_t book;
    document_t parsed;

    load_doc_file("libscan-test-files/test_files/mobi/Norse Mythology - Neil Gaiman.mobi", &book, &parsed);
    parse_mobi(&mobi_500_ctx, &book, &parsed);

    // Metadata reported by libmobi
    ASSERT_STREQ(get_meta(&parsed, MetaAuthor)->str_val, "Gaiman, Neil");
    ASSERT_STREQ(get_meta(&parsed, MetaTitle)->str_val, "Norse Mythology");

    // Content length follows the 500-character context setting
    ASSERT_NEAR(strlen(get_meta(&parsed, MetaContent)->str_val), 500, 1);

    cleanup(&parsed, &book);
}
// .azw sample: author, title and ~500 characters of content are extracted.
TEST(Mobi, Azw) {
    vfile_t book;
    document_t parsed;

    load_doc_file("libscan-test-files/test_files/mobi/sample.azw", &book, &parsed);
    parse_mobi(&mobi_500_ctx, &book, &parsed);

    // Metadata reported by libmobi
    ASSERT_STREQ(get_meta(&parsed, MetaAuthor)->str_val, "Nietzsche, Friedrich");
    ASSERT_STREQ(get_meta(&parsed, MetaTitle)->str_val, "On the Genealogy of Morality (Hackett Classics)");

    // Content length follows the 500-character context setting
    ASSERT_NEAR(strlen(get_meta(&parsed, MetaContent)->str_val), 500, 1);

    cleanup(&parsed, &book);
}
// .azw3 sample: author, title and ~500 characters of content are extracted.
TEST(Mobi, Azw3) {
    vfile_t book;
    document_t parsed;

    load_doc_file("libscan-test-files/test_files/mobi/sample.azw3", &book, &parsed);
    parse_mobi(&mobi_500_ctx, &book, &parsed);

    // Metadata reported by libmobi
    ASSERT_STREQ(get_meta(&parsed, MetaAuthor)->str_val, "George Orwell; Amélie Audiberti");
    ASSERT_STREQ(get_meta(&parsed, MetaTitle)->str_val, "1984");

    // Content length follows the 500-character context setting
    ASSERT_NEAR(strlen(get_meta(&parsed, MetaContent)->str_val), 500, 1);

    cleanup(&parsed, &book);
}
int main(int argc, char **argv) {
arc_recurse_ctx.log = noop_log;
arc_recurse_ctx.logf = noop_logf;
@ -335,6 +380,10 @@ int main(int argc, char **argv) {
ooxml_500_ctx.log = noop_log;
ooxml_500_ctx.logf = noop_logf;
mobi_500_ctx.content_size = 500;
mobi_500_ctx.log = noop_log;
mobi_500_ctx.logf = noop_logf;
av_log_set_level(AV_LOG_QUIET);
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();

@ -9,11 +9,11 @@ void load_doc_mem(void *mem, size_t mem_len, vfile_t *f, document_t *doc);
void load_doc_file(const char *filepath, vfile_t *f, document_t *doc);
void cleanup(document_t *doc, vfile_t *f);
static void noop_logf(char *filepath, int level, char *format, ...) {
static void noop_logf(const char *filepath, int level, char *format, ...) {
// noop
}
static void noop_log(char *filepath, int level, char *str) {
static void noop_log(const char *filepath, int level, char *str) {
// noop
}