mirror of
https://github.com/simon987/libscan.git
synced 2025-04-17 17:26:45 +00:00
support for mobi files simon987/sist2#41
This commit is contained in:
parent
90c4ca3d6e
commit
b7a565a1c4
@ -18,7 +18,7 @@ add_library(
|
|||||||
libscan/font/font.c libscan/font/font.h
|
libscan/font/font.c libscan/font/font.h
|
||||||
|
|
||||||
third-party/utf8.h
|
third-party/utf8.h
|
||||||
)
|
libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h)
|
||||||
set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)
|
set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)
|
||||||
|
|
||||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
|
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
|
||||||
@ -49,7 +49,28 @@ target_compile_options(
|
|||||||
-g
|
-g
|
||||||
)
|
)
|
||||||
|
|
||||||
#SET(CMAKE_C_LINK_EXECUTABLE "g++ <FLAGS> <CMAKE_C_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
|
# Build libmobi (MOBI/AZW parser) from source as an external project.
include(ExternalProject)

# Locate the make program that drives libmobi's autotools build (gmake is tried first).
find_program(MAKE_EXE NAMES gmake nmake make)

ExternalProject_Add(
        libmobi
        GIT_REPOSITORY https://github.com/bfabiszewski/libmobi.git
        GIT_TAG "public"

        # Only the configure and build steps are needed; the static library
        # is consumed straight from the build tree (see MOBI_LIB_DIR below).
        UPDATE_COMMAND ""
        PATCH_COMMAND ""
        TEST_COMMAND ""
        CONFIGURE_COMMAND ./autogen.sh && ./configure
        INSTALL_COMMAND ""

        # In-source build: autogen.sh/configure are run from the source directory.
        PREFIX "third-party/ext_libmobi"
        SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
        BINARY_DIR "third-party/ext_libmobi/src/libmobi"

        # Use the make program located above instead of a hard-coded `make`.
        BUILD_COMMAND ${MAKE_EXE} -j 3 --silent
)

# scan_mobi.c includes <mobi.h> from the libmobi source tree, so libmobi must
# be fetched and built before the scan target is compiled.
add_dependencies(scan libmobi)

SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
|
||||||
|
|
||||||
string(REGEX REPLACE "-lvdpau" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}")
|
string(REGEX REPLACE "-lvdpau" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}")
|
||||||
string(REGEX REPLACE "-lX11" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}")
|
string(REGEX REPLACE "-lX11" "" FFMPEG_LIBRARIES "${FFMPEG_LIBRARIES}")
|
||||||
@ -69,6 +90,8 @@ target_link_libraries(
|
|||||||
${HARFBUZZ_LIBRARIES}
|
${HARFBUZZ_LIBRARIES}
|
||||||
libmupdf
|
libmupdf
|
||||||
|
|
||||||
|
${MOBI_LIB_DIR}/libmobi.a
|
||||||
|
|
||||||
freetype
|
freetype
|
||||||
${HARFBUZZ_LIBRARIES}
|
${HARFBUZZ_LIBRARIES}
|
||||||
${JBIG2DEC_LIB}
|
${JBIG2DEC_LIB}
|
||||||
@ -106,6 +129,7 @@ target_include_directories(
|
|||||||
${JPEG_INCLUDE_DIR}
|
${JPEG_INCLUDE_DIR}
|
||||||
${LIBXML2_INCLUDE_DIR}
|
${LIBXML2_INCLUDE_DIR}
|
||||||
${FFMPEG_INCLUDE_DIR}
|
${FFMPEG_INCLUDE_DIR}
|
||||||
|
${MOBI_INCLUDE_DIR}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Testing
|
# Testing
|
||||||
|
@ -19,3 +19,9 @@
|
|||||||
|
|
||||||
#undef ABS
|
#undef ABS
|
||||||
#define ABS(a) (((a) < 0) ? -(a) : (a))
|
#define ABS(a) (((a) < 0) ? -(a) : (a))
|
||||||
|
|
||||||
|
/*
 * APPEND_STR_META(doc, keyname, value)
 *
 * Allocates a meta_line_t with strlen(value) extra bytes, sets its key to
 * `keyname`, copies the C string `value` into its str_val member and hands
 * the result to APPEND_META(doc, ...).
 *
 * Caveats visible from the expansion:
 *  - `value` is evaluated twice (strlen and strcpy).
 *  - A local named `meta_str` is declared in the enclosing scope, so the
 *    macro can be used at most once per scope.
 *  - The malloc() result is not checked.
 *  - NOTE(review): the allocation has no explicit +1 for the terminator;
 *    presumably meta_line_t already reserves room for it - confirm against
 *    the struct definition.
 */
#define APPEND_STR_META(doc, keyname, value) \
|
||||||
|
meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \
|
||||||
|
meta_str->key = keyname; \
|
||||||
|
strcpy(meta_str->str_val, value); \
|
||||||
|
APPEND_META(doc, meta_str)
|
||||||
|
73
libscan/mobi/scan_mobi.c
Normal file
73
libscan/mobi/scan_mobi.c
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
#include "scan_mobi.h"

#include <errno.h>
#include <mobi.h>
#include "stdlib.h"
|
||||||
|
|
||||||
|
void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||||
|
|
||||||
|
MOBIData *m = mobi_init();
|
||||||
|
if (m == NULL) {
|
||||||
|
CTX_LOG_ERROR(f->filepath, "mobi_init() failed")
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t buf_len;
|
||||||
|
char* buf = read_all(f, &buf_len);
|
||||||
|
|
||||||
|
FILE *file = fmemopen(buf, buf_len, "rb");
|
||||||
|
if (file == NULL) {
|
||||||
|
mobi_free(m);
|
||||||
|
free(buf);
|
||||||
|
CTX_LOG_ERRORF(f->filepath, "fmemopen() failed: %d", ferror(file))
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
MOBI_RET mobi_ret = mobi_load_file(m, file);
|
||||||
|
fclose(file);
|
||||||
|
if (mobi_ret != MOBI_SUCCESS) {
|
||||||
|
mobi_free(m);
|
||||||
|
free(buf);
|
||||||
|
CTX_LOG_ERRORF(f->filepath, "mobi_laod_file() returned error code [%d]", mobi_ret)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *author = mobi_meta_get_author(m);
|
||||||
|
if (author != NULL) {
|
||||||
|
APPEND_STR_META(doc, MetaAuthor, author)
|
||||||
|
free(author);
|
||||||
|
}
|
||||||
|
char *title = mobi_meta_get_title(m);
|
||||||
|
if (title != NULL) {
|
||||||
|
APPEND_STR_META(doc, MetaTitle, title)
|
||||||
|
free(title);
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t maxlen = mobi_get_text_maxsize(m);
|
||||||
|
if (maxlen == MOBI_NOTSET) {
|
||||||
|
free(buf);
|
||||||
|
CTX_LOG_DEBUGF("%s", "Invalid text maxsize: %zu", maxlen)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *content_str = malloc(maxlen + 1);
|
||||||
|
size_t length = maxlen;
|
||||||
|
mobi_ret = mobi_get_rawml(m, content_str, &length);
|
||||||
|
if (mobi_ret != MOBI_SUCCESS) {
|
||||||
|
mobi_free(m);
|
||||||
|
free(content_str);
|
||||||
|
free(buf);
|
||||||
|
CTX_LOG_ERRORF(f->filepath, "mobi_get_rawml() returned error code [%d]", mobi_ret)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
text_buffer_t tex = text_buffer_create(ctx->content_size);
|
||||||
|
text_buffer_append_markup(&tex, content_str);
|
||||||
|
text_buffer_terminate_string(&tex);
|
||||||
|
|
||||||
|
APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf)
|
||||||
|
|
||||||
|
free(content_str);
|
||||||
|
free(buf);
|
||||||
|
text_buffer_destroy(&tex);
|
||||||
|
mobi_free(m);
|
||||||
|
}
|
14
libscan/mobi/scan_mobi.h
Normal file
14
libscan/mobi/scan_mobi.h
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
#ifndef SCAN_SCAN_MOBI_H
|
||||||
|
#define SCAN_SCAN_MOBI_H
|
||||||
|
|
||||||
|
#include "../scan.h"
|
||||||
|
|
||||||
|
/* Context handed to parse_mobi(): content size limit and log callbacks. */
typedef struct {
|
||||||
|
long content_size; /* passed to text_buffer_create() by parse_mobi() */
|
||||||
|
log_callback_t log; /* NOTE(review): presumably invoked by the CTX_LOG_* macros - confirm */
|
||||||
|
logf_callback_t logf; /* NOTE(review): presumably invoked by the CTX_LOG_*F macros - confirm */
|
||||||
|
} scan_mobi_ctx_t;
|
||||||
|
|
||||||
|
void parse_mobi(scan_mobi_ctx_t *ctx, vfile_t *f, document_t *doc);
|
||||||
|
|
||||||
|
#endif
|
@ -105,12 +105,6 @@ static int read_part(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *bu
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define APPEND_STR_META(doc, keyname, value) \
|
|
||||||
meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \
|
|
||||||
meta_str->key = keyname; \
|
|
||||||
strcpy(meta_str->str_val, value); \
|
|
||||||
APPEND_META(doc, meta_str)
|
|
||||||
|
|
||||||
__always_inline
|
__always_inline
|
||||||
static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *buf, document_t *doc) {
|
static int read_doc_props(scan_ooxml_ctx_t *ctx, struct archive *a, text_buffer_t *buf, document_t *doc) {
|
||||||
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL,
|
xmlDoc *xml = xmlReadIO(xml_io_read, xml_io_close, a, "/", NULL,
|
||||||
|
@ -7,12 +7,6 @@
|
|||||||
|
|
||||||
#include "macros.h"
|
#include "macros.h"
|
||||||
|
|
||||||
// TODO: global init:
|
|
||||||
/*
|
|
||||||
* av_log_set_level(AV_LOG_QUIET);
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#define META_INT_MASK 0x80
|
#define META_INT_MASK 0x80
|
||||||
#define META_STR_MASK 0x40
|
#define META_STR_MASK 0x40
|
||||||
#define META_LONG_MASK 0x20
|
#define META_LONG_MASK 0x20
|
||||||
@ -151,8 +145,8 @@ typedef struct parse_job_t {
|
|||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len);
|
typedef void (*store_callback_t)(char *key, size_t key_len, char *buf, size_t buf_len);
|
||||||
typedef void (*logf_callback_t)(char *filepath, int level, char *format, ...);
|
typedef void (*logf_callback_t)(const char *filepath, int level, char *format, ...);
|
||||||
typedef void (*log_callback_t)(char *filepath, int level, char *str);
|
typedef void (*log_callback_t)(const char *filepath, int level, char *str);
|
||||||
typedef void (*parse_callback_t)(parse_job_t *job);
|
typedef void (*parse_callback_t)(parse_job_t *job);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -81,7 +81,7 @@ static dyn_buffer_t dyn_buffer_create() {
|
|||||||
|
|
||||||
buf.size = INITIAL_BUF_SIZE;
|
buf.size = INITIAL_BUF_SIZE;
|
||||||
buf.cur = 0;
|
buf.cur = 0;
|
||||||
buf.buf = (char*)malloc(INITIAL_BUF_SIZE);
|
buf.buf = (char *) malloc(INITIAL_BUF_SIZE);
|
||||||
|
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
@ -92,14 +92,14 @@ static void grow_buffer(dyn_buffer_t *buf, size_t size) {
|
|||||||
buf->size *= 2;
|
buf->size *= 2;
|
||||||
} while (buf->cur + size > buf->size);
|
} while (buf->cur + size > buf->size);
|
||||||
|
|
||||||
buf->buf = (char*)realloc(buf->buf, buf->size);
|
buf->buf = (char *) realloc(buf->buf, buf->size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void grow_buffer_small(dyn_buffer_t *buf) {
|
static void grow_buffer_small(dyn_buffer_t *buf) {
|
||||||
if (buf->cur + sizeof(long) > buf->size) {
|
if (buf->cur + sizeof(long) > buf->size) {
|
||||||
buf->size *= 2;
|
buf->size *= 2;
|
||||||
buf->buf = (char*)realloc(buf->buf, buf->size);
|
buf->buf = (char *) realloc(buf->buf, buf->size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -230,7 +230,7 @@ static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t
|
|||||||
|
|
||||||
if (len <= 4) {
|
if (len <= 4) {
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
if (((utf8_int32_t)0xffffff80 & str[i]) == 0) {
|
if (((utf8_int32_t) 0xffffff80 & str[i]) == 0) {
|
||||||
dyn_buffer_write_char(&buf->dyn_buffer, str[i]);
|
dyn_buffer_write_char(&buf->dyn_buffer, str[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -241,7 +241,7 @@ static int text_buffer_append_string(text_buffer_t *buf, const char *str, size_t
|
|||||||
char tmp[16];
|
char tmp[16];
|
||||||
|
|
||||||
do {
|
do {
|
||||||
ptr = (char*)utf8codepoint(ptr, &c);
|
ptr = (char *) utf8codepoint(ptr, &c);
|
||||||
*(int *) tmp = 0x00000000;
|
*(int *) tmp = 0x00000000;
|
||||||
memcpy(tmp, oldPtr, ptr - oldPtr);
|
memcpy(tmp, oldPtr, ptr - oldPtr);
|
||||||
oldPtr = ptr;
|
oldPtr = ptr;
|
||||||
@ -264,8 +264,39 @@ static int text_buffer_append_string0(text_buffer_t *buf, char *str) {
|
|||||||
return text_buffer_append_string(buf, str, strlen(str));
|
return text_buffer_append_string(buf, str, strlen(str));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void* read_all(vfile_t *f, size_t *size) {
|
/**
 * Appends the text found outside of the <...> tags of `markup` to `buf`.
 *
 * Every non-empty run of text that is closed by a '<' is appended followed by
 * a single space. Text that follows the last tag is appended as well (without
 * a trailing space) instead of being silently dropped.
 *
 * @param buf    destination text buffer
 * @param markup NUL-terminated markup string
 * @return TEXT_BUF_FULL as soon as the buffer reports it is full, 0 otherwise
 */
static int text_buffer_append_markup(text_buffer_t *buf, const char *markup) {

    // NOTE(review): scanning starts in the "inside a tag" state, so anything
    // before the first '>' is skipped. Kept as in the original - confirm
    // whether markup that starts with plain text is expected.
    int tag_open = TRUE;
    const char *start = markup;
    const char *ptr;

    for (ptr = markup; *ptr != '\0'; ptr++) {
        if (tag_open) {
            if (*ptr == '>') {
                tag_open = FALSE;
                start = ptr + 1;
            }
            continue;
        }

        if (*ptr != '<') {
            continue;
        }

        tag_open = TRUE;
        if (ptr == start) {
            // Two adjacent tags: nothing to append.
            continue;
        }

        if (text_buffer_append_string(buf, start, (ptr - start)) == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
        if (text_buffer_append_char(buf, ' ') == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
    }

    // Flush the text that follows the last closed tag, if any.
    if (!tag_open && ptr != start) {
        if (text_buffer_append_string(buf, start, (ptr - start)) == TEXT_BUF_FULL) {
            return TEXT_BUF_FULL;
        }
    }

    return 0;
}
|
||||||
|
|
||||||
|
static void *read_all(vfile_t *f, size_t *size) {
|
||||||
|
void *buf = malloc(f->info.st_size);
|
||||||
*size = f->read(f, buf, f->info.st_size);
|
*size = f->read(f, buf, f->info.st_size);
|
||||||
|
|
||||||
//TODO: log
|
//TODO: log
|
||||||
|
@ -7,6 +7,7 @@ extern "C" {
|
|||||||
#include "../libscan/ebook/ebook.h"
|
#include "../libscan/ebook/ebook.h"
|
||||||
#include "../libscan/media/media.h"
|
#include "../libscan/media/media.h"
|
||||||
#include "../libscan/ooxml/ooxml.h"
|
#include "../libscan/ooxml/ooxml.h"
|
||||||
|
#include "../libscan/mobi/scan_mobi.h"
|
||||||
#include <libavutil/avutil.h>
|
#include <libavutil/avutil.h>
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -22,6 +23,7 @@ static scan_media_ctx_t media_ctx;
|
|||||||
|
|
||||||
static scan_ooxml_ctx_t ooxml_500_ctx;
|
static scan_ooxml_ctx_t ooxml_500_ctx;
|
||||||
|
|
||||||
|
static scan_mobi_ctx_t mobi_500_ctx;
|
||||||
|
|
||||||
|
|
||||||
/* Text */
|
/* Text */
|
||||||
@ -298,6 +300,49 @@ TEST(Ooxml, Xlsx1) {
|
|||||||
cleanup(&doc, &f);
|
cleanup(&doc, &f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Mobi */
|
||||||
|
// A .mobi file yields author, title and roughly content_size characters of text.
TEST(Mobi, Mobi1) {
    vfile_t file;
    document_t document;
    load_doc_file("libscan-test-files/test_files/mobi/Norse Mythology - Neil Gaiman.mobi", &file, &document);

    parse_mobi(&mobi_500_ctx, &file, &document);

    // Each entry is looked up right before its assertion, as a fatal
    // assertion failure returns from the test immediately.
    const char *author = get_meta(&document, MetaAuthor)->str_val;
    ASSERT_STREQ(author, "Gaiman, Neil");

    const char *title = get_meta(&document, MetaTitle)->str_val;
    ASSERT_STREQ(title, "Norse Mythology");

    const size_t content_len = strlen(get_meta(&document, MetaContent)->str_val);
    ASSERT_NEAR(content_len, 500, 1);

    cleanup(&document, &file);
}
|
||||||
|
|
||||||
|
// An .azw file goes through the same parser and yields the same kinds of metadata.
TEST(Mobi, Azw) {
    vfile_t file;
    document_t document;
    load_doc_file("libscan-test-files/test_files/mobi/sample.azw", &file, &document);

    parse_mobi(&mobi_500_ctx, &file, &document);

    // Each entry is looked up right before its assertion, as a fatal
    // assertion failure returns from the test immediately.
    const char *author = get_meta(&document, MetaAuthor)->str_val;
    ASSERT_STREQ(author, "Nietzsche, Friedrich");

    const char *title = get_meta(&document, MetaTitle)->str_val;
    ASSERT_STREQ(title, "On the Genealogy of Morality (Hackett Classics)");

    const size_t content_len = strlen(get_meta(&document, MetaContent)->str_val);
    ASSERT_NEAR(content_len, 500, 1);

    cleanup(&document, &file);
}
|
||||||
|
|
||||||
|
// An .azw3 file with a multi-author, non-ASCII author string.
TEST(Mobi, Azw3) {
    vfile_t file;
    document_t document;
    load_doc_file("libscan-test-files/test_files/mobi/sample.azw3", &file, &document);

    parse_mobi(&mobi_500_ctx, &file, &document);

    // Each entry is looked up right before its assertion, as a fatal
    // assertion failure returns from the test immediately.
    const char *author = get_meta(&document, MetaAuthor)->str_val;
    ASSERT_STREQ(author, "George Orwell; Amélie Audiberti");

    const char *title = get_meta(&document, MetaTitle)->str_val;
    ASSERT_STREQ(title, "1984");

    const size_t content_len = strlen(get_meta(&document, MetaContent)->str_val);
    ASSERT_NEAR(content_len, 500, 1);

    cleanup(&document, &file);
}
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
arc_recurse_ctx.log = noop_log;
|
arc_recurse_ctx.log = noop_log;
|
||||||
arc_recurse_ctx.logf = noop_logf;
|
arc_recurse_ctx.logf = noop_logf;
|
||||||
@ -335,6 +380,10 @@ int main(int argc, char **argv) {
|
|||||||
ooxml_500_ctx.log = noop_log;
|
ooxml_500_ctx.log = noop_log;
|
||||||
ooxml_500_ctx.logf = noop_logf;
|
ooxml_500_ctx.logf = noop_logf;
|
||||||
|
|
||||||
|
mobi_500_ctx.content_size = 500;
|
||||||
|
mobi_500_ctx.log = noop_log;
|
||||||
|
mobi_500_ctx.logf = noop_logf;
|
||||||
|
|
||||||
av_log_set_level(AV_LOG_QUIET);
|
av_log_set_level(AV_LOG_QUIET);
|
||||||
::testing::InitGoogleTest(&argc, argv);
|
::testing::InitGoogleTest(&argc, argv);
|
||||||
return RUN_ALL_TESTS();
|
return RUN_ALL_TESTS();
|
||||||
|
@ -9,11 +9,11 @@ void load_doc_mem(void *mem, size_t mem_len, vfile_t *f, document_t *doc);
|
|||||||
void load_doc_file(const char *filepath, vfile_t *f, document_t *doc);
|
void load_doc_file(const char *filepath, vfile_t *f, document_t *doc);
|
||||||
void cleanup(document_t *doc, vfile_t *f);
|
void cleanup(document_t *doc, vfile_t *f);
|
||||||
|
|
||||||
static void noop_logf(char *filepath, int level, char *format, ...) {
|
static void noop_logf(const char *filepath, int level, char *format, ...) {
|
||||||
// noop
|
// noop
|
||||||
}
|
}
|
||||||
|
|
||||||
static void noop_log(char *filepath, int level, char *str) {
|
static void noop_log(const char *filepath, int level, char *str) {
|
||||||
// noop
|
// noop
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user