diff --git a/CMakeLists.txt b/CMakeLists.txt index 3929040..9c53ec4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,7 @@ project(scan) set(CMAKE_C_STANDARD 11) option(BUILD_TESTS "Build tests" off) +option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0) add_subdirectory(third-party/antiword) add_compile_definitions( @@ -87,25 +88,31 @@ ExternalProject_Add( SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/) SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/) +if (SIST_DEBUG) + SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations") +else() + SET(FFMPEG_DEBUG "") +endif() + ExternalProject_Add( ffmpeg GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git - GIT_TAG "master" + GIT_TAG "n4.3.2" UPDATE_COMMAND "" PATCH_COMMAND "" TEST_COMMAND "" CONFIGURE_COMMAND ./configure --disable-shared --enable-static --disable-ffmpeg --disable-ffplay --disable-ffprobe --disable-doc --disable-manpages --disable-postproc --disable-avfilter --disable-alsa - --disable-lzma --disable-xlib --disable-debug --disable-vdpau --disable-vaapi --disable-sdl2 - --disable-network --extra-cflags=-fPIC + --disable-lzma --disable-xlib --disable-vdpau --disable-vaapi --disable-sdl2 + --disable-network ${FFMPEG_DEBUG} INSTALL_COMMAND "" PREFIX "third-party/ext_ffmpeg" SOURCE_DIR "third-party/ext_ffmpeg/src/ffmpeg" BINARY_DIR "third-party/ext_ffmpeg/src/ffmpeg" - BUILD_COMMAND ${MAKE_EXE} -j 8 --silent + BUILD_COMMAND ${MAKE_EXE} -j33 --silent ) SET(FFMPEG_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_ffmpeg/src/ffmpeg) diff --git a/libscan/ebook/ebook.c b/libscan/ebook/ebook.c index 098ad7a..9019a14 100644 --- a/libscan/ebook/ebook.c +++ b/libscan/ebook/ebook.c @@ -134,7 +134,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d av_image_fill_arrays(scaled_frame->data, scaled_frame->linesize, dst_buf, AV_PIX_FMT_YUV420P, pixmap->w, pixmap->h, 1); const uint8_t *in_data[1] = {pixmap->samples}; - int in_line_size[1] = {pixmap->stride}; + int in_line_size[1] = {(int) pixmap->stride}; sws_scale(sws_ctx, in_data, in_line_size, @@ -223,7 +223,7 @@ static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) { return 0; } -#define IS_VALID_BPP(d) (d==1 || d==2 || d==4 || d==8 || d==16 || d==24 || d==32) +#define IS_VALID_BPP(d) ((d)==1 || (d)==2 || (d)==4 || (d)==8 || (d)==16 || (d)==24 || (d)==32) void fill_image(fz_context *fzctx, UNUSED(fz_device *dev), fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha), @@ -257,7 +257,7 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev), void parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mime_str, document_t *doc, int tn_only) { - fz_context *fzctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); + fz_context *fzctx = fz_new_context(NULL, NULL, FZ_STORE_DEFAULT); thread_ctx = *ctx; init_fzctx(fzctx, doc); diff --git a/libscan/font/font.c b/libscan/font/font.c index ab38385..891294e 100644 --- a/libscan/font/font.c +++ b/libscan/font/font.c @@ -144,27 +144,28 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) { } size_t buf_len = 0; - void * buf = read_all(f, &buf_len); + void *buf = read_all(f, &buf_len); if (buf == NULL) { CTX_LOG_ERROR(f->filepath, "read_all() failed") return; } FT_Face face; - FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face); + FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, (int) buf_len, 0, &face); if (err != 0) { - CTX_LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, FT_Error_String(err)) + CTX_LOG_ERRORF(doc->filepath, "(font.c) FT_New_Memory_Face() returned error code [%d] %s", err, + FT_Error_String(err)) free(buf); return; } - char font_name[1024]; + char font_name[4096]; - if (face->style_name == NULL || *(face->style_name) == '?') { + if (face->style_name == NULL || (strcmp(face->style_name, "?") == 0)) { if (face->family_name == NULL) { strcpy(font_name, "(null)"); } else { - strcpy(font_name, face->family_name); + strncpy(font_name, face->family_name, sizeof(font_name)); } } else { snprintf(font_name, sizeof(font_name), "%s %s", face->family_name, face->style_name); @@ -186,7 +187,8 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) { err = FT_Set_Pixel_Sizes(face, 0, pixel); if (err != 0) { - CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, FT_Error_String(err)) + CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Set_Pixel_Sizes() returned error code [%d] %s", err, + FT_Error_String(err)) FT_Done_Face(face); free(buf); return; @@ -207,7 +209,8 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) { c = c >= 'a' && c <= 'z' ? c - 32 : c + 32; err = FT_Load_Char(face, c, FT_LOAD_NO_HINTING | FT_LOAD_RENDER); if (err != 0) { - CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, FT_Error_String(err)) + CTX_LOG_WARNINGF(doc->filepath, "(font.c) FT_Load_Char() returned error code [%d] %s", err, + FT_Error_String(err)) continue; } } diff --git a/libscan/media/media.c b/libscan/media/media.c index d4cd58c..8b120ca 100644 --- a/libscan/media/media.c +++ b/libscan/media/media.c @@ -127,12 +127,15 @@ static void read_subtitles(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, i } avsubtitle_free(&subtitle); } + + av_packet_unref(&packet); } text_buffer_terminate_string(&tex); APPEND_STR_META(doc, MetaContent, tex.dyn_buffer.buf) text_buffer_destroy(&tex); + avcodec_free_context(&decoder); } __always_inline @@ -284,26 +287,34 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f if (strcmp(key, "artist") == 0) { append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist); - } else if (strcmp(tag->key, "ImageDescription") == 0) { + } else if (strcmp(key, "imagedescription") == 0) { APPEND_TAG_META(MetaContent) - } else if (strcmp(tag->key, "Make") == 0) { + } else if (strcmp(key, "make") == 0) { APPEND_TAG_META(MetaExifMake) - } else if (strcmp(tag->key, "Model") == 0) { + } else if (strcmp(key, "model") == 0) { APPEND_TAG_META(MetaExifModel) - } else if (strcmp(tag->key, "Software") == 0) { + } else if (strcmp(key, "software") == 0) { APPEND_TAG_META(MetaExifSoftware) - } else if (strcmp(tag->key, "FNumber") == 0) { + } else if (strcmp(key, "fnumber") == 0) { APPEND_TAG_META(MetaExifFNumber) - } else if (strcmp(tag->key, "FocalLength") == 0) { + } else if (strcmp(key, "focallength") == 0) { APPEND_TAG_META(MetaExifFocalLength) - } else if (strcmp(tag->key, "UserComment") == 0) { + } else if (strcmp(key, "usercomment") == 0) { APPEND_TAG_META(MetaExifUserComment) - } else if (strcmp(tag->key, "ISOSpeedRatings") == 0) { + } else if (strcmp(key, "isospeedratings") == 0) { APPEND_TAG_META(MetaExifIsoSpeedRatings) - } else if (strcmp(tag->key, "ExposureTime") == 0) { + } else if (strcmp(key, "exposuretime") == 0) { APPEND_TAG_META(MetaExifExposureTime) - } else if (strcmp(tag->key, "DateTime") == 0) { + } else if (strcmp(key, "datetime") == 0) { APPEND_TAG_META(MetaExifDateTime) + } else if (strcmp(key, "gpslatitude") == 0) { + APPEND_TAG_META(MetaExifGpsLatitudeDMS) + } else if (strcmp(key, "gpslatituderef") == 0) { + APPEND_TAG_META(MetaExifGpsLatitudeRef) + } else if (strcmp(key, "gpslongitude") == 0) { + APPEND_TAG_META(MetaExifGpsLongitudeDMS) + } else if (strcmp(key, "gpslongituderef") == 0) { + APPEND_TAG_META(MetaExifGpsLongitudeRef) } } } diff --git a/libscan/raw/raw.c b/libscan/raw/raw.c index 3d40c35..895df3a 100644 --- a/libscan/raw/raw.c +++ b/libscan/raw/raw.c @@ -8,7 +8,7 @@ #define MIN_SIZE 32 int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) { - return store_image_thumbnail((scan_media_ctx_t*)ctx, img->data, img->data_size, doc, "x.jpeg"); + return store_image_thumbnail((scan_media_ctx_t *) ctx, img->data, img->data_size, doc, "x.jpeg"); } int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) { @@ -36,7 +36,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do AVFrame *scaled_frame = av_frame_alloc(); - struct SwsContext *sws_ctx= sws_getContext( + struct SwsContext *sws_ctx = sws_getContext( img->width, img->height, AV_PIX_FMT_RGB24, dstW, dstH, AV_PIX_FMT_YUVJ420P, SIST_SWS_ALGO, 0, 0, 0 @@ -80,6 +80,8 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do return TRUE; } +#define DMS_REF(ref) (((ref) == 'S' || (ref) == 'W') ? -1 : 1) + void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) { libraw_data_t *libraw_lib = libraw_init(0); @@ -134,10 +136,23 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) { snprintf(tmp, sizeof(tmp), "%.1f", libraw_lib->other.aperture); APPEND_STR_META(doc, MetaExifFNumber, tmp) - int denominator = (int)roundf(1 / libraw_lib->other.shutter); + int denominator = (int) roundf(1 / libraw_lib->other.shutter); snprintf(tmp, sizeof(tmp), "1/%d", denominator); APPEND_STR_META(doc, MetaExifExposureTime, tmp) + libraw_gps_info_t gps = libraw_lib->other.parsed_gps; + snprintf( + tmp, sizeof(tmp), "%.15f", + (gps.longtitude[0] + gps.longtitude[1] / 60 + gps.longtitude[2] / 3600) * DMS_REF(gps.longref) + ); + APPEND_STR_META(doc, MetaExifGpsLongitudeDec, tmp) + + snprintf( + tmp, sizeof(tmp), "%.15f", + (gps.latitude[0] + gps.latitude[1] / 60 + gps.latitude[2] / 3600) * DMS_REF(gps.latref) + ); + APPEND_STR_META(doc, MetaExifGpsLatitudeDec, tmp) + APPEND_STR_META(doc, MetaMediaVideoCodec, "raw") if (ctx->tn_size <= 0) { diff --git a/libscan/scan.h b/libscan/scan.h index 6dc69a4..33a8680 100644 --- a/libscan/scan.h +++ b/libscan/scan.h @@ -13,9 +13,9 @@ #define SIST_SWS_ALGO SWS_LANCZOS -#define META_INT_MASK 0x80 -#define META_STR_MASK 0x40 -#define META_LONG_MASK 0x20 +#define META_INT_MASK 0x8000 +#define META_STR_MASK 0x4000 +#define META_LONG_MASK 0x2000 #define UNUSED(x) __attribute__((__unused__)) x @@ -85,6 +85,12 @@ enum metakey { MetaModifiedBy = META_STR(25), MetaThumbnail = META_STR(26), MetaPages = META_INT(27), + MetaExifGpsLongitudeDMS = META_STR(28), + MetaExifGpsLongitudeRef = META_STR(29), + MetaExifGpsLatitudeDMS = META_STR(30), + MetaExifGpsLatitudeRef = META_STR(31), + MetaExifGpsLatitudeDec = META_STR(32), + MetaExifGpsLongitudeDec = META_STR(33), }; typedef struct meta_line { diff --git a/libscan/util.h b/libscan/util.h index e73e1b8..10578a7 100644 --- a/libscan/util.h +++ b/libscan/util.h @@ -133,11 +133,11 @@ static void dyn_buffer_write_int(dyn_buffer_t *buf, int d) { buf->cur += sizeof(int); } -static void dyn_buffer_write_short(dyn_buffer_t *buf, short s) { +static void dyn_buffer_write_short(dyn_buffer_t *buf, uint16_t s) { grow_buffer_small(buf); - *(short *) (buf->buf + buf->cur) = s; - buf->cur += sizeof(short); + *(uint16_t *) (buf->buf + buf->cur) = s; + buf->cur += sizeof(uint16_t); } static void dyn_buffer_write_long(dyn_buffer_t *buf, unsigned long l) { diff --git a/test/main.cpp b/test/main.cpp index 9423c2a..f6dae1e 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -319,6 +319,22 @@ TEST(Comic, ComicCbrFilters) { /* Media (image) */ +TEST(MediaImage, ExifGps1) { + vfile_t f; + document_t doc; + load_doc_file("libscan-test-files/test_files/media/exif_GPS.jpg", &f, &doc); + + parse_media(&media_ctx, &f, &doc); + + ASSERT_STREQ(get_meta(&doc, MetaExifGpsLatitudeRef)->str_val, "N"); + ASSERT_STREQ(get_meta(&doc, MetaExifGpsLatitudeDMS)->str_val, "48:1 , 56585399:1000000, 0:1"); + + ASSERT_STREQ(get_meta(&doc, MetaExifGpsLongitudeRef)->str_val, "E"); + ASSERT_STREQ(get_meta(&doc, MetaExifGpsLongitudeDMS)->str_val, "9:1 , 28046900:1000000, 0:1"); + + cleanup(&doc, &f); +} + TEST(MediaImage, Exif1) { vfile_t f; document_t doc; @@ -666,6 +682,23 @@ TEST(RAW, Panasonic) { cleanup(&doc, &f); } +TEST(RAW, ExifGps1) { + vfile_t f; + document_t doc; + load_doc_file("libscan-test-files/test_files/raw/exif_gps.DNG", &f, &doc); + + size_t size_before = store_size; + + parse_raw(&raw_ctx, &f, &doc); + + ASSERT_NE(size_before, store_size); + + ASSERT_STREQ(get_meta(&doc, MetaExifGpsLatitudeDec)->str_val, "48.943088531494141"); + ASSERT_STREQ(get_meta(&doc, MetaExifGpsLongitudeDec)->str_val, "9.467448234558105"); + + cleanup(&doc, &f); +} + TEST(RAW, Nikon) { vfile_t f; document_t doc; diff --git a/third-party/utf8.h b/third-party/utf8.h index ee5a7d4..a67acc7 160000 --- a/third-party/utf8.h +++ b/third-party/utf8.h @@ -1 +1 @@ -Subproject commit ee5a7d4beb7755da13e4d4ec3eccfb65a0530456 +Subproject commit a67acc78fd0fc272ad45362b828efdcb24874e64