refactor index schema, remove sidecar parsing, remove TS

This commit is contained in:
2023-09-05 18:59:18 -04:00
parent b81ccebdb1
commit 8fdb832c85
84 changed files with 1420 additions and 2445 deletions

View File

@@ -191,7 +191,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
sub_job->vfile.logf = ctx->logf;
sub_job->vfile.has_checksum = FALSE;
sub_job->vfile.calculate_checksum = f->calculate_checksum;
strcpy(sub_job->parent, doc->doc_id);
strcpy(sub_job->parent, doc->filepath);
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
struct stat entry_stat = *archive_entry_stat(entry);

View File

@@ -20,7 +20,6 @@ typedef struct {
parse_callback_t parse;
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
char passphrase[4096];
} scan_arc_ctx_t;

View File

@@ -54,7 +54,6 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
.max_media_buffer = 0,
.log = ctx->log,
.logf = ctx->logf,
.store = ctx->store,
};
ret = store_image_thumbnail(&media_ctx, buf, entry_size, doc, file_path);

View File

@@ -7,7 +7,6 @@
typedef struct {
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
int enable_tn;
int tn_size;

View File

@@ -160,8 +160,8 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
av_init_packet(&thumbnail_packet);
avcodec_receive_packet(thumbnail_encoder, &thumbnail_packet);
APPEND_LONG_META(doc, MetaThumbnail, 1);
ctx->store(doc->doc_id, 0, (char *) thumbnail_packet.data, thumbnail_packet.size);
doc->thumbnail_count = 1;
APPEND_THUMBNAIL(doc, (char *) thumbnail_packet.data, thumbnail_packet.size);
free(samples);
av_packet_unref(&thumbnail_packet);

View File

@@ -12,7 +12,6 @@ typedef struct {
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
int fast_epub_parse;
int tn_qscale;
} scan_ebook_ctx_t;

View File

@@ -231,8 +231,8 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
dyn_buffer_t bmp_data = dyn_buffer_create();
bmp_format(&bmp_data, dimensions, bitmap);
APPEND_LONG_META(doc, MetaThumbnail, 1);
ctx->store(doc->doc_id, 0, bmp_data.buf, bmp_data.cur);
doc->thumbnail_count = 1;
APPEND_THUMBNAIL(doc, bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data);
free(bitmap);

View File

@@ -8,7 +8,6 @@ typedef struct {
int enable_tn;
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
} scan_font_ctx_t;
void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc);

View File

@@ -8,7 +8,6 @@ typedef struct {
long content_size;
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
unsigned int json_mime;
unsigned int ndjson_mime;
} scan_json_ctx_t;

View File

@@ -37,15 +37,22 @@
meta_long->long_val = value; \
APPEND_META(doc, meta_long);}} while(0)
/*
 * Append a thumbnail blob to a document's metadata list.
 *
 * Allocates one meta_line_t with data_len extra bytes after it, tags it with
 * the MetaThumbnail key, records the byte count in ->size, copies the blob
 * into ->str_val and links the node with APPEND_META.
 *
 *   doc      - document whose metadata list receives the node
 *   data     - pointer to the thumbnail bytes; they are copied, so the caller
 *              keeps ownership of its own buffer
 *   data_len - number of bytes to copy from data
 *
 * data and data_len are each evaluated exactly once, so expressions with side
 * effects are safe to pass.
 *
 * NOTE(review): the malloc result is not checked, as in the previous version
 * of this macro; confirm the project's out-of-memory policy.
 */
#define APPEND_THUMBNAIL(doc, data, data_len) do { \
    size_t tn_len_ = (size_t) (data_len); \
    meta_line_t *meta_tn = malloc(sizeof(meta_line_t) + tn_len_); \
    meta_tn->key = MetaThumbnail; \
    meta_tn->size = tn_len_; \
    memcpy(meta_tn->str_val, (data), tn_len_); \
    APPEND_META(doc, meta_tn); \
} while (0)
#define APPEND_META(doc, meta) do {\
meta->next = NULL;\
if (doc->meta_head == NULL) {\
doc->meta_head = meta;\
doc->meta_tail = doc->meta_head;\
(meta)->next = NULL;\
if ((doc)->meta_head == NULL) {\
(doc)->meta_head = meta;\
(doc)->meta_tail = (doc)->meta_head;\
} else {\
doc->meta_tail->next = meta;\
doc->meta_tail = meta;\
(doc)->meta_tail->next = meta;\
(doc)->meta_tail = meta;\
}}while(0)
#define APPEND_UTF8_META(doc, keyname, str) \

View File

@@ -466,7 +466,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
if (scaled_frame == STORE_AS_IS) {
return_value = SAVE_THUMBNAIL_OK;
ctx->store(doc->doc_id, 0, frame_and_packet->packet->data, frame_and_packet->packet->size);
APPEND_THUMBNAIL(doc, frame_and_packet->packet->data, frame_and_packet->packet->size);
} else {
// Encode frame
AVCodecContext *thumbnail_encoder = alloc_webp_encoder(scaled_frame->width, scaled_frame->height,
@@ -477,9 +477,9 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
AVPacket *thumbnail_packet = av_packet_alloc();
avcodec_receive_packet(thumbnail_encoder, thumbnail_packet);
// Save thumbnail
// Save the encoded thumbnail
if (thumbnail_index == 0) {
ctx->store(doc->doc_id, 0, thumbnail_packet->data, thumbnail_packet->size);
APPEND_THUMBNAIL(doc, thumbnail_packet->data, thumbnail_packet->size);
return_value = SAVE_THUMBNAIL_OK;
} else if (thumbnail_index > 1) {
@@ -487,7 +487,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
// I figure out a better fix.
thumbnail_index -= 1;
ctx->store(doc->doc_id, thumbnail_index, thumbnail_packet->data, thumbnail_packet->size);
APPEND_THUMBNAIL(doc, thumbnail_packet->data, thumbnail_packet->size);
return_value = SAVE_THUMBNAIL_OK;
} else {
@@ -584,7 +584,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF &&
video_duration_in_seconds >= 15)
// Limit to ~1 thumbnail every 7s
// Limit to roughly one thumbnail every 7s
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 7 + 1), 1) + 1
: 1;
@@ -610,7 +610,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
}
if (number_of_thumbnails_generated > 0) {
APPEND_LONG_META(doc, MetaThumbnail, number_of_thumbnails_generated);
doc->thumbnail_count = number_of_thumbnails_generated;
}
avcodec_free_context(&decoder);
@@ -859,8 +859,8 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
}
if (scaled_frame == STORE_AS_IS) {
APPEND_LONG_META(doc, MetaThumbnail, 1);
ctx->store(doc->doc_id, 0, frame_and_packet->packet->data, frame_and_packet->packet->size);
doc->thumbnail_count = 1;
APPEND_THUMBNAIL(doc, frame_and_packet->packet->data, frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_webp_encoder(scaled_frame->width, scaled_frame->height,
@@ -871,9 +871,9 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
AVPacket *jpeg_packet = av_packet_alloc();
avcodec_receive_packet(jpeg_encoder, jpeg_packet);
// Save thumbnail
APPEND_LONG_META(doc, MetaThumbnail, 1);
ctx->store(doc->doc_id, 0, jpeg_packet->data, jpeg_packet->size);
// Record the thumbnail count and save the thumbnail
doc->thumbnail_count = 1;
APPEND_THUMBNAIL(doc, jpeg_packet->data, jpeg_packet->size);
av_packet_free(&jpeg_packet);
avcodec_free_context(&jpeg_encoder);

View File

@@ -13,7 +13,6 @@
typedef struct {
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
int tn_size;
int tn_qscale;

View File

@@ -31,7 +31,6 @@ int store_cover(scan_mobi_ctx_t *ctx, document_t *doc, MOBIData *m) {
.max_media_buffer = 0,
.log = ctx->log,
.logf = ctx->logf,
.store = ctx->store,
};
store_image_thumbnail(&media_ctx, record->data, record->size, doc, "img.jpg");

View File

@@ -7,7 +7,6 @@ typedef struct {
long content_size;
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
int tn_qscale;
int tn_size;

View File

@@ -7,7 +7,6 @@ typedef struct {
long content_size;
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
unsigned int msdoc_mime;
} scan_msdoc_ctx_t;

View File

@@ -190,8 +190,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
char *buf = malloc(entry_size);
archive_read_data(a, buf, entry_size);
APPEND_LONG_META(doc, MetaThumbnail, 1);
ctx->store(doc->doc_id, 1, buf, entry_size);
doc->thumbnail_count = 1;
free(buf);
}
@@ -238,7 +237,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
if (read_doc_props(ctx, a, doc) != 0) {
break;
}
} else if (ctx->enable_tn && strcmp(path, "docProps/thumbnail.jpeg") == 0) {
} else if (ctx->enable_tn && strcmp(path, "docProps/thumbnail_count.jpeg") == 0) {
read_thumbnail(ctx, doc, a, entry);
}
}

View File

@@ -9,7 +9,6 @@ typedef struct {
long content_size;
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
} scan_ooxml_ctx_t;
void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc);

View File

@@ -13,7 +13,6 @@ int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_thumbnail_t img, document_t
.read_subtitles = FALSE,
.tn_count = 1,
.max_media_buffer = 0,
.store = ctx->store,
.log = ctx->log,
.logf = ctx->logf,
.tn_size = ctx->tn_size,
@@ -84,8 +83,8 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
av_init_packet(&thumbnail_packet);
avcodec_receive_packet(thumbnail_encoder, &thumbnail_packet);
APPEND_LONG_META(doc, MetaThumbnail, 1);
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) thumbnail_packet.data, thumbnail_packet.size);
doc->thumbnail_count = 1;
APPEND_THUMBNAIL(doc, (char *) thumbnail_packet.data, thumbnail_packet.size);
av_packet_unref(&thumbnail_packet);
av_free(*scaled_frame->data);

View File

@@ -6,7 +6,6 @@
typedef struct {
log_callback_t log;
logf_callback_t logf;
store_callback_t store;
int enable_tn;
int tn_size;

View File

@@ -18,8 +18,6 @@
#define UNUSED(x) __attribute__((__unused__)) x
typedef void (*store_callback_t)(char *key, int num, void *buf, size_t buf_len);
typedef void (*logf_callback_t)(const char *filepath, int level, char *format, ...);
typedef void (*log_callback_t)(const char *filepath, int level, char *str);
@@ -50,8 +48,8 @@ typedef int scan_code_t;
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1)
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1)
#define SIST_DOC_ID_LEN MD5_STR_LENGTH
#define SIST_INDEX_ID_LEN MD5_STR_LENGTH
// 0000000.000000000
#define SIST_SID_LEN 18
#define EBOOK_LOCKS 0
@@ -66,7 +64,6 @@ enum metakey {
MetaGenre,
MetaTitle,
MetaFontName,
MetaParent,
MetaExifMake,
MetaExifDescription,
MetaExifSoftware,
@@ -79,7 +76,6 @@ enum metakey {
MetaExifDateTime,
MetaAuthor,
MetaModifiedBy,
MetaThumbnail,
MetaChecksum,
// Number
@@ -96,11 +92,15 @@ enum metakey {
MetaExifGpsLatitudeRef,
MetaExifGpsLatitudeDec,
MetaExifGpsLongitudeDec,
// other
MetaThumbnail,
};
typedef struct meta_line {
struct meta_line *next;
enum metakey key;
size_t size;
union {
char str_val[0];
unsigned long long_val;
@@ -109,7 +109,6 @@ typedef struct meta_line {
typedef struct document {
char doc_id[SIST_DOC_ID_LEN];
unsigned long size;
unsigned int mime;
int mtime;
@@ -117,7 +116,9 @@ typedef struct document {
int ext;
meta_line_t *meta_head;
meta_line_t *meta_tail;
int thumbnail_count;
char filepath[PATH_MAX * 2 + 1];
char parent[PATH_MAX * 2 + 1];
} document_t;
typedef struct vfile vfile_t;
@@ -166,7 +167,7 @@ typedef struct {
int base;
int ext;
struct vfile vfile;
char parent[SIST_DOC_ID_LEN];
char parent[PATH_MAX * 2 + 1];
char filepath[PATH_MAX * 2 + 1];
} parse_job_t;

View File

@@ -8,7 +8,7 @@
#include "macros.h"
#include <openssl/evp.h>
#define STR_STARTS_WITH_CONSTANT(x, y) (strncmp(y, x, sizeof(y) - 1) == 0)
#define STR_STARTS_WITH_CONSTANT(x, y) ((x) != NULL && (y) != NULL && strncmp(y, x, sizeof(y) - 1) == 0)
#define TEXT_BUF_FULL (-1)
#define INITIAL_BUF_SIZE (1024 * 16)

View File

@@ -526,7 +526,7 @@ TEST(MediaVideo, Vid3Mp4) {
ASSERT_EQ(get_meta(&doc, MetaMediaBitrate)->long_val, 825169);
ASSERT_EQ(get_meta(&doc, MetaMediaDuration)->long_val, 10);
//TODO: Check that thumbnail was generated correctly
//TODO: Check that the thumbnail was generated correctly
cleanup(&doc, &f);
}
@@ -541,7 +541,7 @@ TEST(MediaVideo, Vid3Ogv) {
ASSERT_EQ(get_meta(&doc, MetaMediaBitrate)->long_val, 590261);
ASSERT_EQ(get_meta(&doc, MetaMediaDuration)->long_val, 10);
//TODO: Check that thumbnail was generated correctly
//TODO: Check that the thumbnail was generated correctly
cleanup(&doc, &f);
}
@@ -556,7 +556,7 @@ TEST(MediaVideo, Vid3Webm) {
ASSERT_EQ(get_meta(&doc, MetaMediaBitrate)->long_val, 343153);
ASSERT_EQ(get_meta(&doc, MetaMediaDuration)->long_val, 10);
//TODO: Check that thumbnail was generated correctly
//TODO: Check that the thumbnail was generated correctly
cleanup(&doc, &f);
}