mirror of
https://github.com/simon987/sist2.git
synced 2025-12-10 22:18:54 +00:00
Rework document IDs
This commit is contained in:
52
third-party/libscan/CMakeLists.txt
vendored
52
third-party/libscan/CMakeLists.txt
vendored
@@ -6,26 +6,6 @@ set(CMAKE_C_STANDARD 11)
|
||||
option(BUILD_TESTS "Build tests" on)
|
||||
|
||||
add_subdirectory(third-party/antiword)
|
||||
if (SIST_DEBUG)
|
||||
add_compile_definitions(
|
||||
antiword
|
||||
DEBUG
|
||||
)
|
||||
target_compile_options(
|
||||
antiword
|
||||
PRIVATE
|
||||
-g
|
||||
-fstack-protector
|
||||
-fno-omit-frame-pointer
|
||||
-fsanitize=address
|
||||
-fno-inline
|
||||
)
|
||||
else()
|
||||
add_compile_definitions(
|
||||
antiword
|
||||
NDEBUG
|
||||
)
|
||||
endif()
|
||||
|
||||
add_library(
|
||||
scan
|
||||
@@ -48,6 +28,38 @@ add_library(
|
||||
libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h libscan/raw/raw.c libscan/raw/raw.h)
|
||||
set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
if (SIST_DEBUG)
|
||||
add_compile_definitions(
|
||||
antiword
|
||||
DEBUG
|
||||
)
|
||||
target_compile_options(
|
||||
antiword
|
||||
PRIVATE
|
||||
-g
|
||||
-fstack-protector
|
||||
-fno-omit-frame-pointer
|
||||
-fsanitize=address
|
||||
-fno-inline
|
||||
)
|
||||
else()
|
||||
add_compile_definitions(
|
||||
antiword
|
||||
NDEBUG
|
||||
)
|
||||
|
||||
target_compile_options(
|
||||
scan
|
||||
PRIVATE
|
||||
|
||||
-Ofast
|
||||
#-march=native
|
||||
-fno-stack-protector
|
||||
-fomit-frame-pointer
|
||||
#-freciprocal-math
|
||||
)
|
||||
endif()
|
||||
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib .so)
|
||||
|
||||
find_package(cJSON CONFIG REQUIRED)
|
||||
|
||||
2
third-party/libscan/libscan/arc/arc.c
vendored
2
third-party/libscan/libscan/arc/arc.c
vendored
@@ -202,7 +202,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
|
||||
sub_job->vfile.logf = ctx->logf;
|
||||
sub_job->vfile.has_checksum = FALSE;
|
||||
sub_job->vfile.calculate_checksum = f->calculate_checksum;
|
||||
memcpy(sub_job->parent, doc->path_md5, MD5_DIGEST_LENGTH);
|
||||
strcpy(sub_job->parent, doc->doc_id);
|
||||
|
||||
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
|
||||
sub_job->vfile.info = *archive_entry_stat(entry);
|
||||
|
||||
2
third-party/libscan/libscan/ebook/ebook.c
vendored
2
third-party/libscan/libscan/ebook/ebook.c
vendored
@@ -156,7 +156,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
|
||||
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
|
||||
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
|
||||
free(samples);
|
||||
av_packet_unref(&jpeg_packet);
|
||||
|
||||
2
third-party/libscan/libscan/font/font.c
vendored
2
third-party/libscan/libscan/font/font.c
vendored
@@ -232,7 +232,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
|
||||
bmp_format(&bmp_data, dimensions, bitmap);
|
||||
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);
|
||||
ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) bmp_data.buf, bmp_data.cur);
|
||||
|
||||
dyn_buffer_destroy(&bmp_data);
|
||||
free(bitmap);
|
||||
|
||||
17
third-party/libscan/libscan/media/media.c
vendored
17
third-party/libscan/libscan/media/media.c
vendored
@@ -459,7 +459,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
|
||||
if (scaled_frame == STORE_AS_IS) {
|
||||
return_value = SAVE_THUMBNAIL_OK;
|
||||
|
||||
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
|
||||
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
|
||||
frame_and_packet->packet->size);
|
||||
} else {
|
||||
// Encode frame to jpeg
|
||||
@@ -473,7 +473,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
|
||||
|
||||
// Save thumbnail
|
||||
if (thumbnail_index == 0) {
|
||||
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
return_value = SAVE_THUMBNAIL_OK;
|
||||
|
||||
} else if (thumbnail_index > 1) {
|
||||
@@ -482,9 +482,8 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
|
||||
// I figure out a better fix.
|
||||
thumbnail_index -= 1;
|
||||
|
||||
char tn_key[sizeof(doc->path_md5) + sizeof(int)];
|
||||
memcpy(tn_key, doc->path_md5, sizeof(doc->path_md5));
|
||||
memcpy(tn_key + sizeof(doc->path_md5), &thumbnail_index, sizeof(thumbnail_index));
|
||||
char tn_key[sizeof(doc->doc_id) + sizeof(char) * 4];
|
||||
snprintf(tn_key, sizeof(tn_key), "%s%04d", doc->doc_id, thumbnail_index);
|
||||
|
||||
ctx->store((char *) tn_key, sizeof(tn_key), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
} else {
|
||||
@@ -579,8 +578,8 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
|
||||
int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE);
|
||||
|
||||
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF && video_duration_in_seconds >= 15)
|
||||
// Limit to ~1 thumbnail every 5s
|
||||
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 5 + 1), 1) + 1
|
||||
// Limit to ~1 thumbnail every 7s
|
||||
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 7 + 1), 1) + 1
|
||||
: 1;
|
||||
|
||||
const double seek_increment = thumbnails_to_generate == 1
|
||||
@@ -845,7 +844,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
|
||||
|
||||
if (scaled_frame == STORE_AS_IS) {
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
|
||||
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
|
||||
frame_and_packet->packet->size);
|
||||
} else {
|
||||
// Encode frame to jpeg
|
||||
@@ -859,7 +858,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
|
||||
|
||||
// Save thumbnail
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
|
||||
av_packet_unref(&jpeg_packet);
|
||||
avcodec_free_context(&jpeg_encoder);
|
||||
|
||||
2
third-party/libscan/libscan/ooxml/ooxml.c
vendored
2
third-party/libscan/libscan/ooxml/ooxml.c
vendored
@@ -191,7 +191,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
|
||||
archive_read_data(a, buf, entry_size);
|
||||
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, entry_size);
|
||||
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), buf, entry_size);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
|
||||
2
third-party/libscan/libscan/raw/raw.c
vendored
2
third-party/libscan/libscan/raw/raw.c
vendored
@@ -84,7 +84,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
|
||||
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
|
||||
|
||||
APPEND_LONG_META(doc, MetaThumbnail, 1)
|
||||
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
|
||||
|
||||
av_packet_unref(&jpeg_packet);
|
||||
av_free(*scaled_frame->data);
|
||||
|
||||
8
third-party/libscan/libscan/scan.h
vendored
8
third-party/libscan/libscan/scan.h
vendored
@@ -48,6 +48,10 @@ typedef int scan_code_t;
|
||||
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
|
||||
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);
|
||||
|
||||
#define MD5_STR_LENGTH 33
|
||||
#define SIST_DOC_ID_LEN MD5_STR_LENGTH
|
||||
#define SIST_INDEX_ID_LEN MD5_STR_LENGTH
|
||||
|
||||
enum metakey {
|
||||
// String
|
||||
MetaContent = 1,
|
||||
@@ -103,7 +107,7 @@ typedef struct meta_line {
|
||||
|
||||
|
||||
typedef struct document {
|
||||
unsigned char path_md5[MD5_DIGEST_LENGTH];
|
||||
char doc_id[SIST_DOC_ID_LEN];
|
||||
unsigned long size;
|
||||
unsigned int mime;
|
||||
int mtime;
|
||||
@@ -159,7 +163,7 @@ typedef struct parse_job_t {
|
||||
int base;
|
||||
int ext;
|
||||
struct vfile vfile;
|
||||
unsigned char parent[MD5_DIGEST_LENGTH];
|
||||
char parent[SIST_DOC_ID_LEN];
|
||||
char filepath[1];
|
||||
} parse_job_t;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user