mirror of
				https://github.com/simon987/sist2.git
				synced 2025-10-31 16:06:53 +00:00 
			
		
		
		
	Use WEBP to encode thumbnails
This commit is contained in:
		
							parent
							
								
									e2e0cf260f
								
							
						
					
					
						commit
						610882112d
					
				| @ -28,7 +28,7 @@ sist2 (Simple incremental search tool) | ||||
| 
 | ||||
| \* See [format support](#format-support)     | ||||
| \*\* See [Archive files](#archive-files)     | ||||
| \*\*\* See [OCR](#ocr) | ||||
| \*\*\* See [OCR](#ocr)     | ||||
| \*\*\*\* See [Named-Entity Recognition](#NER) | ||||
| 
 | ||||
| ## Getting Started | ||||
| @ -46,7 +46,7 @@ services: | ||||
|       - "discovery.type=single-node" | ||||
|       - "ES_JAVA_OPTS=-Xms2g -Xmx2g" | ||||
|   sist2-admin: | ||||
|     image: simon987/sist2:3.0.7-x64-linux | ||||
|     image: simon987/sist2:3.1.0-x64-linux | ||||
|     restart: unless-stopped | ||||
|     volumes: | ||||
|       - ./sist2-admin-data/:/sist2-admin/ | ||||
| @ -206,7 +206,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux | ||||
| 3. Install vcpkg dependencies | ||||
| 
 | ||||
|     ```bash | ||||
|     vcpkg install curl[core,openssl] sqlite3[core,fts5] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample] | ||||
|     vcpkg install curl[core,openssl] sqlite3[core,fts5] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp] | ||||
|     ``` | ||||
| 
 | ||||
| 4. Build | ||||
|  | ||||
| @ -17,7 +17,7 @@ Lightning-fast file system indexer and search tool. | ||||
| 
 | ||||
| Scan options | ||||
|     -t, --threads=<int>               Number of threads. DEFAULT: 1 | ||||
|     -q, --thumbnail-quality=<int>     Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT: 2 | ||||
|     -q, --thumbnail-quality=<int>     Thumbnail quality, on a scale of 0 to 100, 100 being the best. DEFAULT: 50 | ||||
|     --thumbnail-size=<int>            Thumbnail size, in pixels. DEFAULT: 552 | ||||
|     --thumbnail-count=<int>           Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT: 1 | ||||
|     --content-size=<int>              Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT: 32768 | ||||
| @ -88,8 +88,8 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0 | ||||
| 
 | ||||
| See the chart below for a rough estimate of the stored thumbnail size (in kB) as a function of the `--thumbnail-size` and `--thumbnail-quality` arguments: | ||||
| 
 | ||||
| For example, `--thumbnail-size=500`, `--thumbnail-quality=2` for a directory with 8 million images will create a thumbnail database  | ||||
| that is about `8000000 * 36kB = 288GB`. | ||||
| For example, `--thumbnail-size=500`, `--thumbnail-quality=50` for a directory with 8 million images will create a thumbnail database  | ||||
| that is about `8000000 * 11.8kB = 94.4GB`. | ||||
| 
 | ||||
|  | ||||
| 
 | ||||
|  | ||||
										
											Binary file not shown.
										
									
								
							| Before Width: | Height: | Size: 180 KiB After Width: | Height: | Size: 169 KiB | 
| @ -5,7 +5,7 @@ | ||||
| #define DEFAULT_OUTPUT "index.sist2" | ||||
| #define DEFAULT_NAME "index" | ||||
| #define DEFAULT_CONTENT_SIZE 32768 | ||||
| #define DEFAULT_QUALITY 2 | ||||
| #define DEFAULT_QUALITY 50 | ||||
| #define DEFAULT_THUMBNAIL_SIZE 552 | ||||
| #define DEFAULT_THUMBNAIL_COUNT 1 | ||||
| #define DEFAULT_REWRITE_URL "" | ||||
| @ -100,8 +100,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { | ||||
| 
 | ||||
|     if (args->tn_quality == OPTION_VALUE_UNSPECIFIED) { | ||||
|         args->tn_quality = DEFAULT_QUALITY; | ||||
|     } else if (args->tn_quality < 2 || args->tn_quality > 31) { | ||||
|         fprintf(stderr, "Invalid value for --thumbnail-quality argument: %d. Must be within [2, 31].\n", | ||||
|     } else if (args->tn_quality < 0 || args->tn_quality > 100) { | ||||
|         fprintf(stderr, "Invalid value for --thumbnail-quality argument: %d. Must be within [0, 100].\n", | ||||
|                 args->tn_quality); | ||||
|         return 1; | ||||
|     } | ||||
| @ -109,7 +109,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { | ||||
|     if (args->tn_size == OPTION_VALUE_UNSPECIFIED) { | ||||
|         args->tn_size = DEFAULT_THUMBNAIL_SIZE; | ||||
|     } else if (args->tn_size < 32) { | ||||
|         printf("Invalid value --thumbnail-size argument: %d. Must be greater than 32 pixels.\n", args->tn_size); | ||||
|         printf("Invalid value --thumbnail-size argument: %d. Must be >= 32 pixels.\n", args->tn_size); | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|  | ||||
| @ -490,7 +490,7 @@ int main(int argc, const char *argv[]) { | ||||
|             OPT_GROUP("Scan options"), | ||||
|             OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT: 1"), | ||||
|             OPT_INTEGER('q', "thumbnail-quality", &scan_args->tn_quality, | ||||
|                         "Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT: 2", | ||||
|                         "Thumbnail quality, on a scale of 0 to 100, 100 being the best. DEFAULT: 50", | ||||
|                         set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality), | ||||
|             OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size, | ||||
|                         "Thumbnail size, in pixels. DEFAULT: 552", | ||||
|  | ||||
| @ -51,11 +51,11 @@ | ||||
| #include <ctype.h> | ||||
| #include "git_hash.h" | ||||
| 
 | ||||
| #define VERSION "3.0.7" | ||||
| #define VERSION "3.1.0" | ||||
| static const char *const Version = VERSION; | ||||
| static const int VersionMajor = 3; | ||||
| static const int VersionMinor = 0; | ||||
| static const int VersionPatch = 7; | ||||
| static const int VersionMinor = 1; | ||||
| static const int VersionPatch = 0; | ||||
| 
 | ||||
| #ifndef SIST_PLATFORM | ||||
| #define SIST_PLATFORM unknown | ||||
|  | ||||
							
								
								
									
										19
									
								
								third-party/libscan/libscan/ebook/ebook.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										19
									
								
								third-party/libscan/libscan/ebook/ebook.c
									
									
									
									
										vendored
									
									
								
							| @ -153,22 +153,23 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d | ||||
| 
 | ||||
|     sws_freeContext(sws_ctx); | ||||
| 
 | ||||
|     // YUV420p -> JPEG
 | ||||
|     AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(pixmap->w, pixmap->h, ctx->tn_qscale); | ||||
|     avcodec_send_frame(jpeg_encoder, scaled_frame); | ||||
|     // YUV420p -> JPEG/WEBP
 | ||||
|     AVCodecContext *thumbnail_encoder = alloc_webp_encoder(pixmap->w, pixmap->h, ctx->tn_qscale); | ||||
|     avcodec_send_frame(thumbnail_encoder, scaled_frame); | ||||
|     avcodec_send_frame(thumbnail_encoder, NULL); // Send EOF
 | ||||
| 
 | ||||
|     AVPacket jpeg_packet; | ||||
|     av_init_packet(&jpeg_packet); | ||||
|     avcodec_receive_packet(jpeg_encoder, &jpeg_packet); | ||||
|     AVPacket thumbnail_packet; | ||||
|     av_init_packet(&thumbnail_packet); | ||||
|     avcodec_receive_packet(thumbnail_encoder, &thumbnail_packet); | ||||
| 
 | ||||
|     APPEND_LONG_META(doc, MetaThumbnail, 1); | ||||
|     ctx->store(doc->doc_id, 0, (char *) jpeg_packet.data, jpeg_packet.size); | ||||
|     ctx->store(doc->doc_id, 0, (char *) thumbnail_packet.data, thumbnail_packet.size); | ||||
| 
 | ||||
|     free(samples); | ||||
|     av_packet_unref(&jpeg_packet); | ||||
|     av_packet_unref(&thumbnail_packet); | ||||
|     av_free(*scaled_frame->data); | ||||
|     av_frame_free(&scaled_frame); | ||||
|     avcodec_free_context(&jpeg_encoder); | ||||
|     avcodec_free_context(&thumbnail_encoder); | ||||
| 
 | ||||
|     fz_drop_pixmap(fzctx, pixmap); | ||||
|     fz_drop_page(fzctx, cover); | ||||
|  | ||||
							
								
								
									
										31
									
								
								third-party/libscan/libscan/media/media.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										31
									
								
								third-party/libscan/libscan/media/media.c
									
									
									
									
										vendored
									
									
								
							| @ -68,7 +68,7 @@ void *scale_frame(const AVCodecContext *decoder, const AVFrame *frame, int size) | ||||
| 
 | ||||
|     struct SwsContext *sws_ctx = sws_getContext( | ||||
|             decoder->width, decoder->height, decoder->pix_fmt, | ||||
|             dstW, dstH, AV_PIX_FMT_YUVJ420P, | ||||
|             dstW, dstH, AV_PIX_FMT_YUV420P, | ||||
|             SIST_SWS_ALGO, 0, 0, 0 | ||||
|     ); | ||||
| 
 | ||||
| @ -436,7 +436,8 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor | ||||
|         } | ||||
| 
 | ||||
|         if (seek_ok == FALSE && thumbnail_index != 0) { | ||||
|             CTX_LOG_WARNING(doc->filepath, "(media.c) Could not seek media file. Can't generate additional thumbnails."); | ||||
|             CTX_LOG_WARNING(doc->filepath, | ||||
|                             "(media.c) Could not seek media file. Can't generate additional thumbnails."); | ||||
|             return SAVE_THUMBNAIL_FAILED; | ||||
|         } | ||||
|     } | ||||
| @ -470,18 +471,19 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor | ||||
| 
 | ||||
|         ctx->store(doc->doc_id, 0, frame_and_packet->packet->data, frame_and_packet->packet->size); | ||||
|     } else { | ||||
|         // Encode frame to jpeg
 | ||||
|         AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, | ||||
|                                                           ctx->tn_qscale); | ||||
|         avcodec_send_frame(jpeg_encoder, scaled_frame); | ||||
|         // Encode frame
 | ||||
|         AVCodecContext *thumbnail_encoder = alloc_webp_encoder(scaled_frame->width, scaled_frame->height, | ||||
|                                                                ctx->tn_qscale); | ||||
|         avcodec_send_frame(thumbnail_encoder, scaled_frame); | ||||
|         avcodec_send_frame(thumbnail_encoder, NULL); // send EOF
 | ||||
| 
 | ||||
|         AVPacket jpeg_packet; | ||||
|         av_init_packet(&jpeg_packet); | ||||
|         avcodec_receive_packet(jpeg_encoder, &jpeg_packet); | ||||
|         AVPacket thumbnail_packet; | ||||
|         av_init_packet(&thumbnail_packet); | ||||
|         avcodec_receive_packet(thumbnail_encoder, &thumbnail_packet); | ||||
| 
 | ||||
|         // Save thumbnail
 | ||||
|         if (thumbnail_index == 0) { | ||||
|             ctx->store(doc->doc_id, 0, jpeg_packet.data, jpeg_packet.size); | ||||
|             ctx->store(doc->doc_id, 0, thumbnail_packet.data, thumbnail_packet.size); | ||||
|             return_value = SAVE_THUMBNAIL_OK; | ||||
| 
 | ||||
|         } else if (thumbnail_index > 1) { | ||||
| @ -489,15 +491,15 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor | ||||
|             //  I figure out a better fix.
 | ||||
|             thumbnail_index -= 1; | ||||
| 
 | ||||
|             ctx->store(doc->doc_id, thumbnail_index, jpeg_packet.data, jpeg_packet.size); | ||||
|             ctx->store(doc->doc_id, thumbnail_index, thumbnail_packet.data, thumbnail_packet.size); | ||||
| 
 | ||||
|             return_value = SAVE_THUMBNAIL_OK; | ||||
|         } else { | ||||
|             return_value = SAVE_THUMBNAIL_SKIPPED; | ||||
|         } | ||||
| 
 | ||||
|         avcodec_free_context(&jpeg_encoder); | ||||
|         av_packet_unref(&jpeg_packet); | ||||
|         avcodec_free_context(&thumbnail_encoder); | ||||
|         av_packet_unref(&thumbnail_packet); | ||||
|         av_free(*scaled_frame->data); | ||||
|         av_frame_free(&scaled_frame); | ||||
|     } | ||||
| @ -854,9 +856,10 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu | ||||
|         ctx->store(doc->doc_id, 0, frame_and_packet->packet->data, frame_and_packet->packet->size); | ||||
|     } else { | ||||
|         // Encode frame to jpeg
 | ||||
|         AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, | ||||
|         AVCodecContext *jpeg_encoder = alloc_webp_encoder(scaled_frame->width, scaled_frame->height, | ||||
|                                                           ctx->tn_qscale); | ||||
|         avcodec_send_frame(jpeg_encoder, scaled_frame); | ||||
|         avcodec_send_frame(jpeg_encoder, NULL); // Send EOF
 | ||||
| 
 | ||||
|         AVPacket jpeg_packet; | ||||
|         av_init_packet(&jpeg_packet); | ||||
|  | ||||
							
								
								
									
										22
									
								
								third-party/libscan/libscan/media/media.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										22
									
								
								third-party/libscan/libscan/media/media.h
									
									
									
									
										vendored
									
									
								
							| @ -48,6 +48,28 @@ static AVCodecContext *alloc_jpeg_encoder(int w, int h, int qscale) { | ||||
|     return jpeg; | ||||
| } | ||||
| 
 | ||||
| static AVCodecContext *alloc_webp_encoder(int w, int h, int qscale) { | ||||
| 
 | ||||
|     const AVCodec *webp_codec = avcodec_find_encoder(AV_CODEC_ID_WEBP); | ||||
|     AVCodecContext *webp = avcodec_alloc_context3(webp_codec); | ||||
|     webp->width = w; | ||||
|     webp->height = h; | ||||
|     webp->time_base.den = 1000000; | ||||
|     webp->time_base.num = 1; | ||||
|     webp->compression_level = 6; | ||||
|     webp->global_quality = FF_QP2LAMBDA * qscale; | ||||
| 
 | ||||
|     webp->pix_fmt = AV_PIX_FMT_YUV420P; | ||||
|     webp->color_range = AVCOL_RANGE_JPEG; | ||||
|     int ret = avcodec_open2(webp, webp_codec, NULL); | ||||
| 
 | ||||
|     if (ret != 0) { | ||||
|         return NULL; | ||||
|     } | ||||
| 
 | ||||
|     return webp; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc, const char *mime_str); | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										19
									
								
								third-party/libscan/libscan/raw/raw.c
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										19
									
								
								third-party/libscan/libscan/raw/raw.c
									
									
									
									
										vendored
									
									
								
							| @ -52,7 +52,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do | ||||
| 
 | ||||
|     struct SwsContext *sws_ctx = sws_getContext( | ||||
|             img->width, img->height, AV_PIX_FMT_RGB24, | ||||
|             dstW, dstH, AV_PIX_FMT_YUVJ420P, | ||||
|             dstW, dstH, AV_PIX_FMT_YUV420P, | ||||
|             SIST_SWS_ALGO, 0, 0, 0 | ||||
|     ); | ||||
| 
 | ||||
| @ -76,20 +76,21 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do | ||||
| 
 | ||||
|     sws_freeContext(sws_ctx); | ||||
| 
 | ||||
|     AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height, 1.0f); | ||||
|     avcodec_send_frame(jpeg_encoder, scaled_frame); | ||||
|     AVCodecContext *thumbnail_encoder = alloc_webp_encoder(scaled_frame->width, scaled_frame->height, ctx->tn_qscale); | ||||
|     avcodec_send_frame(thumbnail_encoder, scaled_frame); | ||||
|     avcodec_send_frame(thumbnail_encoder, NULL); // Send EOF
 | ||||
| 
 | ||||
|     AVPacket jpeg_packet; | ||||
|     av_init_packet(&jpeg_packet); | ||||
|     avcodec_receive_packet(jpeg_encoder, &jpeg_packet); | ||||
|     AVPacket thumbnail_packet; | ||||
|     av_init_packet(&thumbnail_packet); | ||||
|     avcodec_receive_packet(thumbnail_encoder, &thumbnail_packet); | ||||
| 
 | ||||
|     APPEND_LONG_META(doc, MetaThumbnail, 1); | ||||
|     ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size); | ||||
|     ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) thumbnail_packet.data, thumbnail_packet.size); | ||||
| 
 | ||||
|     av_packet_unref(&jpeg_packet); | ||||
|     av_packet_unref(&thumbnail_packet); | ||||
|     av_free(*scaled_frame->data); | ||||
|     av_frame_free(&scaled_frame); | ||||
|     avcodec_free_context(&jpeg_encoder); | ||||
|     avcodec_free_context(&thumbnail_encoder); | ||||
| 
 | ||||
|     return TRUE; | ||||
| } | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user