Update --thumbnail-quality argument, add documentation

This commit is contained in:
simon987 2023-01-29 11:24:34 -05:00
parent 9972e21fcc
commit 2e3d648796
16 changed files with 40 additions and 30 deletions

View File

@ -12,7 +12,7 @@ REWRITE_URL=""
sist2 scan \
--threads 14 \
--mem-throttle 32768 \
--quality 1.0 \
--thumbnail-quality 2 \
--name $NAME \
--ocr-lang=eng+chi_sim \
--ocr-ebooks \

View File

@ -12,7 +12,7 @@ REWRITE_URL=""
sist2 scan \
--threads 14 \
--mem-throttle 32768 \
--quality 1.0 \
--thumbnail-quality 2 \
--name $NAME \
--ocr-lang=eng+chi_sim \
--ocr-ebooks \

View File

@ -33,7 +33,7 @@ Lightning-fast file system indexer and search tool.
Scan options
-t, --threads=<int> Number of threads. DEFAULT=1
--mem-throttle=<int> Total memory threshold in MiB for scan throttling. DEFAULT=0
-q, --thumbnail-quality=<flt> Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1
-q, --thumbnail-quality=<int> Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2
--thumbnail-size=<int> Thumbnail size, in pixels. DEFAULT=500
--thumbnail-count=<int> Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1
--content-size=<int> Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768
@ -101,7 +101,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
Total memory threshold in MiB for scan throttling. Worker threads will not start a new parse job
until the total memory usage of sist2 is below this threshold. Set to 0 to disable. DEFAULT=0
* `-q, --thumbnail-quality`
Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best.
Thumbnail quality, on a scale of 2 to 31, 2 being the best. See the section below for a rough estimate of the thumbnail database size.
* `--thumbnail-size`
Thumbnail size in pixels.
* `--thumbnail-count`
@ -154,6 +154,16 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
operations. Checksums are not calculated for all file types, unless the file is inside an archive. When enabled, duplicate
files are hidden in the web UI (this behaviour can be toggled in the Configuration page).
#### Thumbnail database size estimation
See the chart below for a rough estimate of thumbnail file size as a function of the `--thumbnail-size` and `--thumbnail-quality` arguments.
For example, `--thumbnail-size=500`, `--thumbnail-quality=2` for a directory with 8 million images will create a thumbnail database
that is about `8000000 * 36kB = 288GB`.
![thumbnail_size](thumbnail_size.png)
### Scan examples
Simple scan
@ -161,7 +171,7 @@ Simple scan
sist2 scan ~/Documents
sist2 scan \
--threads 4 --content-size 16000000 --quality 1.0 --archive shallow \
--threads 4 --content-size 16000000 --thumbnail-quality 2 --archive shallow \
--name "My Documents" --rewrite-url "http://nas.domain.local/My Documents/" \
~/Documents -o ./documents.idx/
```

BIN
docs/thumbnail_size.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 180 KiB

View File

@ -62,7 +62,7 @@ export default {
path: "Path",
threads: "Number of threads",
memThrottle: "Total memory threshold in MiB for scan throttling",
thumbnailQuality: "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best",
thumbnailQuality: "Thumbnail quality, on a scale of 2 to 31, 2 being the best",
thumbnailCount: "Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails.",
thumbnailSize: "Thumbnail size, in pixels",
contentSize: "Number of bytes to be extracted from text documents. Set to 0 to disable",

View File

@ -111,7 +111,7 @@ class ScanOptions(BaseModel):
path: str
threads: int = 1
mem_throttle: int = 0
thumbnail_quality: float = 1.0
thumbnail_quality: int = 2
thumbnail_size: int = 500
thumbnail_count: int = 1
content_size: int = 32768

View File

@ -4,7 +4,7 @@
#define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_CONTENT_SIZE 32768
#define DEFAULT_QUALITY 1
#define DEFAULT_QUALITY 2
#define DEFAULT_THUMBNAIL_SIZE 500
#define DEFAULT_THUMBNAIL_COUNT 1
#define DEFAULT_REWRITE_URL ""
@ -112,8 +112,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->tn_quality == OPTION_VALUE_UNSPECIFIED) {
args->tn_quality = DEFAULT_QUALITY;
} else if (args->tn_quality < 1.0f || args->tn_quality > 31.0f) {
fprintf(stderr, "Invalid value for --thumbnail-quality argument: %f. Must be within [1.0, 31.0].\n",
} else if (args->tn_quality < 2 || args->tn_quality > 31) {
fprintf(stderr, "Invalid value for --thumbnail-quality argument: %d. Must be within [2, 31].\n",
args->tn_quality);
return 1;
}

View File

@ -9,7 +9,7 @@
#define OPTION_VALUE_UNSPECIFIED (0)
typedef struct scan_args {
float tn_quality;
int tn_quality;
int tn_size;
int content_size;
int threads;

View File

@ -650,8 +650,8 @@ int main(int argc, const char *argv[]) {
OPT_INTEGER(0, "mem-throttle", &scan_args->scan_mem_limit_mib,
"Total memory threshold in MiB for scan throttling. DEFAULT=0",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->scan_mem_limit_mib),
OPT_FLOAT('q', "thumbnail-quality", &scan_args->tn_quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1",
OPT_INTEGER('q', "thumbnail-quality", &scan_args->tn_quality,
"Thumbnail quality, on a scale of 2 to 31, 2 being the best. DEFAULT=2",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size,
"Thumbnail size, in pixels. DEFAULT=500",

View File

@ -49,7 +49,7 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "2.13.1"
#define VERSION "2.14.0"
static const char *const Version = VERSION;
#ifndef SIST_PLATFORM

File diff suppressed because one or more lines are too long

View File

@ -11,7 +11,7 @@ typedef struct {
int enable_tn;
int tn_size;
float tn_qscale;
int tn_qscale;
unsigned int cbr_mime;
unsigned int cbz_mime;

View File

@ -15,7 +15,7 @@ typedef struct {
logf_callback_t logf;
store_callback_t store;
int fast_epub_parse;
float tn_qscale;
int tn_qscale;
} scan_ebook_ctx_t;
void parse_ebook(scan_ebook_ctx_t *ctx, vfile_t *f, const char *mime_str, document_t *doc);

View File

@ -16,7 +16,7 @@ typedef struct {
store_callback_t store;
int tn_size;
float tn_qscale;
int tn_qscale;
/** Number of thumbnails to generate for videos */
int tn_count;
@ -28,7 +28,7 @@ typedef struct {
} scan_media_ctx_t;
__always_inline
static AVCodecContext *alloc_jpeg_encoder(int w, int h, float qscale) {
static AVCodecContext *alloc_jpeg_encoder(int w, int h, int qscale) {
const AVCodec *jpeg_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
AVCodecContext *jpeg = avcodec_alloc_context3(jpeg_codec);
@ -36,7 +36,7 @@ static AVCodecContext *alloc_jpeg_encoder(int w, int h, float qscale) {
jpeg->height = h;
jpeg->time_base.den = 1000000;
jpeg->time_base.num = 1;
jpeg->i_quant_factor = qscale;
jpeg->i_quant_factor = (float) qscale;
jpeg->pix_fmt = AV_PIX_FMT_YUVJ420P;
int ret = avcodec_open2(jpeg, jpeg_codec, NULL);
@ -49,7 +49,7 @@ static AVCodecContext *alloc_jpeg_encoder(int w, int h, float qscale) {
}
void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc, const char*mime_str);
void parse_media(scan_media_ctx_t *ctx, vfile_t *f, document_t *doc, const char *mime_str);
void init_media();

View File

@ -10,7 +10,7 @@ typedef struct {
int enable_tn;
int tn_size;
float tn_qscale;
int tn_qscale;
} scan_raw_ctx_t;
void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc);

View File

@ -1133,7 +1133,7 @@ int main(int argc, char **argv) {
ebook_ctx.log = noop_log;
ebook_ctx.logf = noop_logf;
ebook_ctx.fast_epub_parse = 0;
ebook_ctx.tn_qscale = 1.0;
ebook_ctx.tn_qscale = 2;
ebook_500_ctx = ebook_ctx;
ebook_500_ctx.content_size = 500;
@ -1141,14 +1141,14 @@ int main(int argc, char **argv) {
ebook_fast_ctx = ebook_500_ctx;
ebook_fast_ctx.fast_epub_parse = 1;
comic_ctx.tn_qscale = 1.0;
comic_ctx.tn_qscale = 2;
comic_ctx.tn_size = 500;
comic_ctx.enable_tn = TRUE;
comic_ctx.log = noop_log;
comic_ctx.logf = noop_logf;
comic_ctx.store = counter_store;
comic_big_ctx.tn_qscale = 1.0;
comic_big_ctx.tn_qscale = 2;
comic_big_ctx.tn_size = 5000;
comic_big_ctx.enable_tn = TRUE;
comic_big_ctx.log = noop_log;
@ -1160,7 +1160,7 @@ int main(int argc, char **argv) {
media_ctx.store = counter_store;
media_ctx.tn_size = 500;
media_ctx.tn_count = 1;
media_ctx.tn_qscale = 1.0;
media_ctx.tn_qscale = 2;
media_ctx.max_media_buffer = (long) 2000 * (long) 1024 * (long) 1024;
ooxml_500_ctx.content_size = 500;