mirror of
https://github.com/simon987/sist2.git
synced 2025-12-11 14:38:54 +00:00
UI fix, disable thumbnail option, batch index size option
This commit is contained in:
15
src/cli.c
15
src/cli.c
@@ -7,6 +7,7 @@
|
||||
#define DEFAULT_REWRITE_URL ""
|
||||
|
||||
#define DEFAULT_ES_URL "http://localhost:9200"
|
||||
#define DEFAULT_BATCH_SIZE 100
|
||||
|
||||
#define DEFAULT_BIND_ADDR "localhost"
|
||||
#define DEFAULT_PORT "4090"
|
||||
@@ -33,6 +34,7 @@ void scan_args_destroy(scan_args_t *args) {
|
||||
free(args);
|
||||
}
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
void index_args_destroy(index_args_t *args) {
|
||||
//todo
|
||||
free(args);
|
||||
@@ -42,6 +44,7 @@ void web_args_destroy(web_args_t *args) {
|
||||
//todo
|
||||
free(args);
|
||||
}
|
||||
#endif
|
||||
|
||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
if (argc < 2) {
|
||||
@@ -74,16 +77,13 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
|
||||
if (args->size == 0) {
|
||||
args->size = DEFAULT_SIZE;
|
||||
} else if (args->size <= 0) {
|
||||
fprintf(stderr, "Invalid size: %d\n", args->size);
|
||||
} else if (args->size > 0 && args->size < 32) {
|
||||
printf("Invalid size: %d\n", args->content_size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->content_size == 0) {
|
||||
args->content_size = DEFAULT_CONTENT_SIZE;
|
||||
} else if (args->content_size <= 0) {
|
||||
fprintf(stderr, "Invalid content-size: %d\n", args->content_size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->threads == 0) {
|
||||
@@ -168,6 +168,11 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
*(args->script + info.st_size) = '\0';
|
||||
close(fd);
|
||||
}
|
||||
|
||||
if (args->batch_size == 0) {
|
||||
args->batch_size = DEFAULT_BATCH_SIZE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ typedef struct index_args {
|
||||
const char *script_path;
|
||||
char *script;
|
||||
int print;
|
||||
int batch_size;
|
||||
int force_reset;
|
||||
} index_args_t;
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ struct {
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
/* Settings used by the indexing code; sist2_index() copies both fields from the parsed index arguments. */
struct {
|
||||
/* Copied from args->es_url; presumably the Elasticsearch base URL (--es-url option) -- TODO confirm against the option wiring. */
char *es_url;
|
||||
/* Number of queued bulk lines at which elastic_index_line() calls elastic_flush(). */
int batch_size;
|
||||
} IndexCtx;
|
||||
|
||||
struct {
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
|
||||
#include "static_generated.c"
|
||||
|
||||
#define BULK_INDEX_SIZE 100
|
||||
|
||||
typedef struct es_indexer {
|
||||
int queued;
|
||||
@@ -173,7 +172,7 @@ void elastic_index_line(es_bulk_line_t *line) {
|
||||
|
||||
Indexer->queued += 1;
|
||||
|
||||
if (Indexer->queued >= BULK_INDEX_SIZE) {
|
||||
if (Indexer->queued >= IndexCtx.batch_size) {
|
||||
elastic_flush();
|
||||
}
|
||||
}
|
||||
|
||||
29
src/main.c
29
src/main.c
@@ -10,7 +10,7 @@
|
||||
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||
|
||||
|
||||
static const char *const Version = "1.1.8";
|
||||
static const char *const Version = "1.1.9";
|
||||
static const char *const usage[] = {
|
||||
"sist2 scan [OPTION]... PATH",
|
||||
"sist2 index [OPTION]... INDEX",
|
||||
@@ -41,10 +41,22 @@ void init_dir(const char *dirpath) {
|
||||
void scan_print_header() {
|
||||
printf("sist2 V%s\n", Version);
|
||||
printf("---------------------\n");
|
||||
printf("threads\t\t%d\n", ScanCtx.threads);
|
||||
printf("tn_qscale\t%.1f/31.0\n", ScanCtx.tn_qscale);
|
||||
printf("tn_size\t\t%dpx\n", ScanCtx.tn_size);
|
||||
printf("output\t\t%s\n", ScanCtx.index.path);
|
||||
printf("threads\t\t\t%d\n", ScanCtx.threads);
|
||||
printf("tn_qscale\t\t%.1f/31.0\n", ScanCtx.tn_qscale);
|
||||
|
||||
if (ScanCtx.tn_size > 0) {
|
||||
printf("tn_size\t\t\t%dpx\n", ScanCtx.tn_size);
|
||||
} else {
|
||||
printf("tn_size\t\t\tdisabled\n");
|
||||
}
|
||||
|
||||
if (ScanCtx.content_size > 0) {
|
||||
printf("content_size\t%d B\n", ScanCtx.content_size);
|
||||
} else {
|
||||
printf("content_size\t\t\tdisabled\n");
|
||||
}
|
||||
|
||||
printf("output\t\t\t%s\n", ScanCtx.index.path);
|
||||
}
|
||||
|
||||
void sist2_scan(scan_args_t *args) {
|
||||
@@ -130,6 +142,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
void sist2_index(index_args_t *args) {
|
||||
|
||||
IndexCtx.es_url = args->es_url;
|
||||
IndexCtx.batch_size = args->batch_size;
|
||||
|
||||
if (!args->print) {
|
||||
elastic_init(args->force_reset);
|
||||
@@ -226,9 +239,10 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_FLOAT('q', "quality", &scan_args->quality,
|
||||
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
|
||||
OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=500"),
|
||||
OPT_INTEGER(0, "size", &scan_args->size,
|
||||
"Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500"),
|
||||
OPT_INTEGER(0, "content-size", &scan_args->content_size,
|
||||
"Number of bytes to be extracted from text documents. DEFAULT=4096"),
|
||||
"Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=4096"),
|
||||
OPT_STRING(0, "incremental", &scan_args->incremental,
|
||||
"Reuse an existing index and only scan modified files."),
|
||||
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
|
||||
@@ -242,6 +256,7 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
|
||||
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
|
||||
"(You must use this option the first time you use the index command)"),
|
||||
|
||||
|
||||
@@ -167,6 +167,10 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
|
||||
strcpy(meta_name->strval, font_name);
|
||||
APPEND_META(doc, meta_name)
|
||||
|
||||
if (ScanCtx.tn_size <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
int pixel = 64;
|
||||
int num_chars = (int) strlen(font_name);
|
||||
|
||||
|
||||
@@ -242,7 +242,7 @@ void parse_media(const char *filepath, document_t *doc) {
|
||||
}
|
||||
}
|
||||
|
||||
if (video_stream != -1) {
|
||||
if (video_stream != -1 && ScanCtx.tn_size > 0) {
|
||||
AVStream *stream = pFormatCtx->streams[video_stream];
|
||||
|
||||
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
|
||||
|
||||
@@ -97,33 +97,31 @@ void parse(void *arg) {
|
||||
|
||||
int mmime = MAJOR_MIME(doc.mime);
|
||||
|
||||
parse_text(bytes_read, &fd, (char *) buf, &doc);
|
||||
if (!(SHOULD_PARSE(doc.mime))) {
|
||||
|
||||
// if (!(SHOULD_PARSE(doc.mime))) {
|
||||
//
|
||||
// } else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
|
||||
// (mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
||||
// parse_media(job->filepath, &doc);
|
||||
//
|
||||
// } else if (IS_PDF(doc.mime)) {
|
||||
// void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd);
|
||||
// parse_pdf(pdf_buf, doc.size, &doc);
|
||||
//
|
||||
// if (pdf_buf != buf && pdf_buf != NULL) {
|
||||
// free(pdf_buf);
|
||||
// }
|
||||
//
|
||||
// } else if (mmime == MimeText && ScanCtx.content_size > 0) {
|
||||
// parse_text(bytes_read, &fd, (char *) buf, &doc);
|
||||
//
|
||||
// } else if (IS_FONT(doc.mime)) {
|
||||
// void *font_buf = read_all(job, (char *) buf, bytes_read, &fd);
|
||||
// parse_font(font_buf, doc.size, &doc);
|
||||
//
|
||||
// if (font_buf != buf && font_buf != NULL) {
|
||||
// free(font_buf);
|
||||
// }
|
||||
// }
|
||||
} else if ((mmime == MimeVideo && doc.size >= MIN_VIDEO_SIZE) ||
|
||||
(mmime == MimeImage && doc.size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
|
||||
parse_media(job->filepath, &doc);
|
||||
|
||||
} else if (IS_PDF(doc.mime)) {
|
||||
void *pdf_buf = read_all(job, (char *) buf, bytes_read, &fd);
|
||||
parse_pdf(pdf_buf, doc.size, &doc);
|
||||
|
||||
if (pdf_buf != buf && pdf_buf != NULL) {
|
||||
free(pdf_buf);
|
||||
}
|
||||
|
||||
} else if (mmime == MimeText && ScanCtx.content_size > 0) {
|
||||
parse_text(bytes_read, &fd, (char *) buf, &doc);
|
||||
|
||||
} else if (IS_FONT(doc.mime)) {
|
||||
void *font_buf = read_all(job, (char *) buf, bytes_read, &fd);
|
||||
parse_font(font_buf, doc.size, &doc);
|
||||
|
||||
if (font_buf != buf && font_buf != NULL) {
|
||||
free(font_buf);
|
||||
}
|
||||
}
|
||||
|
||||
write_document(&doc);
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
#include <src/ctx.h>
|
||||
#include "pdf.h"
|
||||
#include "src/ctx.h"
|
||||
|
||||
@@ -178,7 +177,17 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
|
||||
return;
|
||||
}
|
||||
|
||||
fz_page *cover = render_cover(ctx, doc, fzdoc);
|
||||
fz_page *cover = NULL;
|
||||
if (ScanCtx.tn_size > 0) {
|
||||
cover = render_cover(ctx, doc, fzdoc);
|
||||
} else {
|
||||
fz_var(cover);
|
||||
fz_try(ctx)
|
||||
cover = fz_load_page(ctx, fzdoc, 0);
|
||||
fz_catch(ctx)
|
||||
cover = NULL;
|
||||
}
|
||||
|
||||
if (cover == NULL) {
|
||||
fz_drop_stream(ctx, stream);
|
||||
fz_drop_document(ctx, fzdoc);
|
||||
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user