Compare commits

...

7 Commits

Author SHA1 Message Date
d816dae8b3 UI fix, disable thumbnail option, batch index size option 2019-12-01 10:57:29 -05:00
4346c3e063 Also use static libraries in sist2 build 2019-11-30 20:02:26 -05:00
1a1032a8a7 Cleaner shutdown 2019-11-30 19:59:11 -05:00
4ab2ba1a02 #8 Skip PDF scan when content-size is 0 2019-11-21 16:06:31 -05:00
d089601dc5 Add sfv & m3u 2019-11-20 12:31:31 -05:00
11df6cc88f Add nfo to ext list 2019-11-20 11:41:50 -05:00
373ac01e4e Fix for #3 and maximum scan depth 2019-11-19 11:23:30 -05:00
23 changed files with 217 additions and 111 deletions

View File

@@ -157,8 +157,8 @@ if (WITH_SIST2)
m m
bz2 bz2
magic magic
harfbuzz ${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
openjp2 ${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
freetype freetype
) )

View File

@@ -1,3 +1,4 @@
rm ./sist2
cp ../sist2 . cp ../sist2 .
version=$(./sist2 --version) version=$(./sist2 --version)

View File

@@ -252,7 +252,7 @@ text/html, acgi|htm|html|htmls|htx|shtml
text/javascript, js text/javascript, js
text/mcf, mcf text/mcf, mcf
text/pascal, pas text/pascal, pas
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u
text/richtext, rt|rtf|rtx text/richtext, rt|rtf|rtx
text/rtf, text/rtf,
text/scriplet, wsc text/scriplet, wsc
1 application/arj arj
252 text/javascript js
253 text/mcf mcf
254 text/pascal pas
255 text/plain com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u
256 text/richtext rt|rtf|rtx
257 text/rtf
258 text/scriplet wsc

View File

@@ -7,6 +7,7 @@
#define DEFAULT_REWRITE_URL "" #define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200" #define DEFAULT_ES_URL "http://localhost:9200"
#define DEFAULT_BATCH_SIZE 100
#define DEFAULT_BIND_ADDR "localhost" #define DEFAULT_BIND_ADDR "localhost"
#define DEFAULT_PORT "4090" #define DEFAULT_PORT "4090"
@@ -14,9 +15,37 @@
scan_args_t *scan_args_create() { scan_args_t *scan_args_create() {
scan_args_t *args = calloc(sizeof(scan_args_t), 1); scan_args_t *args = calloc(sizeof(scan_args_t), 1);
args->depth = -1;
return args; return args;
} }
/**
 * Release a scan_args_t and the strings it holds.
 *
 * Frees args->name, args->path and args->output, then args itself.
 * Passing NULL is a no-op, like free(NULL).
 *
 * NOTE(review): this assumes all three strings are heap allocations owned by
 * args. If any of them can still point at memory owned by someone else
 * (for example a command-line argument stored as-is by the option parser),
 * freeing it here is invalid -- confirm ownership against the callers.
 */
void scan_args_destroy(scan_args_t *args) {
    if (args == NULL) {
        return;
    }

    /* free(NULL) is defined to do nothing, so unset fields need no guard. */
    free(args->name);
    free(args->path);
    free(args->output);
    free(args);
}
#ifndef SIST_SCAN_ONLY
/**
 * Release an index_args_t.
 *
 * Only the struct itself is freed; none of its members are released here.
 */
void index_args_destroy(index_args_t *args) {
// TODO: release the members of args as well.
// NOTE(review): args->script looks like a buffer filled in by
// index_args_validate -- confirm whether it must be freed here.
free(args);
}
/**
 * Release a web_args_t.
 *
 * Only the struct itself is freed; none of its members are released here.
 */
void web_args_destroy(web_args_t *args) {
// TODO: release the members of args as well (nothing is freed yet).
free(args);
}
#endif
int scan_args_validate(scan_args_t *args, int argc, const char **argv) { int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (argc < 2) { if (argc < 2) {
fprintf(stderr, "Required positional argument: PATH.\n"); fprintf(stderr, "Required positional argument: PATH.\n");
@@ -48,16 +77,13 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->size == 0) { if (args->size == 0) {
args->size = DEFAULT_SIZE; args->size = DEFAULT_SIZE;
} else if (args->size <= 0) { } else if (args->size > 0 && args->size < 32) {
fprintf(stderr, "Invalid size: %d\n", args->size); printf("Invalid size: %d\n", args->content_size);
return 1; return 1;
} }
if (args->content_size == 0) { if (args->content_size == 0) {
args->content_size = DEFAULT_CONTENT_SIZE; args->content_size = DEFAULT_CONTENT_SIZE;
} else if (args->content_size <= 0) {
fprintf(stderr, "Invalid content-size: %d\n", args->content_size);
return 1;
} }
if (args->threads == 0) { if (args->threads == 0) {
@@ -80,6 +106,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1; return 1;
} }
if (args->depth < 0) {
args->depth = G_MAXINT32;
} else {
args->depth += 1;
}
if (args->name == NULL) { if (args->name == NULL) {
args->name = g_path_get_basename(args->output); args->name = g_path_get_basename(args->output);
} }
@@ -104,6 +136,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
return 1; return 1;
} else { } else {
args->index_path = argv[1]; args->index_path = argv[1];
free(index_path);
} }
if (args->es_url == NULL) { if (args->es_url == NULL) {
@@ -135,6 +168,11 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
*(args->script + info.st_size) = '\0'; *(args->script + info.st_size) = '\0';
close(fd); close(fd);
} }
if (args->batch_size == 0) {
args->batch_size = DEFAULT_BATCH_SIZE;
}
return 0; return 0;
} }

View File

@@ -12,10 +12,12 @@ typedef struct scan_args {
char *output; char *output;
char *rewrite_url; char *rewrite_url;
char *name; char *name;
int depth;
char *path; char *path;
} scan_args_t; } scan_args_t;
scan_args_t *scan_args_create(); scan_args_t *scan_args_create();
void scan_args_destroy(scan_args_t *args);
int scan_args_validate(scan_args_t *args, int argc, const char **argv); int scan_args_validate(scan_args_t *args, int argc, const char **argv);
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
@@ -25,6 +27,7 @@ typedef struct index_args {
const char *script_path; const char *script_path;
char *script; char *script;
int print; int print;
int batch_size;
int force_reset; int force_reset;
} index_args_t; } index_args_t;
@@ -39,7 +42,10 @@ typedef struct web_args {
} web_args_t; } web_args_t;
index_args_t *index_args_create(); index_args_t *index_args_create();
void index_args_destroy(index_args_t *args);
web_args_t *web_args_create(); web_args_t *web_args_create();
void web_args_destroy(web_args_t *args);
int index_args_validate(index_args_t *args, int argc, const char **argv); int index_args_validate(index_args_t *args, int argc, const char **argv);
int web_args_validate(web_args_t *args, int argc, const char **argv); int web_args_validate(web_args_t *args, int argc, const char **argv);

View File

@@ -15,6 +15,7 @@ struct {
int threads; int threads;
int content_size; int content_size;
float tn_qscale; float tn_qscale;
int depth;
size_t stat_tn_size; size_t stat_tn_size;
size_t stat_index_size; size_t stat_index_size;
@@ -29,12 +30,13 @@ struct {
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
struct { struct {
char *es_url; char *es_url;
int batch_size;
} IndexCtx; } IndexCtx;
struct { struct {
char *es_url; char *es_url;
int index_count; int index_count;
char* b64credentials; char *b64credentials;
struct index_t indices[16]; struct index_t indices[16];
} WebCtx; } WebCtx;
#endif #endif

View File

@@ -9,7 +9,6 @@
#include "static_generated.c" #include "static_generated.c"
#define BULK_INDEX_SIZE 100
typedef struct es_indexer { typedef struct es_indexer {
int queued; int queued;
@@ -173,7 +172,7 @@ void elastic_index_line(es_bulk_line_t *line) {
Indexer->queued += 1; Indexer->queued += 1;
if (Indexer->queued >= BULK_INDEX_SIZE) { if (Indexer->queued >= IndexCtx.batch_size) {
elastic_flush(); elastic_flush();
} }
} }

View File

@@ -1,7 +1,7 @@
#include "src/ctx.h" #include "src/ctx.h"
#include "serialize.h" #include "serialize.h"
static __thread int IndexFd = -1; static __thread int index_fd = -1;
typedef struct { typedef struct {
unsigned char uuid[16]; unsigned char uuid[16];
@@ -119,13 +119,13 @@ char *get_meta_key_text(enum metakey meta_key) {
void write_document(document_t *doc) { void write_document(document_t *doc) {
if (IndexFd == -1) { if (index_fd == -1) {
char dstfile[PATH_MAX]; char dstfile[PATH_MAX];
pthread_t self = pthread_self(); pthread_t self = pthread_self();
snprintf(dstfile, PATH_MAX, "%s_index_%lu", ScanCtx.index.path, self); snprintf(dstfile, PATH_MAX, "%s_index_%lu", ScanCtx.index.path, self);
IndexFd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR); index_fd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
if (IndexFd == -1) { if (index_fd == -1) {
perror("open"); perror("open");
} }
} }
@@ -158,13 +158,16 @@ void write_document(document_t *doc) {
} }
dyn_buffer_write_char(&buf, '\n'); dyn_buffer_write_char(&buf, '\n');
write(IndexFd, buf.buf, buf.cur); int res = write(index_fd, buf.buf, buf.cur);
if (res == -1) {
perror("write");
}
ScanCtx.stat_index_size += buf.cur; ScanCtx.stat_index_size += buf.cur;
dyn_buffer_destroy(&buf); dyn_buffer_destroy(&buf);
} }
void serializer_cleanup() { void thread_cleanup() {
close(IndexFd); close(index_fd);
} }
void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func func) { void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func func) {

View File

@@ -18,7 +18,7 @@ void incremental_read(GHashTable *table, const char *filepath);
/** /**
* Must be called after write_document * Must be called after write_document
*/ */
void serializer_cleanup(); void thread_cleanup();
void write_index_descriptor(char *path, index_descriptor_t *desc); void write_index_descriptor(char *path, index_descriptor_t *desc);

View File

@@ -20,7 +20,7 @@ parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int
} }
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) { int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
if (typeflag == FTW_F && S_ISREG(info->st_mode)) { if (ftw->level <= ScanCtx.depth && typeflag == FTW_F && S_ISREG(info->st_mode)) {
parse_job_t *job = create_parse_job(filepath, info, ftw->base); parse_job_t *job = create_parse_job(filepath, info, ftw->base);
tpool_add_work(ScanCtx.pool, parse, job); tpool_add_work(ScanCtx.pool, parse, job);
} }

View File

@@ -10,7 +10,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0" #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.1.7"; static const char *const Version = "1.1.9";
static const char *const usage[] = { static const char *const usage[] = {
"sist2 scan [OPTION]... PATH", "sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX", "sist2 index [OPTION]... INDEX",
@@ -19,9 +19,9 @@ static const char *const usage[] = {
}; };
void global_init() { void global_init() {
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
curl_global_init(CURL_GLOBAL_NOTHING); curl_global_init(CURL_GLOBAL_NOTHING);
#endif #endif
av_log_set_level(AV_LOG_QUIET); av_log_set_level(AV_LOG_QUIET);
} }
@@ -41,10 +41,22 @@ void init_dir(const char *dirpath) {
void scan_print_header() { void scan_print_header() {
printf("sist2 V%s\n", Version); printf("sist2 V%s\n", Version);
printf("---------------------\n"); printf("---------------------\n");
printf("threads\t\t%d\n", ScanCtx.threads); printf("threads\t\t\t%d\n", ScanCtx.threads);
printf("tn_qscale\t%.1f/31.0\n", ScanCtx.tn_qscale); printf("tn_qscale\t\t%.1f/31.0\n", ScanCtx.tn_qscale);
printf("tn_size\t\t%dpx\n", ScanCtx.tn_size);
printf("output\t\t%s\n", ScanCtx.index.path); if (ScanCtx.tn_size > 0) {
printf("tn_size\t\t\t%dpx\n", ScanCtx.tn_size);
} else {
printf("tn_size\t\t\tdisabled\n");
}
if (ScanCtx.content_size > 0) {
printf("content_size\t%d B\n", ScanCtx.content_size);
} else {
printf("content_size\t\t\tdisabled\n");
}
printf("output\t\t\t%s\n", ScanCtx.index.path);
} }
void sist2_scan(scan_args_t *args) { void sist2_scan(scan_args_t *args) {
@@ -53,6 +65,7 @@ void sist2_scan(scan_args_t *args) {
ScanCtx.tn_size = args->size; ScanCtx.tn_size = args->size;
ScanCtx.content_size = args->content_size; ScanCtx.content_size = args->content_size;
ScanCtx.threads = args->threads; ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path)); strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name)); strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root)); strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
@@ -92,7 +105,7 @@ void sist2_scan(scan_args_t *args) {
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table)); printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
} }
ScanCtx.pool = tpool_create(args->threads, serializer_cleanup); ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
tpool_start(ScanCtx.pool); tpool_start(ScanCtx.pool);
walk_directory_tree(ScanCtx.index.desc.root); walk_directory_tree(ScanCtx.index.desc.root);
tpool_wait(ScanCtx.pool); tpool_wait(ScanCtx.pool);
@@ -125,9 +138,11 @@ void sist2_scan(scan_args_t *args) {
} }
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
void sist2_index(index_args_t *args) { void sist2_index(index_args_t *args) {
IndexCtx.es_url = args->es_url; IndexCtx.es_url = args->es_url;
IndexCtx.batch_size = args->batch_size;
if (!args->print) { if (!args->print) {
elastic_init(args->force_reset); elastic_init(args->force_reset);
@@ -197,6 +212,7 @@ void sist2_web(web_args_t *args) {
serve(args->bind, args->port); serve(args->bind, args->port);
} }
#endif #endif
@@ -205,14 +221,14 @@ int main(int argc, const char *argv[]) {
global_init(); global_init();
scan_args_t *scan_args = scan_args_create(); scan_args_t *scan_args = scan_args_create();
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
index_args_t *index_args = index_args_create(); index_args_t *index_args = index_args_create();
web_args_t *web_args = web_args_create(); web_args_t *web_args = web_args_create();
#endif #endif
int arg_version = 0; int arg_version = 0;
char * common_es_url = NULL; char *common_es_url = NULL;
struct argparse_option options[] = { struct argparse_option options[] = {
OPT_HELP(), OPT_HELP(),
@@ -223,29 +239,33 @@ int main(int argc, const char *argv[]) {
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"), OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
OPT_FLOAT('q', "quality", &scan_args->quality, OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"), "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=500"), OPT_INTEGER(0, "size", &scan_args->size,
"Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500"),
OPT_INTEGER(0, "content-size", &scan_args->content_size, OPT_INTEGER(0, "content-size", &scan_args->content_size,
"Number of bytes to be extracted from text documents. DEFAULT=4096"), "Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=4096"),
OPT_STRING(0, "incremental", &scan_args->incremental, OPT_STRING(0, "incremental", &scan_args->incremental,
"Reuse an existing index and only scan modified files."), "Reuse an existing index and only scan modified files."),
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"), OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."), OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"), OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
"Use 0 to only scan files in PATH. DEFAULT: -1"),
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
OPT_GROUP("Index options"), OPT_GROUP("Index options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."), OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."), OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. " OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"), "(You must use this option the first time you use the index command)"),
OPT_GROUP("Web options"), OPT_GROUP("Web options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"), OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"),
OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"), OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"),
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"), OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
#endif #endif
OPT_END(), OPT_END(),
}; };
@@ -260,10 +280,10 @@ int main(int argc, const char *argv[]) {
exit(0); exit(0);
} }
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
web_args->es_url = common_es_url; web_args->es_url = common_es_url;
index_args->es_url = common_es_url; index_args->es_url = common_es_url;
#endif #endif
if (argc == 0) { if (argc == 0) {
argparse_usage(&argparse); argparse_usage(&argparse);
@@ -278,7 +298,7 @@ int main(int argc, const char *argv[]) {
} }
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
else if (strcmp(argv[0], "index") == 0) { else if (strcmp(argv[0], "index") == 0) {
int err = index_args_validate(index_args, argc, argv); int err = index_args_validate(index_args, argc, argv);
@@ -296,12 +316,20 @@ int main(int argc, const char *argv[]) {
sist2_web(web_args); sist2_web(web_args);
} }
#endif #endif
else { else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]); fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse); argparse_usage(&argparse);
return 1; return 1;
} }
printf("\n"); printf("\n");
scan_args_destroy(scan_args);
#ifndef SIST_SCAN_ONLY
index_args_destroy(index_args);
web_args_destroy(web_args);
#endif
return 0; return 0;
} }

View File

@@ -1,11 +1,9 @@
#include "font.h" #include "font.h"
#include "ft2build.h"
#include "freetype/freetype.h"
#include "src/ctx.h" #include "src/ctx.h"
__thread FT_Library library = NULL; __thread FT_Library ft_lib = NULL;
typedef struct text_dimensions { typedef struct text_dimensions {
@@ -139,15 +137,15 @@ void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned
} }
void parse_font(const char *buf, size_t buf_len, document_t *doc) { void parse_font(const char *buf, size_t buf_len, document_t *doc) {
if (library == NULL) { if (ft_lib == NULL) {
FT_Init_FreeType(&library); FT_Init_FreeType(&ft_lib);
} }
if (buf == NULL) { if (buf == NULL) {
return; return;
} }
FT_Face face; FT_Face face;
FT_Error err = FT_New_Memory_Face(library, (unsigned char *) buf, buf_len, 0, &face); FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
if (err != 0) { if (err != 0) {
return; return;
} }
@@ -169,6 +167,10 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
strcpy(meta_name->strval, font_name); strcpy(meta_name->strval, font_name);
APPEND_META(doc, meta_name) APPEND_META(doc, meta_name)
if (ScanCtx.tn_size <= 0) {
return;
}
int pixel = 64; int pixel = 64;
int num_chars = (int) strlen(font_name); int num_chars = (int) strlen(font_name);

View File

@@ -242,7 +242,7 @@ void parse_media(const char *filepath, document_t *doc) {
} }
} }
if (video_stream != -1) { if (video_stream != -1 && ScanCtx.tn_size > 0) {
AVStream *stream = pFormatCtx->streams[video_stream]; AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) { if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {

View File

@@ -1182,6 +1182,9 @@ g_hash_table_insert(ext_table, "d", (gpointer)text_plain);
g_hash_table_insert(ext_table, "cs", (gpointer)text_plain); g_hash_table_insert(ext_table, "cs", (gpointer)text_plain);
g_hash_table_insert(ext_table, "hpp", (gpointer)text_plain); g_hash_table_insert(ext_table, "hpp", (gpointer)text_plain);
g_hash_table_insert(ext_table, "srt", (gpointer)text_plain); g_hash_table_insert(ext_table, "srt", (gpointer)text_plain);
g_hash_table_insert(ext_table, "nfo", (gpointer)text_plain);
g_hash_table_insert(ext_table, "sfv", (gpointer)text_plain);
g_hash_table_insert(ext_table, "m3u", (gpointer)text_plain);
g_hash_table_insert(ext_table, "rt", (gpointer)text_richtext); g_hash_table_insert(ext_table, "rt", (gpointer)text_richtext);
g_hash_table_insert(ext_table, "rtf", (gpointer)text_richtext); g_hash_table_insert(ext_table, "rtf", (gpointer)text_richtext);
g_hash_table_insert(ext_table, "rtx", (gpointer)text_richtext); g_hash_table_insert(ext_table, "rtx", (gpointer)text_richtext);

View File

@@ -44,7 +44,6 @@ void parse(void *arg) {
if (Magic == NULL) { if (Magic == NULL) {
Magic = magic_open(MAGIC_MIME_TYPE); Magic = magic_open(MAGIC_MIME_TYPE);
magic_load(Magic, NULL);
} }
doc.filepath = job->filepath; doc.filepath = job->filepath;

View File

@@ -177,7 +177,17 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
return; return;
} }
fz_page *cover = render_cover(ctx, doc, fzdoc); fz_page *cover = NULL;
if (ScanCtx.tn_size > 0) {
cover = render_cover(ctx, doc, fzdoc);
} else {
fz_var(cover);
fz_try(ctx)
cover = fz_load_page(ctx, fzdoc, 0);
fz_catch(ctx)
cover = NULL;
}
if (cover == NULL) { if (cover == NULL) {
fz_drop_stream(ctx, stream); fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc); fz_drop_document(ctx, fzdoc);
@@ -185,79 +195,81 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
return; return;
} }
fz_stext_options opts = {0}; if (ScanCtx.content_size > 0) {
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size); fz_stext_options opts = {0};
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page = NULL;
if (current_page == 0) {
page = cover;
} else {
fz_var(err);
fz_try(ctx)
page = fz_load_page(ctx, fzdoc, current_page);
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page);
fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
}
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
for (int current_page = 0; current_page < page_count; current_page++) {
fz_page *page = NULL;
if (current_page == 0) {
page = cover;
} else {
fz_var(err); fz_var(err);
fz_try(ctx) fz_try(ctx)
page = fz_load_page(ctx, fzdoc, current_page); fz_run_page(ctx, page, dev, fz_identity, NULL);
fz_always(ctx)
{
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
}
fz_catch(ctx) fz_catch(ctx)
err = ctx->error.errcode; err = ctx->error.errcode;
if (err != 0) { if (err != 0) {
text_buffer_destroy(&text_buf); text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page); fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext);
fz_drop_stream(ctx, stream); fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc); fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx); fz_drop_context(ctx);
return; return;
} }
}
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page)); fz_stext_block *block = stext->first_block;
fz_device *dev = fz_new_stext_device(ctx, stext, &opts); while (block != NULL) {
int ret = read_stext_block(block, &text_buf);
fz_var(err); if (ret == TEXT_BUF_FULL) {
fz_try(ctx) break;
fz_run_page(ctx, page, dev, fz_identity, NULL); }
fz_always(ctx) block = block->next;
{ }
fz_close_device(ctx, dev);
fz_drop_device(ctx, dev);
}
fz_catch(ctx)
err = ctx->error.errcode;
if (err != 0) {
text_buffer_destroy(&text_buf);
fz_drop_page(ctx, page);
fz_drop_stext_page(ctx, stext); fz_drop_stext_page(ctx, stext);
fz_drop_stream(ctx, stream); fz_drop_page(ctx, page);
fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx);
return;
}
fz_stext_block *block = stext->first_block; if (text_buf.dyn_buffer.cur >= text_buf.dyn_buffer.size) {
while (block != NULL) {
int ret = read_stext_block(block, &text_buf);
if (ret == TEXT_BUF_FULL) {
break; break;
} }
block = block->next;
} }
fz_drop_stext_page(ctx, stext); text_buffer_terminate_string(&text_buf);
fz_drop_page(ctx, page);
if (text_buf.dyn_buffer.cur >= text_buf.dyn_buffer.size) { meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
break; meta_content->key = MetaContent;
} memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
APPEND_META(doc, meta_content)
text_buffer_destroy(&text_buf);
} }
text_buffer_terminate_string(&text_buf);
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
meta_content->key = MetaContent;
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
APPEND_META(doc, meta_content)
fz_drop_stream(ctx, stream); fz_drop_stream(ctx, stream);
fz_drop_document(ctx, fzdoc); fz_drop_document(ctx, fzdoc);
fz_drop_context(ctx); fz_drop_context(ctx);
text_buffer_destroy(&text_buf);
} }

View File

@@ -26,6 +26,8 @@
#include <pthread.h> #include <pthread.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <wordexp.h> #include <wordexp.h>
#include "ft2build.h"
#include "freetype/freetype.h"
#ifndef SIST_SCAN_ONLY #ifndef SIST_SCAN_ONLY
#include <onion/onion.h> #include <onion/onion.h>

View File

@@ -114,12 +114,18 @@ static void *tpool_worker(void *arg) {
pthread_mutex_unlock(&(pool->work_mutex)); pthread_mutex_unlock(&(pool->work_mutex));
if (work != NULL) { if (work != NULL) {
if (pool->stop) {
break;
}
work->func(work->arg); work->func(work->arg);
free(work); free(work);
} }
pthread_mutex_lock(&(pool->work_mutex)); pthread_mutex_lock(&(pool->work_mutex));
pool->done_cnt++; if (work != NULL) {
pool->done_cnt++;
}
progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size); progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
@@ -142,11 +148,15 @@ void tpool_wait(tpool_t *pool) {
if (pool->done_cnt < pool->work_cnt) { if (pool->done_cnt < pool->work_cnt) {
pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex)); pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
} else { } else {
pool->stop = 1; usleep(500000);
break; if (pool->done_cnt == pool->work_cnt) {
pool->stop = 1;
usleep(1000000);
break;
}
} }
progress_bar_print(100.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
} }
progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
pthread_mutex_unlock(&(pool->work_mutex)); pthread_mutex_unlock(&(pool->work_mutex));
} }
@@ -169,7 +179,8 @@ void tpool_destroy(tpool_t *pool) {
for (size_t i = 0; i < pool->thread_cnt; i++) { for (size_t i = 0; i < pool->thread_cnt; i++) {
pthread_t thread = pool->threads[i]; pthread_t thread = pool->threads[i];
if (thread != 0) { if (thread != 0) {
pthread_cancel(thread); void *_;
pthread_join(thread, &_);
} }
} }
@@ -209,8 +220,6 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
void tpool_start(tpool_t *pool) { void tpool_start(tpool_t *pool) {
for (size_t i = 0; i < pool->thread_cnt; i++) { for (size_t i = 0; i < pool->thread_cnt; i++) {
pthread_t thread = pool->threads[i]; pthread_create(&pool->threads[i], NULL, tpool_worker, pool);
pthread_create(&thread, NULL, tpool_worker, pool);
pthread_detach(thread);
} }
} }

View File

@@ -1,4 +1,4 @@
#import "auth_basic.h" #include "auth_basic.h"
#define UNAUTHORIZED_TEXT "Unauthorized" #define UNAUTHORIZED_TEXT "Unauthorized"

File diff suppressed because one or more lines are too long

View File

@@ -345,7 +345,8 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
function createDocLine(hit) { function createDocLine(hit) {
let mimeCategory = hit["_source"]["mime"].split("/")[0]; const mime = hit["_source"]["mime"];
let mimeCategory = mime ? mime.split("/")[0] : null;
let tags = getTags(hit, mimeCategory); let tags = getTags(hit, mimeCategory);
let imgWrapper = document.createElement("div"); let imgWrapper = document.createElement("div");

View File

@@ -357,6 +357,7 @@ function search() {
post_tags: ["</mark>"], post_tags: ["</mark>"],
fields: { fields: {
content: {}, content: {},
// "content.nGram": {},
name: {}, name: {},
"name.nGram": {}, "name.nGram": {},
font_name: {}, font_name: {},

View File

@@ -11,7 +11,7 @@
<nav class="navbar navbar-expand-lg"> <nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="/">sist2</a> <a class="navbar-brand" href="/">sist2</a>
<span class="badge badge-pill version">v1.1.7</span> <span class="badge badge-pill version">v1.1.9</span>
<span class="tagline">Lightning-fast file system indexer and search tool </span> <span class="tagline">Lightning-fast file system indexer and search tool </span>
<a style="margin-left: auto" id="theme" class="btn" title="Toggle theme" href="/">Theme</a> <a style="margin-left: auto" id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
</nav> </nav>