mirror of
https://github.com/simon987/sist2.git
synced 2025-12-12 15:08:53 +00:00
Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d816dae8b3 | |||
| 4346c3e063 | |||
| 1a1032a8a7 | |||
| 4ab2ba1a02 | |||
| d089601dc5 | |||
| 11df6cc88f | |||
| 373ac01e4e |
@@ -157,8 +157,8 @@ if (WITH_SIST2)
|
||||
m
|
||||
bz2
|
||||
magic
|
||||
harfbuzz
|
||||
openjp2
|
||||
${PROJECT_SOURCE_DIR}/lib/libharfbuzz.a
|
||||
${PROJECT_SOURCE_DIR}/lib/libopenjp2.a
|
||||
freetype
|
||||
)
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
rm ./sist2
|
||||
cp ../sist2 .
|
||||
|
||||
version=$(./sist2 --version)
|
||||
|
||||
2
mime.csv
2
mime.csv
@@ -252,7 +252,7 @@ text/html, acgi|htm|html|htmls|htx|shtml
|
||||
text/javascript, js
|
||||
text/mcf, mcf
|
||||
text/pascal, pas
|
||||
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt
|
||||
text/plain, com|cmd|conf|def|g|idc|list|lst|mar|sdml|text|txt|md|groovy|license|properties|desktop|ini|rst|cmake|ipynb|readme|less|lo|go|yml|d|cs|hpp|srt|nfo|sfv|m3u
|
||||
text/richtext, rt|rtf|rtx
|
||||
text/rtf,
|
||||
text/scriplet, wsc
|
||||
|
||||
|
48
src/cli.c
48
src/cli.c
@@ -7,6 +7,7 @@
|
||||
#define DEFAULT_REWRITE_URL ""
|
||||
|
||||
#define DEFAULT_ES_URL "http://localhost:9200"
|
||||
#define DEFAULT_BATCH_SIZE 100
|
||||
|
||||
#define DEFAULT_BIND_ADDR "localhost"
|
||||
#define DEFAULT_PORT "4090"
|
||||
@@ -14,9 +15,37 @@
|
||||
|
||||
scan_args_t *scan_args_create() {
|
||||
scan_args_t *args = calloc(sizeof(scan_args_t), 1);
|
||||
|
||||
args->depth = -1;
|
||||
|
||||
return args;
|
||||
}
|
||||
|
||||
void scan_args_destroy(scan_args_t *args) {
|
||||
if (args->name != NULL) {
|
||||
free(args->name);
|
||||
}
|
||||
if (args->path != NULL) {
|
||||
free(args->path);
|
||||
}
|
||||
if (args->output != NULL) {
|
||||
free(args->output);
|
||||
}
|
||||
free(args);
|
||||
}
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
void index_args_destroy(index_args_t *args) {
|
||||
//todo
|
||||
free(args);
|
||||
}
|
||||
|
||||
void web_args_destroy(web_args_t *args) {
|
||||
//todo
|
||||
free(args);
|
||||
}
|
||||
#endif
|
||||
|
||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "Required positional argument: PATH.\n");
|
||||
@@ -48,16 +77,13 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
|
||||
if (args->size == 0) {
|
||||
args->size = DEFAULT_SIZE;
|
||||
} else if (args->size <= 0) {
|
||||
fprintf(stderr, "Invalid size: %d\n", args->size);
|
||||
} else if (args->size > 0 && args->size < 32) {
|
||||
printf("Invalid size: %d\n", args->content_size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->content_size == 0) {
|
||||
args->content_size = DEFAULT_CONTENT_SIZE;
|
||||
} else if (args->content_size <= 0) {
|
||||
fprintf(stderr, "Invalid content-size: %d\n", args->content_size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->threads == 0) {
|
||||
@@ -80,6 +106,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (args->depth < 0) {
|
||||
args->depth = G_MAXINT32;
|
||||
} else {
|
||||
args->depth += 1;
|
||||
}
|
||||
|
||||
if (args->name == NULL) {
|
||||
args->name = g_path_get_basename(args->output);
|
||||
}
|
||||
@@ -104,6 +136,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
return 1;
|
||||
} else {
|
||||
args->index_path = argv[1];
|
||||
free(index_path);
|
||||
}
|
||||
|
||||
if (args->es_url == NULL) {
|
||||
@@ -135,6 +168,11 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
*(args->script + info.st_size) = '\0';
|
||||
close(fd);
|
||||
}
|
||||
|
||||
if (args->batch_size == 0) {
|
||||
args->batch_size = DEFAULT_BATCH_SIZE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -12,10 +12,12 @@ typedef struct scan_args {
|
||||
char *output;
|
||||
char *rewrite_url;
|
||||
char *name;
|
||||
int depth;
|
||||
char *path;
|
||||
} scan_args_t;
|
||||
|
||||
scan_args_t *scan_args_create();
|
||||
void scan_args_destroy(scan_args_t *args);
|
||||
int scan_args_validate(scan_args_t *args, int argc, const char **argv);
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
@@ -25,6 +27,7 @@ typedef struct index_args {
|
||||
const char *script_path;
|
||||
char *script;
|
||||
int print;
|
||||
int batch_size;
|
||||
int force_reset;
|
||||
} index_args_t;
|
||||
|
||||
@@ -39,7 +42,10 @@ typedef struct web_args {
|
||||
} web_args_t;
|
||||
|
||||
index_args_t *index_args_create();
|
||||
void index_args_destroy(index_args_t *args);
|
||||
|
||||
web_args_t *web_args_create();
|
||||
void web_args_destroy(web_args_t *args);
|
||||
|
||||
int index_args_validate(index_args_t *args, int argc, const char **argv);
|
||||
int web_args_validate(web_args_t *args, int argc, const char **argv);
|
||||
|
||||
@@ -15,6 +15,7 @@ struct {
|
||||
int threads;
|
||||
int content_size;
|
||||
float tn_qscale;
|
||||
int depth;
|
||||
|
||||
size_t stat_tn_size;
|
||||
size_t stat_index_size;
|
||||
@@ -29,12 +30,13 @@ struct {
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
struct {
|
||||
char *es_url;
|
||||
int batch_size;
|
||||
} IndexCtx;
|
||||
|
||||
struct {
|
||||
char *es_url;
|
||||
int index_count;
|
||||
char* b64credentials;
|
||||
char *b64credentials;
|
||||
struct index_t indices[16];
|
||||
} WebCtx;
|
||||
#endif
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
|
||||
#include "static_generated.c"
|
||||
|
||||
#define BULK_INDEX_SIZE 100
|
||||
|
||||
typedef struct es_indexer {
|
||||
int queued;
|
||||
@@ -173,7 +172,7 @@ void elastic_index_line(es_bulk_line_t *line) {
|
||||
|
||||
Indexer->queued += 1;
|
||||
|
||||
if (Indexer->queued >= BULK_INDEX_SIZE) {
|
||||
if (Indexer->queued >= IndexCtx.batch_size) {
|
||||
elastic_flush();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#include "src/ctx.h"
|
||||
#include "serialize.h"
|
||||
|
||||
static __thread int IndexFd = -1;
|
||||
static __thread int index_fd = -1;
|
||||
|
||||
typedef struct {
|
||||
unsigned char uuid[16];
|
||||
@@ -119,13 +119,13 @@ char *get_meta_key_text(enum metakey meta_key) {
|
||||
|
||||
void write_document(document_t *doc) {
|
||||
|
||||
if (IndexFd == -1) {
|
||||
if (index_fd == -1) {
|
||||
char dstfile[PATH_MAX];
|
||||
pthread_t self = pthread_self();
|
||||
snprintf(dstfile, PATH_MAX, "%s_index_%lu", ScanCtx.index.path, self);
|
||||
IndexFd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
|
||||
index_fd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
|
||||
|
||||
if (IndexFd == -1) {
|
||||
if (index_fd == -1) {
|
||||
perror("open");
|
||||
}
|
||||
}
|
||||
@@ -158,13 +158,16 @@ void write_document(document_t *doc) {
|
||||
}
|
||||
dyn_buffer_write_char(&buf, '\n');
|
||||
|
||||
write(IndexFd, buf.buf, buf.cur);
|
||||
int res = write(index_fd, buf.buf, buf.cur);
|
||||
if (res == -1) {
|
||||
perror("write");
|
||||
}
|
||||
ScanCtx.stat_index_size += buf.cur;
|
||||
dyn_buffer_destroy(&buf);
|
||||
}
|
||||
|
||||
void serializer_cleanup() {
|
||||
close(IndexFd);
|
||||
void thread_cleanup() {
|
||||
close(index_fd);
|
||||
}
|
||||
|
||||
void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func func) {
|
||||
|
||||
@@ -18,7 +18,7 @@ void incremental_read(GHashTable *table, const char *filepath);
|
||||
/**
|
||||
* Must be called after write_document
|
||||
*/
|
||||
void serializer_cleanup();
|
||||
void thread_cleanup();
|
||||
|
||||
void write_index_descriptor(char *path, index_descriptor_t *desc);
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ parse_job_t *create_parse_job(const char *filepath, const struct stat *info, int
|
||||
}
|
||||
|
||||
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {
|
||||
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
if (ftw->level <= ScanCtx.depth && typeflag == FTW_F && S_ISREG(info->st_mode)) {
|
||||
parse_job_t *job = create_parse_job(filepath, info, ftw->base);
|
||||
tpool_add_work(ScanCtx.pool, parse, job);
|
||||
}
|
||||
|
||||
68
src/main.c
68
src/main.c
@@ -10,7 +10,7 @@
|
||||
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||
|
||||
|
||||
static const char *const Version = "1.1.7";
|
||||
static const char *const Version = "1.1.9";
|
||||
static const char *const usage[] = {
|
||||
"sist2 scan [OPTION]... PATH",
|
||||
"sist2 index [OPTION]... INDEX",
|
||||
@@ -19,9 +19,9 @@ static const char *const usage[] = {
|
||||
};
|
||||
|
||||
void global_init() {
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
curl_global_init(CURL_GLOBAL_NOTHING);
|
||||
#endif
|
||||
#endif
|
||||
av_log_set_level(AV_LOG_QUIET);
|
||||
}
|
||||
|
||||
@@ -41,10 +41,22 @@ void init_dir(const char *dirpath) {
|
||||
void scan_print_header() {
|
||||
printf("sist2 V%s\n", Version);
|
||||
printf("---------------------\n");
|
||||
printf("threads\t\t%d\n", ScanCtx.threads);
|
||||
printf("tn_qscale\t%.1f/31.0\n", ScanCtx.tn_qscale);
|
||||
printf("tn_size\t\t%dpx\n", ScanCtx.tn_size);
|
||||
printf("output\t\t%s\n", ScanCtx.index.path);
|
||||
printf("threads\t\t\t%d\n", ScanCtx.threads);
|
||||
printf("tn_qscale\t\t%.1f/31.0\n", ScanCtx.tn_qscale);
|
||||
|
||||
if (ScanCtx.tn_size > 0) {
|
||||
printf("tn_size\t\t\t%dpx\n", ScanCtx.tn_size);
|
||||
} else {
|
||||
printf("tn_size\t\t\tdisabled\n");
|
||||
}
|
||||
|
||||
if (ScanCtx.content_size > 0) {
|
||||
printf("content_size\t%d B\n", ScanCtx.content_size);
|
||||
} else {
|
||||
printf("content_size\t\t\tdisabled\n");
|
||||
}
|
||||
|
||||
printf("output\t\t\t%s\n", ScanCtx.index.path);
|
||||
}
|
||||
|
||||
void sist2_scan(scan_args_t *args) {
|
||||
@@ -53,6 +65,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
ScanCtx.tn_size = args->size;
|
||||
ScanCtx.content_size = args->content_size;
|
||||
ScanCtx.threads = args->threads;
|
||||
ScanCtx.depth = args->depth;
|
||||
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
|
||||
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
|
||||
strncpy(ScanCtx.index.desc.root, args->path, sizeof(ScanCtx.index.desc.root));
|
||||
@@ -92,7 +105,7 @@ void sist2_scan(scan_args_t *args) {
|
||||
printf("Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table));
|
||||
}
|
||||
|
||||
ScanCtx.pool = tpool_create(args->threads, serializer_cleanup);
|
||||
ScanCtx.pool = tpool_create(args->threads, thread_cleanup);
|
||||
tpool_start(ScanCtx.pool);
|
||||
walk_directory_tree(ScanCtx.index.desc.root);
|
||||
tpool_wait(ScanCtx.pool);
|
||||
@@ -125,9 +138,11 @@ void sist2_scan(scan_args_t *args) {
|
||||
}
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
|
||||
void sist2_index(index_args_t *args) {
|
||||
|
||||
IndexCtx.es_url = args->es_url;
|
||||
IndexCtx.batch_size = args->batch_size;
|
||||
|
||||
if (!args->print) {
|
||||
elastic_init(args->force_reset);
|
||||
@@ -197,6 +212,7 @@ void sist2_web(web_args_t *args) {
|
||||
|
||||
serve(args->bind, args->port);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -205,14 +221,14 @@ int main(int argc, const char *argv[]) {
|
||||
global_init();
|
||||
|
||||
scan_args_t *scan_args = scan_args_create();
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
index_args_t *index_args = index_args_create();
|
||||
web_args_t *web_args = web_args_create();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int arg_version = 0;
|
||||
|
||||
char * common_es_url = NULL;
|
||||
char *common_es_url = NULL;
|
||||
|
||||
struct argparse_option options[] = {
|
||||
OPT_HELP(),
|
||||
@@ -223,29 +239,33 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_FLOAT('q', "quality", &scan_args->quality,
|
||||
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
|
||||
OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=500"),
|
||||
OPT_INTEGER(0, "size", &scan_args->size,
|
||||
"Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500"),
|
||||
OPT_INTEGER(0, "content-size", &scan_args->content_size,
|
||||
"Number of bytes to be extracted from text documents. DEFAULT=4096"),
|
||||
"Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=4096"),
|
||||
OPT_STRING(0, "incremental", &scan_args->incremental,
|
||||
"Reuse an existing index and only scan modified files."),
|
||||
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
|
||||
OPT_STRING(0, "rewrite-url", &scan_args->rewrite_url, "Serve files from this url instead of from disk."),
|
||||
OPT_STRING(0, "name", &scan_args->name, "Index display name. DEFAULT: (name of the directory)"),
|
||||
OPT_INTEGER(0, "depth", &scan_args->depth, "Scan up to DEPTH subdirectories deep. "
|
||||
"Use 0 to only scan files in PATH. DEFAULT: -1"),
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
OPT_GROUP("Index options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
|
||||
OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"),
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
|
||||
"(You must use this option the first time you use the index command)"),
|
||||
"(You must use this option the first time you use the index command)"),
|
||||
|
||||
OPT_GROUP("Web options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_STRING(0, "bind", &web_args->bind, "Listen on this address. DEFAULT=localhost"),
|
||||
OPT_STRING(0, "port", &web_args->port, "Listen on this port. DEFAULT=4090"),
|
||||
OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"),
|
||||
#endif
|
||||
#endif
|
||||
|
||||
OPT_END(),
|
||||
};
|
||||
@@ -260,10 +280,10 @@ int main(int argc, const char *argv[]) {
|
||||
exit(0);
|
||||
}
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
web_args->es_url = common_es_url;
|
||||
index_args->es_url = common_es_url;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (argc == 0) {
|
||||
argparse_usage(&argparse);
|
||||
@@ -278,7 +298,7 @@ int main(int argc, const char *argv[]) {
|
||||
|
||||
}
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
else if (strcmp(argv[0], "index") == 0) {
|
||||
|
||||
int err = index_args_validate(index_args, argc, argv);
|
||||
@@ -296,12 +316,20 @@ int main(int argc, const char *argv[]) {
|
||||
sist2_web(web_args);
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
else {
|
||||
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
|
||||
argparse_usage(&argparse);
|
||||
return 1;
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
scan_args_destroy(scan_args);
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
index_args_destroy(index_args);
|
||||
web_args_destroy(web_args);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
#include "font.h"
|
||||
|
||||
#include "ft2build.h"
|
||||
#include "freetype/freetype.h"
|
||||
|
||||
#include "src/ctx.h"
|
||||
|
||||
__thread FT_Library library = NULL;
|
||||
__thread FT_Library ft_lib = NULL;
|
||||
|
||||
|
||||
typedef struct text_dimensions {
|
||||
@@ -139,15 +137,15 @@ void bmp_format(dyn_buffer_t *buf, text_dimensions_t dimensions, const unsigned
|
||||
}
|
||||
|
||||
void parse_font(const char *buf, size_t buf_len, document_t *doc) {
|
||||
if (library == NULL) {
|
||||
FT_Init_FreeType(&library);
|
||||
if (ft_lib == NULL) {
|
||||
FT_Init_FreeType(&ft_lib);
|
||||
}
|
||||
if (buf == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
FT_Face face;
|
||||
FT_Error err = FT_New_Memory_Face(library, (unsigned char *) buf, buf_len, 0, &face);
|
||||
FT_Error err = FT_New_Memory_Face(ft_lib, (unsigned char *) buf, buf_len, 0, &face);
|
||||
if (err != 0) {
|
||||
return;
|
||||
}
|
||||
@@ -169,6 +167,10 @@ void parse_font(const char *buf, size_t buf_len, document_t *doc) {
|
||||
strcpy(meta_name->strval, font_name);
|
||||
APPEND_META(doc, meta_name)
|
||||
|
||||
if (ScanCtx.tn_size <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
int pixel = 64;
|
||||
int num_chars = (int) strlen(font_name);
|
||||
|
||||
|
||||
@@ -242,7 +242,7 @@ void parse_media(const char *filepath, document_t *doc) {
|
||||
}
|
||||
}
|
||||
|
||||
if (video_stream != -1) {
|
||||
if (video_stream != -1 && ScanCtx.tn_size > 0) {
|
||||
AVStream *stream = pFormatCtx->streams[video_stream];
|
||||
|
||||
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
|
||||
|
||||
@@ -1182,6 +1182,9 @@ g_hash_table_insert(ext_table, "d", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "cs", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "hpp", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "srt", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "nfo", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "sfv", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "m3u", (gpointer)text_plain);
|
||||
g_hash_table_insert(ext_table, "rt", (gpointer)text_richtext);
|
||||
g_hash_table_insert(ext_table, "rtf", (gpointer)text_richtext);
|
||||
g_hash_table_insert(ext_table, "rtx", (gpointer)text_richtext);
|
||||
|
||||
@@ -44,7 +44,6 @@ void parse(void *arg) {
|
||||
|
||||
if (Magic == NULL) {
|
||||
Magic = magic_open(MAGIC_MIME_TYPE);
|
||||
magic_load(Magic, NULL);
|
||||
}
|
||||
|
||||
doc.filepath = job->filepath;
|
||||
|
||||
@@ -177,7 +177,17 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
|
||||
return;
|
||||
}
|
||||
|
||||
fz_page *cover = render_cover(ctx, doc, fzdoc);
|
||||
fz_page *cover = NULL;
|
||||
if (ScanCtx.tn_size > 0) {
|
||||
cover = render_cover(ctx, doc, fzdoc);
|
||||
} else {
|
||||
fz_var(cover);
|
||||
fz_try(ctx)
|
||||
cover = fz_load_page(ctx, fzdoc, 0);
|
||||
fz_catch(ctx)
|
||||
cover = NULL;
|
||||
}
|
||||
|
||||
if (cover == NULL) {
|
||||
fz_drop_stream(ctx, stream);
|
||||
fz_drop_document(ctx, fzdoc);
|
||||
@@ -185,79 +195,81 @@ void parse_pdf(void *buf, size_t buf_len, document_t *doc) {
|
||||
return;
|
||||
}
|
||||
|
||||
fz_stext_options opts = {0};
|
||||
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
|
||||
if (ScanCtx.content_size > 0) {
|
||||
fz_stext_options opts = {0};
|
||||
text_buffer_t text_buf = text_buffer_create(ScanCtx.content_size);
|
||||
|
||||
for (int current_page = 0; current_page < page_count; current_page++) {
|
||||
fz_page *page = NULL;
|
||||
if (current_page == 0) {
|
||||
page = cover;
|
||||
} else {
|
||||
fz_var(err);
|
||||
fz_try(ctx)
|
||||
page = fz_load_page(ctx, fzdoc, current_page);
|
||||
fz_catch(ctx)
|
||||
err = ctx->error.errcode;
|
||||
if (err != 0) {
|
||||
text_buffer_destroy(&text_buf);
|
||||
fz_drop_page(ctx, page);
|
||||
fz_drop_stream(ctx, stream);
|
||||
fz_drop_document(ctx, fzdoc);
|
||||
fz_drop_context(ctx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
|
||||
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
|
||||
|
||||
for (int current_page = 0; current_page < page_count; current_page++) {
|
||||
fz_page *page = NULL;
|
||||
if (current_page == 0) {
|
||||
page = cover;
|
||||
} else {
|
||||
fz_var(err);
|
||||
fz_try(ctx)
|
||||
page = fz_load_page(ctx, fzdoc, current_page);
|
||||
fz_run_page(ctx, page, dev, fz_identity, NULL);
|
||||
fz_always(ctx)
|
||||
{
|
||||
fz_close_device(ctx, dev);
|
||||
fz_drop_device(ctx, dev);
|
||||
}
|
||||
fz_catch(ctx)
|
||||
err = ctx->error.errcode;
|
||||
|
||||
if (err != 0) {
|
||||
text_buffer_destroy(&text_buf);
|
||||
fz_drop_page(ctx, page);
|
||||
fz_drop_stext_page(ctx, stext);
|
||||
fz_drop_stream(ctx, stream);
|
||||
fz_drop_document(ctx, fzdoc);
|
||||
fz_drop_context(ctx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
fz_stext_page *stext = fz_new_stext_page(ctx, fz_bound_page(ctx, page));
|
||||
fz_device *dev = fz_new_stext_device(ctx, stext, &opts);
|
||||
|
||||
fz_var(err);
|
||||
fz_try(ctx)
|
||||
fz_run_page(ctx, page, dev, fz_identity, NULL);
|
||||
fz_always(ctx)
|
||||
{
|
||||
fz_close_device(ctx, dev);
|
||||
fz_drop_device(ctx, dev);
|
||||
}
|
||||
fz_catch(ctx)
|
||||
err = ctx->error.errcode;
|
||||
|
||||
if (err != 0) {
|
||||
text_buffer_destroy(&text_buf);
|
||||
fz_drop_page(ctx, page);
|
||||
fz_stext_block *block = stext->first_block;
|
||||
while (block != NULL) {
|
||||
int ret = read_stext_block(block, &text_buf);
|
||||
if (ret == TEXT_BUF_FULL) {
|
||||
break;
|
||||
}
|
||||
block = block->next;
|
||||
}
|
||||
fz_drop_stext_page(ctx, stext);
|
||||
fz_drop_stream(ctx, stream);
|
||||
fz_drop_document(ctx, fzdoc);
|
||||
fz_drop_context(ctx);
|
||||
return;
|
||||
}
|
||||
fz_drop_page(ctx, page);
|
||||
|
||||
fz_stext_block *block = stext->first_block;
|
||||
while (block != NULL) {
|
||||
int ret = read_stext_block(block, &text_buf);
|
||||
if (ret == TEXT_BUF_FULL) {
|
||||
if (text_buf.dyn_buffer.cur >= text_buf.dyn_buffer.size) {
|
||||
break;
|
||||
}
|
||||
block = block->next;
|
||||
}
|
||||
fz_drop_stext_page(ctx, stext);
|
||||
fz_drop_page(ctx, page);
|
||||
text_buffer_terminate_string(&text_buf);
|
||||
|
||||
if (text_buf.dyn_buffer.cur >= text_buf.dyn_buffer.size) {
|
||||
break;
|
||||
}
|
||||
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
|
||||
meta_content->key = MetaContent;
|
||||
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
|
||||
APPEND_META(doc, meta_content)
|
||||
|
||||
text_buffer_destroy(&text_buf);
|
||||
}
|
||||
text_buffer_terminate_string(&text_buf);
|
||||
|
||||
meta_line_t *meta_content = malloc(sizeof(meta_line_t) + text_buf.dyn_buffer.cur);
|
||||
meta_content->key = MetaContent;
|
||||
memcpy(meta_content->strval, text_buf.dyn_buffer.buf, text_buf.dyn_buffer.cur);
|
||||
APPEND_META(doc, meta_content)
|
||||
|
||||
fz_drop_stream(ctx, stream);
|
||||
fz_drop_document(ctx, fzdoc);
|
||||
fz_drop_context(ctx);
|
||||
|
||||
text_buffer_destroy(&text_buf);
|
||||
}
|
||||
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
#include <pthread.h>
|
||||
#include <sys/stat.h>
|
||||
#include <wordexp.h>
|
||||
#include "ft2build.h"
|
||||
#include "freetype/freetype.h"
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
#include <onion/onion.h>
|
||||
|
||||
25
src/tpool.c
25
src/tpool.c
@@ -114,12 +114,18 @@ static void *tpool_worker(void *arg) {
|
||||
pthread_mutex_unlock(&(pool->work_mutex));
|
||||
|
||||
if (work != NULL) {
|
||||
if (pool->stop) {
|
||||
break;
|
||||
}
|
||||
|
||||
work->func(work->arg);
|
||||
free(work);
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&(pool->work_mutex));
|
||||
pool->done_cnt++;
|
||||
if (work != NULL) {
|
||||
pool->done_cnt++;
|
||||
}
|
||||
|
||||
progress_bar_print((double) pool->done_cnt / pool->work_cnt, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
|
||||
|
||||
@@ -142,11 +148,15 @@ void tpool_wait(tpool_t *pool) {
|
||||
if (pool->done_cnt < pool->work_cnt) {
|
||||
pthread_cond_wait(&(pool->working_cond), &(pool->work_mutex));
|
||||
} else {
|
||||
pool->stop = 1;
|
||||
break;
|
||||
usleep(500000);
|
||||
if (pool->done_cnt == pool->work_cnt) {
|
||||
pool->stop = 1;
|
||||
usleep(1000000);
|
||||
break;
|
||||
}
|
||||
}
|
||||
progress_bar_print(100.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
|
||||
}
|
||||
progress_bar_print(1.0, ScanCtx.stat_tn_size, ScanCtx.stat_index_size);
|
||||
pthread_mutex_unlock(&(pool->work_mutex));
|
||||
}
|
||||
|
||||
@@ -169,7 +179,8 @@ void tpool_destroy(tpool_t *pool) {
|
||||
for (size_t i = 0; i < pool->thread_cnt; i++) {
|
||||
pthread_t thread = pool->threads[i];
|
||||
if (thread != 0) {
|
||||
pthread_cancel(thread);
|
||||
void *_;
|
||||
pthread_join(thread, &_);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -209,8 +220,6 @@ tpool_t *tpool_create(size_t thread_cnt, void cleanup_func()) {
|
||||
void tpool_start(tpool_t *pool) {
|
||||
|
||||
for (size_t i = 0; i < pool->thread_cnt; i++) {
|
||||
pthread_t thread = pool->threads[i];
|
||||
pthread_create(&thread, NULL, tpool_worker, pool);
|
||||
pthread_detach(thread);
|
||||
pthread_create(&pool->threads[i], NULL, tpool_worker, pool);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#import "auth_basic.h"
|
||||
#include "auth_basic.h"
|
||||
|
||||
#define UNAUTHORIZED_TEXT "Unauthorized"
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -345,7 +345,8 @@ function makeThumbnail(mimeCategory, hit, imgWrapper, small) {
|
||||
|
||||
function createDocLine(hit) {
|
||||
|
||||
let mimeCategory = hit["_source"]["mime"].split("/")[0];
|
||||
const mime = hit["_source"]["mime"];
|
||||
let mimeCategory = mime ? mime.split("/")[0] : null;
|
||||
let tags = getTags(hit, mimeCategory);
|
||||
|
||||
let imgWrapper = document.createElement("div");
|
||||
|
||||
@@ -357,6 +357,7 @@ function search() {
|
||||
post_tags: ["</mark>"],
|
||||
fields: {
|
||||
content: {},
|
||||
// "content.nGram": {},
|
||||
name: {},
|
||||
"name.nGram": {},
|
||||
font_name: {},
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
<nav class="navbar navbar-expand-lg">
|
||||
<a class="navbar-brand" href="/">sist2</a>
|
||||
<span class="badge badge-pill version">v1.1.7</span>
|
||||
<span class="badge badge-pill version">v1.1.9</span>
|
||||
<span class="tagline">Lightning-fast file system indexer and search tool </span>
|
||||
<a style="margin-left: auto" id="theme" class="btn" title="Toggle theme" href="/">Theme</a>
|
||||
</nav>
|
||||
|
||||
Reference in New Issue
Block a user