diff --git a/CMakeLists.txt b/CMakeLists.txt index 86c129c..5380c45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,7 +30,7 @@ add_executable( third-party/argparse/argparse.h third-party/argparse/argparse.c src/cli.c src/cli.h - src/stats.c src/stats.h) + src/stats.c src/stats.h src/ctx.c) target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/) set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) diff --git a/README.md b/README.md index 46c0a14..c3137db 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,6 @@ binaries (GCC 7+ required). 2. Build ```bash git clone --recursive https://github.com/simon987/sist2/ - cmake -D /scripts/buildsystems/vcpkg.cmake . + cmake -DCMAKE_TOOLCHAIN_FILE=/scripts/buildsystems/vcpkg.cmake . make ``` diff --git a/src/cli.c b/src/cli.c index d0c390f..8c24f5a 100644 --- a/src/cli.c +++ b/src/cli.c @@ -16,7 +16,7 @@ #define DEFAULT_MAX_MEM_BUFFER 2000 -const char* TESS_DATAPATHS[] = { +const char *TESS_DATAPATHS[] = { "/usr/share/tessdata/", "/usr/share/tesseract-ocr/tessdata/", "./", @@ -32,6 +32,11 @@ scan_args_t *scan_args_create() { return args; } +exec_args_t *exec_args_create() { + exec_args_t *args = calloc(sizeof(exec_args_t), 1); + return args; +} + void scan_args_destroy(scan_args_t *args) { if (args->name != NULL) { free(args->name); @@ -55,6 +60,10 @@ void web_args_destroy(web_args_t *args) { free(args); } +void exec_args_destroy(exec_args_t *args) { + free(args); +} + int scan_args_validate(scan_args_t *args, int argc, const char **argv) { if (argc < 2) { fprintf(stderr, "Required positional argument: PATH.\n"); @@ -115,7 +124,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { return 1; } - if (args->depth < 0) { + if (args->depth <= 0) { args->depth = G_MAXINT32; } else { args->depth += 1; @@ -147,7 +156,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { char filename[128]; sprintf(filename, "%s.traineddata", args->tesseract_lang); - const char * path = find_file_in_paths(TESS_DATAPATHS, filename); + const char *path = find_file_in_paths(TESS_DATAPATHS, filename); if (path == NULL) { LOG_FATAL("cli.c", "Could not find tesseract language file!"); } @@ -214,6 +223,34 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) { return 0; } +int load_script(const char *script_path, char **dst) { + struct stat info; + int res = stat(script_path, &info); + + if (res == -1) { + fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno)); + return 1; + } + + int fd = open(script_path, O_RDONLY); + if (fd == -1) { + fprintf(stderr, "Error opening script file '%s': %s\n", script_path, strerror(errno)); + return 1; + } + + *dst = malloc(info.st_size + 1); + res = read(fd, *dst, info.st_size); + if (res < 0) { + fprintf(stderr, "Error reading script file '%s': %s\n", script_path, strerror(errno)); + return 1; + } + + *(*dst + info.st_size) = '\0'; + close(fd); + + return 0; +} + int index_args_validate(index_args_t *args, int argc, const char **argv) { LogCtx.verbose = 1; @@ -237,29 +274,9 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) { } if (args->script_path != NULL) { - struct stat info; - int res = stat(args->script_path, &info); - - if (res == -1) { - fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno)); + if (load_script(args->script_path, &args->script) != 0) { return 1; } - - int fd = open(args->script_path, O_RDONLY); - if (fd == -1) { - fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno)); - return 1; - } - - args->script = malloc(info.st_size + 1); - res = read(fd, args->script, info.st_size); - if (res < 0) { - fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno)); - return 1; - } - - *(args->script + info.st_size) = '\0'; - close(fd); } if (args->batch_size == 0) { @@ -295,7 +312,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) { } if (args->credentials != NULL) { - char * ptr = strstr(args->credentials, ":"); + char *ptr = strstr(args->credentials, ":"); if (ptr == NULL) { fprintf(stderr, "Invalid --auth format, see usage\n"); return 1; @@ -348,3 +365,30 @@ web_args_t *web_args_create() { return args; } +int exec_args_validate(exec_args_t *args, int argc, const char **argv) { + + char *index_path = abspath(argv[1]); + if (index_path == NULL) { + fprintf(stderr, "File not found: %s\n", argv[1]); + return 1; + } else { + args->index_path = argv[1]; + free(index_path); + } + + if (args->es_url == NULL) { + args->es_url = DEFAULT_ES_URL; + } + + if (args->script_path == NULL) { + LOG_FATAL("cli.c", "--script-file argument is required"); + } + + if (load_script(args->script_path, &args->script) != 0) { + return 1; + } + + LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path) + LOG_DEBUGF("cli.c", "arg script=%s", args->script) + return 0; +} diff --git a/src/cli.h b/src/cli.h index 94541ee..6da2bdf 100644 --- a/src/cli.h +++ b/src/cli.h @@ -54,6 +54,13 @@ typedef struct web_args { const char **indices; } web_args_t; +typedef struct exec_args { + char *es_url; + const char *index_path; + const char *script_path; + char *script; +} exec_args_t; + index_args_t *index_args_create(); void index_args_destroy(index_args_t *args); @@ -66,4 +73,10 @@ int index_args_validate(index_args_t *args, int argc, const char **argv); int web_args_validate(web_args_t *args, int argc, const char **argv); +exec_args_t *exec_args_create(); + +void exec_args_destroy(exec_args_t *args); + +int exec_args_validate(exec_args_t *args, int argc, const char **argv); + #endif diff --git a/src/ctx.h b/src/ctx.h index 2fdf122..9c1970f 100644 --- a/src/ctx.h +++ b/src/ctx.h @@ -16,8 +16,7 @@ #include #include -//TODO Move to individual scan ctx -struct { +typedef struct { struct index_t index; GHashTable *mime_table; @@ -46,27 +45,32 @@ struct { scan_ooxml_ctx_t ooxml_ctx; scan_text_ctx_t text_ctx; scan_mobi_ctx_t mobi_ctx; -} ScanCtx; +} ScanCtx_t; -struct { +typedef struct { int verbose; int very_verbose; int no_color; -} LogCtx; +} LogCtx_t; -struct { +typedef struct { char *es_url; int batch_size; -} IndexCtx; +} IndexCtx_t; -struct { +typedef struct { char *es_url; int index_count; char *auth_user; char *auth_pass; int auth_enabled; - struct index_t indices[16]; -} WebCtx; + struct index_t indices[64]; +} WebCtx_t; + +extern ScanCtx_t ScanCtx; +extern WebCtx_t WebCtx; +extern IndexCtx_t IndexCtx; +extern LogCtx_t LogCtx; #endif diff --git a/src/index/elastic.c b/src/index/elastic.c index 125b3e8..1e5e776 100644 --- a/src/index/elastic.c +++ b/src/index/elastic.c @@ -53,6 +53,10 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) { void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) { + if (Indexer == NULL) { + Indexer = create_indexer(IndexCtx.es_url); + } + cJSON *body = cJSON_CreateObject(); cJSON *script_obj = cJSON_AddObjectToObject(body, "script"); cJSON_AddStringToObject(script_obj, "lang", "painless"); @@ -264,6 +268,7 @@ void destroy_indexer(char *script, char index_id[UUID_STR_LEN]) { if (script != NULL) { execute_update_script(script, index_id); + free(script); } snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url); diff --git a/src/index/elastic.h b/src/index/elastic.h index a0d06cb..b42ad65 100644 --- a/src/index/elastic.h +++ b/src/index/elastic.h @@ -32,4 +32,6 @@ cJSON *elastic_get_document(const char *uuid_str); char *elastic_get_status(); +void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]); + #endif diff --git a/src/main.c b/src/main.c index 1cc3971..fa5b1d4 100644 --- a/src/main.c +++ b/src/main.c @@ -2,7 +2,6 @@ #include "ctx.h" #include -#include #include #include "cli.h" @@ -22,11 +21,12 @@ #define EPILOG "Made by simon987 . Released under GPL-3.0" -static const char *const Version = "2.4.2"; +static const char *const Version = "2.4.3"; static const char *const usage[] = { "sist2 scan [OPTION]... PATH", "sist2 index [OPTION]... INDEX", "sist2 web [OPTION]... INDEX...", + "sist2 exec-script [OPTION]... INDEX", NULL, }; @@ -287,6 +287,20 @@ void sist2_index(index_args_t *args) { } } +void sist2_exec_script(exec_args_t *args) { + + char descriptor_path[PATH_MAX]; + snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path); + index_descriptor_t desc = read_index_descriptor(descriptor_path); + + IndexCtx.es_url = args->es_url; + + LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type) + + execute_update_script(args->script, desc.uuid); + free(args->script); +} + void sist2_web(web_args_t *args) { WebCtx.es_url = args->es_url; @@ -323,10 +337,12 @@ int main(int argc, const char *argv[]) { scan_args_t *scan_args = scan_args_create(); index_args_t *index_args = index_args_create(); web_args_t *web_args = web_args_create(); + exec_args_t *exec_args = exec_args_create(); int arg_version = 0; char *common_es_url = NULL; + char *common_script_path = NULL; struct argparse_option options[] = { OPT_HELP(), @@ -366,7 +382,7 @@ int main(int argc, const char *argv[]) { OPT_GROUP("Index options"), OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"), OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."), - OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."), + OPT_STRING(0, "script-file", &common_script_path, "Path to user script."), OPT_INTEGER(0, "batch-size", &index_args->batch_size, "Index batch size. DEFAULT: 100"), OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. " "(You must use this option the first time you use the index command)"), @@ -376,6 +392,9 @@ int main(int argc, const char *argv[]) { OPT_STRING(0, "bind", &web_args->listen_address, "Listen on this address. DEFAULT=localhost:4090"), OPT_STRING(0, "auth", &web_args->credentials, "Basic auth in user:password format"), + OPT_GROUP("Exec-script options"), + OPT_STRING(0, "script-file", &common_script_path, "Path to user script."), + OPT_END(), }; @@ -395,6 +414,8 @@ int main(int argc, const char *argv[]) { web_args->es_url = common_es_url; index_args->es_url = common_es_url; + index_args->script_path = common_script_path; + exec_args->script_path = common_script_path; if (argc == 0) { argparse_usage(&argparse); @@ -423,6 +444,14 @@ int main(int argc, const char *argv[]) { } sist2_web(web_args); + } else if (strcmp(argv[0], "exec-script") == 0) { + + int err = exec_args_validate(exec_args, argc, argv); + if (err != 0) { + goto end; + } + sist2_exec_script(exec_args); + } else { fprintf(stderr, "Invalid command: '%s'\n", argv[0]); argparse_usage(&argparse); @@ -434,6 +463,7 @@ int main(int argc, const char *argv[]) { scan_args_destroy(scan_args); index_args_destroy(index_args); web_args_destroy(web_args); + exec_args_destroy(exec_args); return 0; } diff --git a/src/static/search.html b/src/static/search.html index ee5fa2a..177a786 100644 --- a/src/static/search.html +++ b/src/static/search.html @@ -11,7 +11,7 @@