User scripts, bug fixes, docker image

2025-12-11 14:38:54 +00:00 · 2019-11-12 20:58:43 -05:00
parent 6931d320a2
commit ebfd7e03ce
21 changed files with 489 additions and 63 deletions
--- a/src/cli.c
+++ b/src/cli.c
@@ -2,8 +2,8 @@

 #define DEFAULT_OUTPUT "index.sist2/"
 #define DEFAULT_CONTENT_SIZE 4096
-#define DEFAULT_QUALITY 15
-#define DEFAULT_SIZE 200
+#define DEFAULT_QUALITY 5
+#define DEFAULT_SIZE 500
 #define DEFAULT_REWRITE_URL ""

 #define DEFAULT_ES_URL "http://localhost:9200"
@@ -25,7 +25,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {

    char *abs_path = abspath(argv[1]);
    if (abs_path == NULL) {
-        fprintf(stderr, "File not found: %s", argv[1]);
+        fprintf(stderr, "File not found: %s\n", argv[1]);
        return 1;
    } else {
        args->path = abs_path;
@@ -34,7 +34,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
    if (args->incremental != NULL) {
        abs_path = abspath(args->incremental);
        if (abs_path == NULL) {
-            fprintf(stderr, "File not found: %s", args->incremental);
+            fprintf(stderr, "File not found: %s\n", args->incremental);
            return 1;
        }
    }
@@ -100,7 +100,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {

    char *index_path = abspath(argv[1]);
    if (index_path == NULL) {
-        fprintf(stderr, "File not found: %s", argv[1]);
+        fprintf(stderr, "File not found: %s\n", argv[1]);
        return 1;
    } else {
        args->index_path = argv[1];
@@ -109,6 +109,27 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
    if (args->es_url == NULL) {
        args->es_url = DEFAULT_ES_URL;
    }
+
+    if (args->script_path != NULL) {
+        struct stat info;
+        int res = stat(args->script_path, &info);
+
+        if (res == -1) {
+            fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
+            return 1;
+        }
+
+        int fd = open(args->script_path, O_RDONLY);
+        if (fd == -1) {
+            fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
+            return 1;
+        }
+
+        args->script = malloc(info.st_size + 1);
+        read(fd, args->script, info.st_size);
+        *(args->script + info.st_size) = '\0';
+        close(fd);
+    }
    return 0;
 }

@@ -137,7 +158,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
    for (int i = 0; i < args->index_count; i++) {
        char *abs_path = abspath(args->indices[i]);
        if (abs_path == NULL) {
-            fprintf(stderr, "File not found: %s", abs_path);
+            fprintf(stderr, "File not found: %s\n", abs_path);
            return 1;
        }
    }
--- a/src/cli.h
+++ b/src/cli.h
@@ -22,6 +22,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
 typedef struct index_args {
    char *es_url;
    const char *index_path;
+    const char *script_path; // set by the --script-file option; index_args_validate() skips script loading when this is NULL
+    char *script; // malloc'd in index_args_validate(): bytes read from script_path plus a terminating '\0'
    int print;
    int force_reset;
 } index_args_t;
--- a/src/index/elastic.c
+++ b/src/index/elastic.c
@@ -6,7 +6,6 @@
 #include <stdio.h>
 #include <string.h>
 #include <cJSON/cJSON.h>
-#include <src/ctx.h>

 #include "static_generated.c"

@@ -54,6 +53,40 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
    elastic_index_line(bulk_line);
 }

+void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) { // POSTs `script` as a "painless" _update_by_query whose term query matches "index" == index_id
+
+    cJSON *body = cJSON_CreateObject();
+    cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
+    cJSON_AddStringToObject(script_obj, "lang", "painless");
+    cJSON_AddStringToObject(script_obj, "source", script);
+
+    cJSON *query = cJSON_AddObjectToObject(body, "query");
+    cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
+    cJSON_AddStringToObject(term_obj, "index", index_id);
+
+    char * str = cJSON_Print(body); // serialized request body; released with cJSON_free() below
+
+    char bulk_url[4096]; // despite the name, this holds the _update_by_query URL
+    snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
+    response_t *r = web_post(bulk_url, str, "Content-Type: application/json");
+    printf("Executed user script <%d>\n", r->status_code); // NOTE(review): r is dereferenced unchecked - confirm web_post() never returns NULL
+    cJSON *resp = cJSON_Parse(r->body); // parsed before r is freed; resp is inspected afterwards
+
+    cJSON_free(str);
+    cJSON_Delete(body);
+    free_response(r);
+
+    cJSON *error = cJSON_GetObjectItem(resp, "error"); // a top-level "error" member is reported below as a user script error
+    if (error != NULL) {
+        char *error_str = cJSON_Print(error);
+
+        fprintf(stderr, "User script error: \n%s\n", error_str);
+        cJSON_free(error_str);
+    }
+
+    cJSON_Delete(resp);
+}
+
 void elastic_flush() {

    if (Indexer == NULL) {
@@ -115,6 +148,7 @@ void elastic_flush() {
    cJSON_Delete(ret_json);

    free_response(r);
+    free(buf);
 }

 void elastic_index_line(es_bulk_line_t *line) {
@@ -140,8 +174,7 @@ void elastic_index_line(es_bulk_line_t *line) {

 es_indexer_t *create_indexer(const char *url) {

-    size_t url_len = strlen(url);
-    char *es_url = malloc(url_len);
+    char *es_url = malloc(strlen(url) + 1);
    strcpy(es_url, url);

    es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
@@ -154,7 +187,7 @@ es_indexer_t *create_indexer(const char *url) {
    return indexer;
 }

-void destroy_indexer() {
+void destroy_indexer(char * script, char index_id[UUID_STR_LEN]) {

    char url[4096];

@@ -163,6 +196,15 @@ void destroy_indexer() {
    printf("Refresh index <%d>\n", r->status_code);
    free_response(r);

+    if (script != NULL) {
+        execute_update_script(script, index_id);
+    }
+
+    snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
+    r = web_post(url, "", NULL);
+    printf("Refresh index <%d>\n", r->status_code);
+    free_response(r);
+
    snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
    r = web_post(url, "", NULL);
    printf("Merge index <%d>\n", r->status_code);
--- a/src/index/elastic.h
+++ b/src/index/elastic.h
@@ -24,7 +24,7 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);

 es_indexer_t *create_indexer(const char* es_url);

-void destroy_indexer();
+void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);

 void elastic_init(int force_reset);

--- a/src/index/static_generated.c
+++ b/src/index/static_generated.c
--- a/src/io/serialize.c
+++ b/src/io/serialize.c
@@ -56,7 +56,7 @@ index_descriptor_t read_index_descriptor(char *path) {
    int fd = open(path, O_RDONLY);

    if (fd == -1) {
-        fprintf(stderr, "Invalid/corrupt index (Could not find descriptor)");
+        fprintf(stderr, "Invalid/corrupt index (Could not find descriptor)\n");
        exit(1);
    }

@@ -264,8 +264,9 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
        }

        func(document, uuid_str);
-        cJSON_free(document);
+        cJSON_Delete(document);
    }
+    dyn_buffer_destroy(&buf);
    fclose(file);
 }

--- a/src/main.c
+++ b/src/main.c
@@ -10,7 +10,7 @@
 #define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"


-static const char *const Version = "1.1.4";
+static const char *const Version = "1.1.5";
 static const char *const usage[] = {
        "sist2 scan [OPTION]... PATH",
        "sist2 index [OPTION]... INDEX",
@@ -163,10 +163,11 @@ void sist2_index(index_args_t *args) {
            read_index(file_path, desc.uuid, f);
        }
    }
+    closedir(dir);

    if (!args->print) {
        elastic_flush();
-        destroy_indexer();
+        destroy_indexer(args->script, desc.uuid);
    }
 }

@@ -208,16 +209,20 @@ int main(int argc, const char *argv[]) {
    web_args_t *web_args = web_args_create();
    #endif

+    int arg_version = 0;
+
    char * common_es_url = NULL;

    struct argparse_option options[] = {
            OPT_HELP(),

+            OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
+
            OPT_GROUP("Scan options"),
            OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
            OPT_FLOAT('q', "quality", &scan_args->quality,
-                      "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=15"),
-            OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=200"),
+                      "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
+            OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=500"),
            OPT_INTEGER(0, "content-size", &scan_args->content_size,
                        "Number of bytes to be extracted from text documents. DEFAULT=4096"),
            OPT_STRING(0, "incremental", &scan_args->incremental,
@@ -230,6 +235,7 @@ int main(int argc, const char *argv[]) {
            OPT_GROUP("Index options"),
            OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
            OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
+            OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
            OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
                                                              "(You must use this option the first time you use the index command)"),

@@ -247,6 +253,11 @@ int main(int argc, const char *argv[]) {
    argparse_describe(&argparse, DESCRIPTION, EPILOG);
    argc = argparse_parse(&argparse, argc, argv);

+    if (arg_version) {
+        printf(Version);
+        exit(0);
+    }
+
    #ifndef SIST_SCAN_ONLY
    web_args->es_url = common_es_url;
    index_args->es_url = common_es_url;
--- a/src/web/static_generated.c
+++ b/src/web/static_generated.c