User scripts, bug fixes, docker image

This commit is contained in:
2019-11-12 20:58:43 -05:00
parent 6931d320a2
commit ebfd7e03ce
21 changed files with 489 additions and 63 deletions

View File

@@ -2,8 +2,8 @@
#define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_CONTENT_SIZE 4096
#define DEFAULT_QUALITY 15
#define DEFAULT_SIZE 200
#define DEFAULT_QUALITY 5
#define DEFAULT_SIZE 500
#define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200"
@@ -25,7 +25,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char *abs_path = abspath(argv[1]);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", argv[1]);
fprintf(stderr, "File not found: %s\n", argv[1]);
return 1;
} else {
args->path = abs_path;
@@ -34,7 +34,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->incremental != NULL) {
abs_path = abspath(args->incremental);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", args->incremental);
fprintf(stderr, "File not found: %s\n", args->incremental);
return 1;
}
}
@@ -100,7 +100,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
char *index_path = abspath(argv[1]);
if (index_path == NULL) {
fprintf(stderr, "File not found: %s", argv[1]);
fprintf(stderr, "File not found: %s\n", argv[1]);
return 1;
} else {
args->index_path = argv[1];
@@ -109,6 +109,27 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
if (args->es_url == NULL) {
args->es_url = DEFAULT_ES_URL;
}
/*
 * Load the user script named by args->script_path into a freshly allocated,
 * NUL-terminated buffer stored in args->script. Any failure is reported on
 * stderr and makes the enclosing validation return 1.
 */
if (args->script_path != NULL) {
    int fd = open(args->script_path, O_RDONLY);
    if (fd == -1) {
        fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
        return 1;
    }

    /* fstat on the open descriptor so the size belongs to the file we read. */
    struct stat info;
    if (fstat(fd, &info) == -1) {
        fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
        close(fd);
        return 1;
    }

    size_t script_len = (size_t) info.st_size;
    char *script_buf = malloc(script_len + 1);
    if (script_buf == NULL) {
        fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
        close(fd);
        return 1;
    }

    /* read() may return fewer bytes than requested; loop until done or EOF. */
    size_t total = 0;
    while (total < script_len) {
        ssize_t n = read(fd, script_buf + total, script_len - total);
        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            fprintf(stderr, "Error reading script file '%s': %s\n", args->script_path, strerror(errno));
            free(script_buf);
            close(fd);
            return 1;
        }
        if (n == 0) {
            /* EOF before st_size bytes: keep what was actually read. */
            break;
        }
        total += (size_t) n;
    }
    script_buf[total] = '\0';
    close(fd);

    args->script = script_buf;
}
return 0;
}
@@ -137,7 +158,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s", abs_path);
fprintf(stderr, "File not found: %s\n", abs_path);
return 1;
}
}

View File

@@ -22,6 +22,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
/* Options for the `sist2 index` command. */
typedef struct index_args {
/* Elasticsearch base URL; validation falls back to DEFAULT_ES_URL when NULL. */
char *es_url;
/* Index path as given on the command line (argv[1]). */
const char *index_path;
/* Value of --script-file: path to the user script, or NULL if not given. */
const char *script_path;
/* malloc'd, NUL-terminated contents of script_path, filled in during validation. */
char *script;
/* -p/--print: just print JSON documents to stdout. */
int print;
/* -f/--force-reset: reset Elasticsearch mappings and settings. */
int force_reset;
} index_args_t;

View File

@@ -6,7 +6,6 @@
#include <stdio.h>
#include <string.h>
#include <cJSON/cJSON.h>
#include <src/ctx.h>
#include "static_generated.c"
@@ -54,6 +53,40 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
elastic_index_line(bulk_line);
}
/**
 * Send a user script to the Elasticsearch `_update_by_query` endpoint of the
 * `sist2` index.
 *
 * The request body has a `script` object (lang "painless", source = script)
 * and a `term` query on the `index` field equal to index_id.
 *
 * @param script   Script source placed in the request's "source" field.
 * @param index_id Value used for the term query on the "index" field.
 *
 * The HTTP status code is printed to stdout. If the parsed response contains
 * an "error" member, it is printed to stderr.
 */
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
    cJSON *body = cJSON_CreateObject();

    cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
    cJSON_AddStringToObject(script_obj, "lang", "painless");
    cJSON_AddStringToObject(script_obj, "source", script);

    cJSON *query = cJSON_AddObjectToObject(body, "query");
    cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
    cJSON_AddStringToObject(term_obj, "index", index_id);

    /* The printed string is an independent copy, so the tree can go now. */
    char *str = cJSON_Print(body);
    cJSON_Delete(body);
    if (str == NULL) {
        /* cJSON_Print returns NULL on failure; do not hand that to web_post. */
        fprintf(stderr, "Could not serialize user script request\n");
        return;
    }

    char url[4096];
    snprintf(url, sizeof(url), "%s/sist2/_update_by_query?pretty", Indexer->es_url);
    response_t *r = web_post(url, str, "Content-Type: application/json");
    cJSON_free(str);

    printf("Executed user script <%d>\n", r->status_code);

    cJSON *resp = cJSON_Parse(r->body);
    free_response(r);

    /* cJSON_GetObjectItem tolerates a NULL object (unparseable response). */
    cJSON *error = cJSON_GetObjectItem(resp, "error");
    if (error != NULL) {
        char *error_str = cJSON_Print(error);
        /* Passing NULL to %s is undefined behaviour, so substitute a marker. */
        fprintf(stderr, "User script error: \n%s\n", error_str != NULL ? error_str : "(unprintable)");
        cJSON_free(error_str);
    }

    cJSON_Delete(resp);
}
void elastic_flush() {
if (Indexer == NULL) {
@@ -115,6 +148,7 @@ void elastic_flush() {
cJSON_Delete(ret_json);
free_response(r);
free(buf);
}
void elastic_index_line(es_bulk_line_t *line) {
@@ -140,8 +174,7 @@ void elastic_index_line(es_bulk_line_t *line) {
es_indexer_t *create_indexer(const char *url) {
size_t url_len = strlen(url);
char *es_url = malloc(url_len);
char *es_url = malloc(strlen(url) + 1);
strcpy(es_url, url);
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
@@ -154,7 +187,7 @@ es_indexer_t *create_indexer(const char *url) {
return indexer;
}
void destroy_indexer() {
void destroy_indexer(char * script, char index_id[UUID_STR_LEN]) {
char url[4096];
@@ -163,6 +196,15 @@ void destroy_indexer() {
printf("Refresh index <%d>\n", r->status_code);
free_response(r);
if (script != NULL) {
execute_update_script(script, index_id);
}
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
r = web_post(url, "", NULL);
printf("Refresh index <%d>\n", r->status_code);
free_response(r);
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
r = web_post(url, "", NULL);
printf("Merge index <%d>\n", r->status_code);

View File

@@ -24,7 +24,7 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
es_indexer_t *create_indexer(const char* es_url);
void destroy_indexer();
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);
void elastic_init(int force_reset);

File diff suppressed because one or more lines are too long

View File

@@ -56,7 +56,7 @@ index_descriptor_t read_index_descriptor(char *path) {
int fd = open(path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "Invalid/corrupt index (Could not find descriptor)");
fprintf(stderr, "Invalid/corrupt index (Could not find descriptor)\n");
exit(1);
}
@@ -264,8 +264,9 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
}
func(document, uuid_str);
cJSON_free(document);
cJSON_Delete(document);
}
dyn_buffer_destroy(&buf);
fclose(file);
}

View File

@@ -10,7 +10,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
static const char *const Version = "1.1.4";
static const char *const Version = "1.1.5";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
@@ -163,10 +163,11 @@ void sist2_index(index_args_t *args) {
read_index(file_path, desc.uuid, f);
}
}
closedir(dir);
if (!args->print) {
elastic_flush();
destroy_indexer();
destroy_indexer(args->script, desc.uuid);
}
}
@@ -208,16 +209,20 @@ int main(int argc, const char *argv[]) {
web_args_t *web_args = web_args_create();
#endif
int arg_version = 0;
char * common_es_url = NULL;
struct argparse_option options[] = {
OPT_HELP(),
OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=15"),
OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=200"),
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=500"),
OPT_INTEGER(0, "content-size", &scan_args->content_size,
"Number of bytes to be extracted from text documents. DEFAULT=4096"),
OPT_STRING(0, "incremental", &scan_args->incremental,
@@ -230,6 +235,7 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Index options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
"(You must use this option the first time you use the index command)"),
@@ -247,6 +253,11 @@ int main(int argc, const char *argv[]) {
argparse_describe(&argparse, DESCRIPTION, EPILOG);
argc = argparse_parse(&argparse, argc, argv);
if (arg_version) {
printf(Version);
exit(0);
}
#ifndef SIST_SCAN_ONLY
web_args->es_url = common_es_url;
index_args->es_url = common_es_url;

File diff suppressed because one or more lines are too long