mirror of
https://github.com/simon987/sist2.git
synced 2025-12-11 14:38:54 +00:00
User scripts, bug fixes, docker image
This commit is contained in:
33
src/cli.c
33
src/cli.c
@@ -2,8 +2,8 @@
|
||||
|
||||
#define DEFAULT_OUTPUT "index.sist2/"
|
||||
#define DEFAULT_CONTENT_SIZE 4096
|
||||
#define DEFAULT_QUALITY 15
|
||||
#define DEFAULT_SIZE 200
|
||||
#define DEFAULT_QUALITY 5
|
||||
#define DEFAULT_SIZE 500
|
||||
#define DEFAULT_REWRITE_URL ""
|
||||
|
||||
#define DEFAULT_ES_URL "http://localhost:9200"
|
||||
@@ -25,7 +25,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
|
||||
char *abs_path = abspath(argv[1]);
|
||||
if (abs_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s", argv[1]);
|
||||
fprintf(stderr, "File not found: %s\n", argv[1]);
|
||||
return 1;
|
||||
} else {
|
||||
args->path = abs_path;
|
||||
@@ -34,7 +34,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
|
||||
if (args->incremental != NULL) {
|
||||
abs_path = abspath(args->incremental);
|
||||
if (abs_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s", args->incremental);
|
||||
fprintf(stderr, "File not found: %s\n", args->incremental);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -100,7 +100,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
|
||||
char *index_path = abspath(argv[1]);
|
||||
if (index_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s", argv[1]);
|
||||
fprintf(stderr, "File not found: %s\n", argv[1]);
|
||||
return 1;
|
||||
} else {
|
||||
args->index_path = argv[1];
|
||||
@@ -109,6 +109,27 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
||||
if (args->es_url == NULL) {
|
||||
args->es_url = DEFAULT_ES_URL;
|
||||
}
|
||||
|
||||
if (args->script_path != NULL) {
|
||||
struct stat info;
|
||||
int res = stat(args->script_path, &info);
|
||||
|
||||
if (res == -1) {
|
||||
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
int fd = open(args->script_path, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
fprintf(stderr, "Error opening script file '%s': %s\n", args->script_path, strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
args->script = malloc(info.st_size + 1);
|
||||
read(fd, args->script, info.st_size);
|
||||
*(args->script + info.st_size) = '\0';
|
||||
close(fd);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -137,7 +158,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
|
||||
for (int i = 0; i < args->index_count; i++) {
|
||||
char *abs_path = abspath(args->indices[i]);
|
||||
if (abs_path == NULL) {
|
||||
fprintf(stderr, "File not found: %s", abs_path);
|
||||
fprintf(stderr, "File not found: %s\n", abs_path);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,6 +22,8 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
|
||||
/* Arguments of the `sist2 index` sub-command, filled in by argparse and
 * completed by index_args_validate(). */
typedef struct index_args {
|
||||
/* Elasticsearch URL (--es-url); index_args_validate() substitutes
 * DEFAULT_ES_URL when this is NULL. */
char *es_url;
|
||||
/* Index location; index_args_validate() sets it to argv[1]. */
const char *index_path;
|
||||
/* Value of --script-file ("Path to user script."); only used by
 * index_args_validate() when non-NULL. */
const char *script_path;
|
||||
/* Contents of script_path, read by index_args_validate() into a malloc'd,
 * NUL-terminated buffer. NOTE(review): no free() is visible here - confirm
 * who owns it. */
char *script;
|
||||
/* -p/--print flag: just print JSON documents to stdout. */
int print;
|
||||
/* -f/--force-reset flag: reset Elasticsearch mappings and settings. */
int force_reset;
|
||||
} index_args_t;
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <cJSON/cJSON.h>
|
||||
#include <src/ctx.h>
|
||||
|
||||
#include "static_generated.c"
|
||||
|
||||
@@ -54,6 +53,40 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]) {
|
||||
elastic_index_line(bulk_line);
|
||||
}
|
||||
|
||||
void execute_update_script(const char *script, const char index_id[UUID_STR_LEN]) {
|
||||
|
||||
cJSON *body = cJSON_CreateObject();
|
||||
cJSON *script_obj = cJSON_AddObjectToObject(body, "script");
|
||||
cJSON_AddStringToObject(script_obj, "lang", "painless");
|
||||
cJSON_AddStringToObject(script_obj, "source", script);
|
||||
|
||||
cJSON *query = cJSON_AddObjectToObject(body, "query");
|
||||
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
|
||||
cJSON_AddStringToObject(term_obj, "index", index_id);
|
||||
|
||||
char * str = cJSON_Print(body);
|
||||
|
||||
char bulk_url[4096];
|
||||
snprintf(bulk_url, 4096, "%s/sist2/_update_by_query?pretty", Indexer->es_url);
|
||||
response_t *r = web_post(bulk_url, str, "Content-Type: application/json");
|
||||
printf("Executed user script <%d>\n", r->status_code);
|
||||
cJSON *resp = cJSON_Parse(r->body);
|
||||
|
||||
cJSON_free(str);
|
||||
cJSON_Delete(body);
|
||||
free_response(r);
|
||||
|
||||
cJSON *error = cJSON_GetObjectItem(resp, "error");
|
||||
if (error != NULL) {
|
||||
char *error_str = cJSON_Print(error);
|
||||
|
||||
fprintf(stderr, "User script error: \n%s\n", error_str);
|
||||
cJSON_free(error_str);
|
||||
}
|
||||
|
||||
cJSON_Delete(resp);
|
||||
}
|
||||
|
||||
void elastic_flush() {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
@@ -115,6 +148,7 @@ void elastic_flush() {
|
||||
cJSON_Delete(ret_json);
|
||||
|
||||
free_response(r);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
void elastic_index_line(es_bulk_line_t *line) {
|
||||
@@ -140,8 +174,7 @@ void elastic_index_line(es_bulk_line_t *line) {
|
||||
|
||||
es_indexer_t *create_indexer(const char *url) {
|
||||
|
||||
size_t url_len = strlen(url);
|
||||
char *es_url = malloc(url_len);
|
||||
char *es_url = malloc(strlen(url) + 1);
|
||||
strcpy(es_url, url);
|
||||
|
||||
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
|
||||
@@ -154,7 +187,7 @@ es_indexer_t *create_indexer(const char *url) {
|
||||
return indexer;
|
||||
}
|
||||
|
||||
void destroy_indexer() {
|
||||
void destroy_indexer(char * script, char index_id[UUID_STR_LEN]) {
|
||||
|
||||
char url[4096];
|
||||
|
||||
@@ -163,6 +196,15 @@ void destroy_indexer() {
|
||||
printf("Refresh index <%d>\n", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
if (script != NULL) {
|
||||
execute_update_script(script, index_id);
|
||||
}
|
||||
|
||||
snprintf(url, sizeof(url), "%s/sist2/_refresh", IndexCtx.es_url);
|
||||
r = web_post(url, "", NULL);
|
||||
printf("Refresh index <%d>\n", r->status_code);
|
||||
free_response(r);
|
||||
|
||||
snprintf(url, sizeof(url), "%s/sist2/_forcemerge", IndexCtx.es_url);
|
||||
r = web_post(url, "", NULL);
|
||||
printf("Merge index <%d>\n", r->status_code);
|
||||
|
||||
@@ -24,7 +24,7 @@ void index_json(cJSON *document, const char uuid_str[UUID_STR_LEN]);
|
||||
|
||||
es_indexer_t *create_indexer(const char* es_url);
|
||||
|
||||
void destroy_indexer();
|
||||
void destroy_indexer(char *script, char index_id[UUID_STR_LEN]);
|
||||
|
||||
void elastic_init(int force_reset);
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -56,7 +56,7 @@ index_descriptor_t read_index_descriptor(char *path) {
|
||||
int fd = open(path, O_RDONLY);
|
||||
|
||||
if (fd == -1) {
|
||||
fprintf(stderr, "Invalid/corrupt index (Could not find descriptor)");
|
||||
fprintf(stderr, "Invalid/corrupt index (Could not find descriptor)\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@@ -264,8 +264,9 @@ void read_index(const char *path, const char index_id[UUID_STR_LEN], index_func
|
||||
}
|
||||
|
||||
func(document, uuid_str);
|
||||
cJSON_free(document);
|
||||
cJSON_Delete(document);
|
||||
}
|
||||
dyn_buffer_destroy(&buf);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
|
||||
19
src/main.c
19
src/main.c
@@ -10,7 +10,7 @@
|
||||
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"
|
||||
|
||||
|
||||
static const char *const Version = "1.1.4";
|
||||
static const char *const Version = "1.1.5";
|
||||
static const char *const usage[] = {
|
||||
"sist2 scan [OPTION]... PATH",
|
||||
"sist2 index [OPTION]... INDEX",
|
||||
@@ -163,10 +163,11 @@ void sist2_index(index_args_t *args) {
|
||||
read_index(file_path, desc.uuid, f);
|
||||
}
|
||||
}
|
||||
closedir(dir);
|
||||
|
||||
if (!args->print) {
|
||||
elastic_flush();
|
||||
destroy_indexer();
|
||||
destroy_indexer(args->script, desc.uuid);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -208,16 +209,20 @@ int main(int argc, const char *argv[]) {
|
||||
web_args_t *web_args = web_args_create();
|
||||
#endif
|
||||
|
||||
int arg_version = 0;
|
||||
|
||||
char * common_es_url = NULL;
|
||||
|
||||
struct argparse_option options[] = {
|
||||
OPT_HELP(),
|
||||
|
||||
OPT_BOOLEAN('v', "version", &arg_version, "Show version and exit"),
|
||||
|
||||
OPT_GROUP("Scan options"),
|
||||
OPT_INTEGER('t', "threads", &scan_args->threads, "Number of threads. DEFAULT=1"),
|
||||
OPT_FLOAT('q', "quality", &scan_args->quality,
|
||||
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=15"),
|
||||
OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=200"),
|
||||
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=5"),
|
||||
OPT_INTEGER(0, "size", &scan_args->size, "Thumbnail size, in pixels. DEFAULT=500"),
|
||||
OPT_INTEGER(0, "content-size", &scan_args->content_size,
|
||||
"Number of bytes to be extracted from text documents. DEFAULT=4096"),
|
||||
OPT_STRING(0, "incremental", &scan_args->incremental,
|
||||
@@ -230,6 +235,7 @@ int main(int argc, const char *argv[]) {
|
||||
OPT_GROUP("Index options"),
|
||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),
|
||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||
OPT_STRING(0, "script-file", &index_args->script_path, "Path to user script."),
|
||||
OPT_BOOLEAN('f', "force-reset", &index_args->force_reset, "Reset Elasticsearch mappings and settings. "
|
||||
"(You must use this option the first time you use the index command)"),
|
||||
|
||||
@@ -247,6 +253,11 @@ int main(int argc, const char *argv[]) {
|
||||
argparse_describe(&argparse, DESCRIPTION, EPILOG);
|
||||
argc = argparse_parse(&argparse, argc, argv);
|
||||
|
||||
if (arg_version) {
|
||||
printf(Version);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
#ifndef SIST_SCAN_ONLY
|
||||
web_args->es_url = common_es_url;
|
||||
index_args->es_url = common_es_url;
|
||||
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user