mirror of
https://github.com/simon987/sist2.git
synced 2025-04-24 12:45:56 +00:00
Compare commits
9 Commits
8ab8124370
...
8fa34da02f
Author | SHA1 | Date | |
---|---|---|---|
8fa34da02f | |||
37919932de | |||
|
501064da10 | ||
|
8f7edf3190 | ||
|
e65905a165 | ||
|
2cb57f3634 | ||
|
679e12f786 | ||
|
291d307689 | ||
|
7d40b9e959 |
2
scripts/start_dev_es.sh
Executable file
2
scripts/start_dev_es.sh
Executable file
@ -0,0 +1,2 @@
|
|||||||
|
docker run --rm -it -p 9200:9200 -e "discovery.type=single-node" \
|
||||||
|
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.14.0
|
10
src/cli.c
10
src/cli.c
@ -65,6 +65,10 @@ void index_args_destroy(index_args_t *args) {
|
|||||||
if (args->es_settings_path) {
|
if (args->es_settings_path) {
|
||||||
free(args->es_settings);
|
free(args->es_settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (args->index_path != NULL) {
|
||||||
|
free(args->index_path);
|
||||||
|
}
|
||||||
free(args);
|
free(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -336,8 +340,7 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
|
|||||||
if (index_path == NULL) {
|
if (index_path == NULL) {
|
||||||
LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1])
|
LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1])
|
||||||
} else {
|
} else {
|
||||||
args->index_path = argv[1];
|
args->index_path = index_path;
|
||||||
free(index_path);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (args->es_url == NULL) {
|
if (args->es_url == NULL) {
|
||||||
@ -522,8 +525,7 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
|
|||||||
if (index_path == NULL) {
|
if (index_path == NULL) {
|
||||||
LOG_FATALF("cli.c", "Invalid index PATH argument. File not found: %s", argv[1])
|
LOG_FATALF("cli.c", "Invalid index PATH argument. File not found: %s", argv[1])
|
||||||
} else {
|
} else {
|
||||||
args->index_path = argv[1];
|
args->index_path = index_path;
|
||||||
free(index_path);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (args->es_url == NULL) {
|
if (args->es_url == NULL) {
|
||||||
|
@ -44,7 +44,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
|
|||||||
typedef struct index_args {
|
typedef struct index_args {
|
||||||
char *es_url;
|
char *es_url;
|
||||||
char *es_index;
|
char *es_index;
|
||||||
const char *index_path;
|
char *index_path;
|
||||||
const char *script_path;
|
const char *script_path;
|
||||||
char *script;
|
char *script;
|
||||||
const char *es_settings_path;
|
const char *es_settings_path;
|
||||||
@ -56,6 +56,7 @@ typedef struct index_args {
|
|||||||
int async_script;
|
int async_script;
|
||||||
int force_reset;
|
int force_reset;
|
||||||
int threads;
|
int threads;
|
||||||
|
int incremental;
|
||||||
} index_args_t;
|
} index_args_t;
|
||||||
|
|
||||||
typedef struct web_args {
|
typedef struct web_args {
|
||||||
|
@ -41,6 +41,7 @@ typedef struct {
|
|||||||
|
|
||||||
GHashTable *original_table;
|
GHashTable *original_table;
|
||||||
GHashTable *copy_table;
|
GHashTable *copy_table;
|
||||||
|
GHashTable *new_table;
|
||||||
pthread_mutex_t copy_table_mu;
|
pthread_mutex_t copy_table_mu;
|
||||||
|
|
||||||
pcre *exclude;
|
pcre *exclude;
|
||||||
@ -85,6 +86,10 @@ typedef struct {
|
|||||||
GHashTable *tags;
|
GHashTable *tags;
|
||||||
store_t *meta_store;
|
store_t *meta_store;
|
||||||
GHashTable *meta;
|
GHashTable *meta;
|
||||||
|
/**
|
||||||
|
* Set to false when using --print
|
||||||
|
*/
|
||||||
|
int needs_es_connection;
|
||||||
} IndexCtx_t;
|
} IndexCtx_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -15,19 +15,34 @@ typedef struct es_indexer {
|
|||||||
} es_indexer_t;
|
} es_indexer_t;
|
||||||
|
|
||||||
|
|
||||||
static __thread es_indexer_t *Indexer;
|
static __thread es_indexer_t *Indexer = NULL;
|
||||||
|
|
||||||
void delete_queue(int max);
|
void free_queue(int max);
|
||||||
|
|
||||||
void elastic_flush();
|
void elastic_flush();
|
||||||
|
|
||||||
void elastic_cleanup() {
|
void destroy_indexer(es_indexer_t *indexer) {
|
||||||
elastic_flush();
|
|
||||||
if (Indexer != NULL) {
|
if (indexer == NULL) {
|
||||||
free(Indexer->es_index);
|
return;
|
||||||
free(Indexer->es_url);
|
|
||||||
free(Indexer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LOG_DEBUG("elastic.c", "Destroying indexer")
|
||||||
|
|
||||||
|
if (indexer->es_url != NULL) {
|
||||||
|
free(indexer->es_url);
|
||||||
|
free(indexer->es_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(indexer);
|
||||||
|
}
|
||||||
|
|
||||||
|
void elastic_cleanup() {
|
||||||
|
if (IndexCtx.needs_es_connection) {
|
||||||
|
elastic_flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
destroy_indexer(Indexer);
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
|
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
|
||||||
@ -52,11 +67,22 @@ void index_json_func(void *arg) {
|
|||||||
elastic_index_line(line);
|
elastic_index_line(line);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void delete_document(const char* document_id_str, void* UNUSED(_data)) {
|
||||||
|
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t));
|
||||||
|
bulk_line->type = ES_BULK_LINE_DELETE;
|
||||||
|
bulk_line->next = NULL;
|
||||||
|
|
||||||
|
memcpy(bulk_line->path_md5_str, document_id_str, MD5_STR_LENGTH);
|
||||||
|
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
|
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
|
||||||
char *json = cJSON_PrintUnformatted(document);
|
char *json = cJSON_PrintUnformatted(document);
|
||||||
|
|
||||||
size_t json_len = strlen(json);
|
size_t json_len = strlen(json);
|
||||||
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
|
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
|
||||||
|
bulk_line->type = ES_BULK_LINE_INDEX;
|
||||||
memcpy(bulk_line->line, json, json_len);
|
memcpy(bulk_line->line, json, json_len);
|
||||||
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
|
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
|
||||||
*(bulk_line->line + json_len) = '\n';
|
*(bulk_line->line + json_len) = '\n';
|
||||||
@ -125,30 +151,47 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
|
|||||||
size_t buf_cur = 0;
|
size_t buf_cur = 0;
|
||||||
char *buf = malloc(8192);
|
char *buf = malloc(8192);
|
||||||
size_t buf_capacity = 8192;
|
size_t buf_capacity = 8192;
|
||||||
|
#define GROW_BUF(delta) \
|
||||||
|
while (buf_size + (delta) > buf_capacity) { \
|
||||||
|
buf_capacity *= 2; \
|
||||||
|
buf = realloc(buf, buf_capacity); \
|
||||||
|
} \
|
||||||
|
buf_size += (delta); \
|
||||||
|
|
||||||
|
// see: https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html
|
||||||
|
// ES_BULK_LINE_INDEX: two lines, 1st action, 2nd content
|
||||||
|
// ES_BULK_LINE_DELETE: one line
|
||||||
while (line != NULL && *count < max) {
|
while (line != NULL && *count < max) {
|
||||||
char action_str[256];
|
char action_str[256];
|
||||||
snprintf(
|
if (line->type == ES_BULK_LINE_INDEX) {
|
||||||
action_str, sizeof(action_str),
|
snprintf(
|
||||||
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
|
action_str, sizeof(action_str),
|
||||||
line->path_md5_str, Indexer->es_index
|
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
|
||||||
);
|
line->path_md5_str, Indexer->es_index
|
||||||
|
);
|
||||||
|
|
||||||
size_t action_str_len = strlen(action_str);
|
size_t action_str_len = strlen(action_str);
|
||||||
size_t line_len = strlen(line->line);
|
size_t line_len = strlen(line->line);
|
||||||
|
|
||||||
while (buf_size + line_len + action_str_len > buf_capacity) {
|
GROW_BUF(action_str_len + line_len);
|
||||||
buf_capacity *= 2;
|
|
||||||
buf = realloc(buf, buf_capacity);
|
memcpy(buf + buf_cur, action_str, action_str_len);
|
||||||
|
buf_cur += action_str_len;
|
||||||
|
memcpy(buf + buf_cur, line->line, line_len);
|
||||||
|
buf_cur += line_len;
|
||||||
|
|
||||||
|
} else if (line->type == ES_BULK_LINE_DELETE) {
|
||||||
|
snprintf(
|
||||||
|
action_str, sizeof(action_str),
|
||||||
|
"{\"delete\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
|
||||||
|
line->path_md5_str, Indexer->es_index
|
||||||
|
);
|
||||||
|
|
||||||
|
size_t action_str_len = strlen(action_str);
|
||||||
|
GROW_BUF(action_str_len);
|
||||||
|
memcpy(buf + buf_cur, action_str, action_str_len);
|
||||||
|
buf_cur += action_str_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
buf_size += line_len + action_str_len;
|
|
||||||
|
|
||||||
memcpy(buf + buf_cur, action_str, action_str_len);
|
|
||||||
buf_cur += action_str_len;
|
|
||||||
memcpy(buf + buf_cur, line->line, line_len);
|
|
||||||
buf_cur += line_len;
|
|
||||||
|
|
||||||
line = line->next;
|
line = line->next;
|
||||||
(*count)++;
|
(*count)++;
|
||||||
}
|
}
|
||||||
@ -223,7 +266,7 @@ void _elastic_flush(int max) {
|
|||||||
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
|
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
|
||||||
free_response(r);
|
free_response(r);
|
||||||
free(buf);
|
free(buf);
|
||||||
delete_queue(1);
|
free_queue(1);
|
||||||
if (Indexer->queued != 0) {
|
if (Indexer->queued != 0) {
|
||||||
elastic_flush();
|
elastic_flush();
|
||||||
}
|
}
|
||||||
@ -248,13 +291,13 @@ void _elastic_flush(int max) {
|
|||||||
|
|
||||||
} else if (r->status_code != 200) {
|
} else if (r->status_code != 200) {
|
||||||
print_errors(r);
|
print_errors(r);
|
||||||
delete_queue(Indexer->queued);
|
free_queue(Indexer->queued);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
print_errors(r);
|
print_errors(r);
|
||||||
LOG_DEBUGF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
|
LOG_DEBUGF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
|
||||||
delete_queue(max);
|
free_queue(max);
|
||||||
|
|
||||||
if (Indexer->queued != 0) {
|
if (Indexer->queued != 0) {
|
||||||
elastic_flush();
|
elastic_flush();
|
||||||
@ -265,7 +308,7 @@ void _elastic_flush(int max) {
|
|||||||
free(buf);
|
free(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
void delete_queue(int max) {
|
void free_queue(int max) {
|
||||||
for (int i = 0; i < max; i++) {
|
for (int i = 0; i < max; i++) {
|
||||||
es_bulk_line_t *tmp = Indexer->line_head;
|
es_bulk_line_t *tmp = Indexer->line_head;
|
||||||
Indexer->line_head = tmp->next;
|
Indexer->line_head = tmp->next;
|
||||||
@ -309,16 +352,22 @@ void elastic_index_line(es_bulk_line_t *line) {
|
|||||||
|
|
||||||
es_indexer_t *create_indexer(const char *url, const char *index) {
|
es_indexer_t *create_indexer(const char *url, const char *index) {
|
||||||
|
|
||||||
char *es_url = malloc(strlen(url) + 1);
|
|
||||||
strcpy(es_url, url);
|
|
||||||
|
|
||||||
char *es_index = malloc(strlen(index) + 1);
|
|
||||||
strcpy(es_index, index);
|
|
||||||
|
|
||||||
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
|
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
|
||||||
|
|
||||||
indexer->es_url = es_url;
|
if (IndexCtx.needs_es_connection) {
|
||||||
indexer->es_index = es_index;
|
char *es_url = malloc(strlen(url) + 1);
|
||||||
|
strcpy(es_url, url);
|
||||||
|
|
||||||
|
char *es_index = malloc(strlen(index) + 1);
|
||||||
|
strcpy(es_index, index);
|
||||||
|
|
||||||
|
indexer->es_url = es_url;
|
||||||
|
indexer->es_index = es_index;
|
||||||
|
} else {
|
||||||
|
indexer->es_url = NULL;
|
||||||
|
indexer->es_index = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
indexer->queued = 0;
|
indexer->queued = 0;
|
||||||
indexer->line_head = NULL;
|
indexer->line_head = NULL;
|
||||||
indexer->line_tail = NULL;
|
indexer->line_tail = NULL;
|
||||||
|
@ -3,9 +3,13 @@
|
|||||||
|
|
||||||
#include "src/sist.h"
|
#include "src/sist.h"
|
||||||
|
|
||||||
|
#define ES_BULK_LINE_INDEX 0
|
||||||
|
#define ES_BULK_LINE_DELETE 1
|
||||||
|
|
||||||
typedef struct es_bulk_line {
|
typedef struct es_bulk_line {
|
||||||
struct es_bulk_line *next;
|
struct es_bulk_line *next;
|
||||||
char path_md5_str[MD5_STR_LENGTH];
|
char path_md5_str[MD5_STR_LENGTH];
|
||||||
|
int type;
|
||||||
char line[0];
|
char line[0];
|
||||||
} es_bulk_line_t;
|
} es_bulk_line_t;
|
||||||
|
|
||||||
@ -40,6 +44,8 @@ void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
|
|||||||
|
|
||||||
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
|
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
|
||||||
|
|
||||||
|
void delete_document(const char *document_id_str, void* data);
|
||||||
|
|
||||||
es_indexer_t *create_indexer(const char *url, const char *index);
|
es_indexer_t *create_indexer(const char *url, const char *index);
|
||||||
|
|
||||||
void elastic_cleanup();
|
void elastic_cleanup();
|
||||||
|
@ -398,7 +398,7 @@ void read_index_bin_handle_line(const char *line, const char *index_id, index_fu
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void read_index_ndjson(const char *path, const char *index_id, index_func func) {
|
void read_lines(const char *path, const line_processor_t processor) {
|
||||||
dyn_buffer_t buf = dyn_buffer_create();
|
dyn_buffer_t buf = dyn_buffer_create();
|
||||||
|
|
||||||
// Initialize zstd things
|
// Initialize zstd things
|
||||||
@ -427,7 +427,7 @@ void read_index_ndjson(const char *path, const char *index_id, index_func func)
|
|||||||
|
|
||||||
if (c == '\n') {
|
if (c == '\n') {
|
||||||
dyn_buffer_write_char(&buf, '\0');
|
dyn_buffer_write_char(&buf, '\0');
|
||||||
read_index_bin_handle_line(buf.buf, index_id, func);
|
processor.func(buf.buf, processor.data);
|
||||||
buf.cur = 0;
|
buf.cur = 0;
|
||||||
} else {
|
} else {
|
||||||
dyn_buffer_write_char(&buf, c);
|
dyn_buffer_write_char(&buf, c);
|
||||||
@ -452,12 +452,22 @@ void read_index_ndjson(const char *path, const char *index_id, index_func func)
|
|||||||
|
|
||||||
dyn_buffer_destroy(&buf);
|
dyn_buffer_destroy(&buf);
|
||||||
fclose(file);
|
fclose(file);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void read_index_ndjson(const char *line, void* _data) {
|
||||||
|
void** data = _data;
|
||||||
|
const char* index_id = data[0];
|
||||||
|
index_func func = data[1];
|
||||||
|
read_index_bin_handle_line(line, index_id, func);
|
||||||
}
|
}
|
||||||
|
|
||||||
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
|
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
|
||||||
|
|
||||||
if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
|
if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
|
||||||
read_index_ndjson(path, index_id, func);
|
read_lines(path, (line_processor_t) {
|
||||||
|
.data = (void*[2]){(void*)index_id, func} ,
|
||||||
|
.func = read_index_ndjson,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -476,6 +486,7 @@ void incremental_read(GHashTable *table, const char *filepath, index_descriptor_
|
|||||||
}
|
}
|
||||||
|
|
||||||
static __thread GHashTable *IncrementalCopyTable = NULL;
|
static __thread GHashTable *IncrementalCopyTable = NULL;
|
||||||
|
static __thread GHashTable *IncrementalNewTable = NULL;
|
||||||
static __thread store_t *IncrementalCopySourceStore = NULL;
|
static __thread store_t *IncrementalCopySourceStore = NULL;
|
||||||
static __thread store_t *IncrementalCopyDestinationStore = NULL;
|
static __thread store_t *IncrementalCopyDestinationStore = NULL;
|
||||||
|
|
||||||
@ -524,3 +535,33 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
|||||||
|
|
||||||
read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
|
read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
|
||||||
|
|
||||||
|
char path_md5_n[MD5_STR_LENGTH + 1];
|
||||||
|
path_md5_n[MD5_STR_LENGTH] = '\0';
|
||||||
|
path_md5_n[MD5_STR_LENGTH - 1] = '\n';
|
||||||
|
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
|
||||||
|
|
||||||
|
// do not delete archive virtual entries
|
||||||
|
if (cJSON_GetObjectItem(document, "parent") == NULL
|
||||||
|
&& !incremental_get_str(IncrementalCopyTable, path_md5_str)
|
||||||
|
&& !incremental_get_str(IncrementalNewTable, path_md5_str)
|
||||||
|
) {
|
||||||
|
memcpy(path_md5_n, path_md5_str, MD5_STR_LENGTH - 1);
|
||||||
|
zstd_write_string(path_md5_n, MD5_STR_LENGTH);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void incremental_delete(const char *del_filepath, const char* index_filepath,
|
||||||
|
GHashTable *copy_table, GHashTable *new_table) {
|
||||||
|
|
||||||
|
if (WriterCtx.out_file == NULL) {
|
||||||
|
initialize_writer_ctx(del_filepath);
|
||||||
|
}
|
||||||
|
|
||||||
|
IncrementalCopyTable = copy_table;
|
||||||
|
IncrementalNewTable = new_table;
|
||||||
|
|
||||||
|
read_index(index_filepath, "", INDEX_TYPE_NDJSON, incremental_delete_handle_doc);
|
||||||
|
}
|
||||||
|
@ -7,13 +7,23 @@
|
|||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
|
|
||||||
|
typedef struct line_processor {
|
||||||
|
void* data;
|
||||||
|
void (*func)(const char*, void*);
|
||||||
|
} line_processor_t;
|
||||||
|
|
||||||
typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
|
typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
|
||||||
|
|
||||||
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
||||||
const char *dst_filepath, GHashTable *copy_table);
|
const char *dst_filepath, GHashTable *copy_table);
|
||||||
|
|
||||||
|
void incremental_delete(const char *del_filepath, const char* index_filepath,
|
||||||
|
GHashTable *copy_table, GHashTable *new_table);
|
||||||
|
|
||||||
void write_document(document_t *doc);
|
void write_document(document_t *doc);
|
||||||
|
|
||||||
|
void read_lines(const char *path, const line_processor_t processor);
|
||||||
|
|
||||||
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
|
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
|
||||||
|
|
||||||
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);
|
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);
|
||||||
@ -29,4 +39,18 @@ void write_index_descriptor(char *path, index_descriptor_t *desc);
|
|||||||
|
|
||||||
index_descriptor_t read_index_descriptor(char *path);
|
index_descriptor_t read_index_descriptor(char *path);
|
||||||
|
|
||||||
#endif
|
// caller ensures char file_path[PATH_MAX]
|
||||||
|
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
|
||||||
|
snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
|
||||||
|
if (0 == access(file_path, R_OK)) { \
|
||||||
|
action_ok; \
|
||||||
|
} else { \
|
||||||
|
action_main_fail; \
|
||||||
|
} \
|
||||||
|
snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
|
||||||
|
if ((cond_original) && (0 == access(file_path, R_OK))) { \
|
||||||
|
action_ok; \
|
||||||
|
} \
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
152
src/main.c
152
src/main.c
@ -282,37 +282,84 @@ void initialize_scan_context(scan_args_t *args) {
|
|||||||
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson");
|
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads an existing index as the baseline for incremental scanning.
|
||||||
|
* 1. load old index files (original+main) => original_table
|
||||||
|
* 2. allocate empty table => copy_table
|
||||||
|
* 3. allocate empty table => new_table
|
||||||
|
* the original_table/copy_table/new_table will be populated in parsing/parse.c:parse
|
||||||
|
* and consumed in main.c:save_incremental_index
|
||||||
|
*
|
||||||
|
* Note: the existing index may or may not be of incremental index form.
|
||||||
|
*/
|
||||||
void load_incremental_index(const scan_args_t *args) {
|
void load_incremental_index(const scan_args_t *args) {
|
||||||
|
char file_path[PATH_MAX];
|
||||||
|
|
||||||
ScanCtx.original_table = incremental_get_table();
|
ScanCtx.original_table = incremental_get_table();
|
||||||
ScanCtx.copy_table = incremental_get_table();
|
ScanCtx.copy_table = incremental_get_table();
|
||||||
|
ScanCtx.new_table = incremental_get_table();
|
||||||
DIR *dir = opendir(args->incremental);
|
|
||||||
if (dir == NULL) {
|
|
||||||
LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
|
|
||||||
}
|
|
||||||
|
|
||||||
char descriptor_path[PATH_MAX];
|
char descriptor_path[PATH_MAX];
|
||||||
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
|
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
|
||||||
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
|
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
|
||||||
|
|
||||||
if (strcmp(original_desc.version, Version) != 0) {
|
if (strcmp(original_desc.version, Version) != 0) {
|
||||||
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
|
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
|
||||||
}
|
}
|
||||||
|
|
||||||
struct dirent *de;
|
READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc),
|
||||||
while ((de = readdir(dir)) != NULL) {
|
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)), 1);
|
||||||
if (strncmp(de->d_name, "_index", sizeof("_index") - 1) == 0) {
|
|
||||||
char file_path[PATH_MAX];
|
|
||||||
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
|
|
||||||
incremental_read(ScanCtx.original_table, file_path, &original_desc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
closedir(dir);
|
|
||||||
|
|
||||||
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
|
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Saves an incremental index.
|
||||||
|
* Before calling this function, the scanner should have finished writing the main index.
|
||||||
|
* 1. Build original_table - new_table => delete_table
|
||||||
|
* 2. Incrementally copy from old index files [(original+main) /\ copy_table] => index_original.ndjson.zst & store
|
||||||
|
*/
|
||||||
|
void save_incremental_index(scan_args_t* args) {
|
||||||
|
char dst_path[PATH_MAX];
|
||||||
|
char store_path[PATH_MAX];
|
||||||
|
char file_path[PATH_MAX];
|
||||||
|
char del_path[PATH_MAX];
|
||||||
|
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
|
||||||
|
snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path);
|
||||||
|
store_t *source = store_create(store_path, STORE_SIZE_TN);
|
||||||
|
|
||||||
|
LOG_INFOF("main.c", "incremental_delete: original size = %u, copy size = %u, new size = %u",
|
||||||
|
g_hash_table_size(ScanCtx.original_table),
|
||||||
|
g_hash_table_size(ScanCtx.copy_table),
|
||||||
|
g_hash_table_size(ScanCtx.new_table));
|
||||||
|
snprintf(del_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path);
|
||||||
|
READ_INDICES(file_path, args->incremental, incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table),
|
||||||
|
perror("incremental_delete"), 1);
|
||||||
|
writer_cleanup();
|
||||||
|
|
||||||
|
READ_INDICES(file_path, args->incremental, incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table),
|
||||||
|
perror("incremental_copy"), 1);
|
||||||
|
writer_cleanup();
|
||||||
|
|
||||||
|
store_destroy(source);
|
||||||
|
|
||||||
|
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
|
||||||
|
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
|
||||||
|
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
|
||||||
|
store_copy(source_tags, dst_path);
|
||||||
|
store_destroy(source_tags);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An index can be either incremental or non-incremental (initial index).
|
||||||
|
* For an initial index, there is only the "main" index.
|
||||||
|
* For an incremental index, there are, additionally:
|
||||||
|
* - An "original" index, referencing all files unchanged since the previous index.
|
||||||
|
* - A "delete" index, referencing all files that exist in the previous index, but deleted since then.
|
||||||
|
* Therefore, for an incremental index, "main"+"original" covers all the current files in the live filesystem,
|
||||||
|
* and is orthogonal to the "delete" index. When building an incremental index upon an old incremental index,
|
||||||
|
* the old "delete" index can be safely ignored.
|
||||||
|
*/
|
||||||
void sist2_scan(scan_args_t *args) {
|
void sist2_scan(scan_args_t *args) {
|
||||||
|
|
||||||
ScanCtx.mime_table = mime_get_mime_table();
|
ScanCtx.mime_table = mime_get_mime_table();
|
||||||
@ -366,33 +413,7 @@ void sist2_scan(scan_args_t *args) {
|
|||||||
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
|
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
|
||||||
|
|
||||||
if (args->incremental != NULL) {
|
if (args->incremental != NULL) {
|
||||||
char dst_path[PATH_MAX];
|
save_incremental_index(args);
|
||||||
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
|
|
||||||
snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path);
|
|
||||||
store_t *source = store_create(store_path, STORE_SIZE_TN);
|
|
||||||
|
|
||||||
DIR *dir = opendir(args->incremental);
|
|
||||||
if (dir == NULL) {
|
|
||||||
perror("opendir");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
struct dirent *de;
|
|
||||||
while ((de = readdir(dir)) != NULL) {
|
|
||||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
|
||||||
char file_path[PATH_MAX];
|
|
||||||
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
|
|
||||||
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
closedir(dir);
|
|
||||||
store_destroy(source);
|
|
||||||
writer_cleanup();
|
|
||||||
|
|
||||||
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
|
|
||||||
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
|
|
||||||
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
|
|
||||||
store_copy(source_tags, dst_path);
|
|
||||||
store_destroy(source_tags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
|
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
|
||||||
@ -402,17 +423,19 @@ void sist2_scan(scan_args_t *args) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void sist2_index(index_args_t *args) {
|
void sist2_index(index_args_t *args) {
|
||||||
|
char file_path[PATH_MAX];
|
||||||
|
|
||||||
IndexCtx.es_url = args->es_url;
|
IndexCtx.es_url = args->es_url;
|
||||||
IndexCtx.es_index = args->es_index;
|
IndexCtx.es_index = args->es_index;
|
||||||
IndexCtx.batch_size = args->batch_size;
|
IndexCtx.batch_size = args->batch_size;
|
||||||
|
IndexCtx.needs_es_connection = !args->print;
|
||||||
|
|
||||||
if (!args->print) {
|
if (IndexCtx.needs_es_connection) {
|
||||||
elastic_init(args->force_reset, args->es_mappings, args->es_settings);
|
elastic_init(args->force_reset, args->es_mappings, args->es_settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
char descriptor_path[PATH_MAX];
|
char descriptor_path[PATH_MAX];
|
||||||
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
|
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
|
||||||
|
|
||||||
index_descriptor_t desc = read_index_descriptor(descriptor_path);
|
index_descriptor_t desc = read_index_descriptor(descriptor_path);
|
||||||
|
|
||||||
@ -428,11 +451,11 @@ void sist2_index(index_args_t *args) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
char path_tmp[PATH_MAX];
|
char path_tmp[PATH_MAX];
|
||||||
snprintf(path_tmp, sizeof(path_tmp), "%s/tags", args->index_path);
|
snprintf(path_tmp, sizeof(path_tmp), "%stags", args->index_path);
|
||||||
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
|
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
|
||||||
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
|
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
|
||||||
|
|
||||||
snprintf(path_tmp, sizeof(path_tmp), "%s/meta", args->index_path);
|
snprintf(path_tmp, sizeof(path_tmp), "%smeta", args->index_path);
|
||||||
IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
|
IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
|
||||||
IndexCtx.meta = store_read_all(IndexCtx.meta_store);
|
IndexCtx.meta = store_read_all(IndexCtx.meta_store);
|
||||||
|
|
||||||
@ -443,32 +466,33 @@ void sist2_index(index_args_t *args) {
|
|||||||
f = index_json;
|
f = index_json;
|
||||||
}
|
}
|
||||||
|
|
||||||
void (*cleanup)();
|
IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0);
|
||||||
if (args->print) {
|
|
||||||
cleanup = NULL;
|
|
||||||
} else {
|
|
||||||
cleanup = elastic_cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE, args->print == 0);
|
|
||||||
tpool_start(IndexCtx.pool);
|
tpool_start(IndexCtx.pool);
|
||||||
|
|
||||||
struct dirent *de;
|
READ_INDICES(file_path, args->index_path, {
|
||||||
while ((de = readdir(dir)) != NULL) {
|
read_index(file_path, desc.id, desc.type, f);
|
||||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type);
|
||||||
char file_path[PATH_MAX];
|
}, {}, !args->incremental);
|
||||||
snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
|
|
||||||
read_index(file_path, desc.id, desc.type, f);
|
// Only read the _delete index if we're sending data to ES
|
||||||
|
if (!args->print) {
|
||||||
|
snprintf(file_path, PATH_MAX, "%s_index_delete.list.zst", args->index_path);
|
||||||
|
if (0 == access(file_path, R_OK)) {
|
||||||
|
read_lines(file_path, (line_processor_t) {
|
||||||
|
.data = NULL,
|
||||||
|
.func = delete_document
|
||||||
|
});
|
||||||
LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type)
|
LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
closedir(dir);
|
closedir(dir);
|
||||||
|
|
||||||
tpool_wait(IndexCtx.pool);
|
tpool_wait(IndexCtx.pool);
|
||||||
|
|
||||||
tpool_destroy(IndexCtx.pool);
|
tpool_destroy(IndexCtx.pool);
|
||||||
|
|
||||||
if (!args->print) {
|
if (IndexCtx.needs_es_connection) {
|
||||||
finish_indexer(args->script, args->async_script, desc.id);
|
finish_indexer(args->script, args->async_script, desc.id);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -483,7 +507,7 @@ void sist2_exec_script(exec_args_t *args) {
|
|||||||
LogCtx.verbose = TRUE;
|
LogCtx.verbose = TRUE;
|
||||||
|
|
||||||
char descriptor_path[PATH_MAX];
|
char descriptor_path[PATH_MAX];
|
||||||
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
|
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
|
||||||
index_descriptor_t desc = read_index_descriptor(descriptor_path);
|
index_descriptor_t desc = read_index_descriptor(descriptor_path);
|
||||||
|
|
||||||
IndexCtx.es_url = args->es_url;
|
IndexCtx.es_url = args->es_url;
|
||||||
@ -606,6 +630,8 @@ int main(int argc, const char *argv[]) {
|
|||||||
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
|
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
|
||||||
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
|
||||||
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
|
||||||
|
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
|
||||||
|
"Conduct incremental indexing, assumes that the old index is already digested by Elasticsearch."),
|
||||||
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
|
||||||
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
|
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
|
||||||
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
|
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),
|
||||||
|
@ -80,16 +80,26 @@ void parse(void *arg) {
|
|||||||
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
|
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
|
||||||
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
|
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
|
||||||
pthread_mutex_lock(&ScanCtx.copy_table_mu);
|
pthread_mutex_lock(&ScanCtx.copy_table_mu);
|
||||||
incremental_mark_file_for_copy(ScanCtx.copy_table, doc->path_md5);
|
incremental_mark_file(ScanCtx.copy_table, doc->path_md5);
|
||||||
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
|
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
|
||||||
|
|
||||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||||
ScanCtx.dbg_skipped_files_count += 1;
|
ScanCtx.dbg_skipped_files_count += 1;
|
||||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||||
|
|
||||||
|
CLOSE_FILE(job->vfile)
|
||||||
|
free(doc->filepath);
|
||||||
|
free(doc);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ScanCtx.new_table != NULL) {
|
||||||
|
pthread_mutex_lock(&ScanCtx.copy_table_mu);
|
||||||
|
incremental_mark_file(ScanCtx.new_table, doc->path_md5);
|
||||||
|
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
|
||||||
|
}
|
||||||
|
|
||||||
char *buf[MAGIC_BUF_SIZE];
|
char *buf[MAGIC_BUF_SIZE];
|
||||||
|
|
||||||
if (LogCtx.very_verbose) {
|
if (LogCtx.very_verbose) {
|
||||||
@ -123,11 +133,14 @@ void parse(void *arg) {
|
|||||||
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
|
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
|
||||||
}
|
}
|
||||||
|
|
||||||
CLOSE_FILE(job->vfile)
|
|
||||||
|
|
||||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||||
ScanCtx.dbg_failed_files_count += 1;
|
ScanCtx.dbg_failed_files_count += 1;
|
||||||
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
|
||||||
|
|
||||||
|
CLOSE_FILE(job->vfile)
|
||||||
|
free(doc->filepath);
|
||||||
|
free(doc);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
12
src/stats.c
12
src/stats.c
@ -96,16 +96,8 @@ void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void read_index_into_tables(index_t *index) {
|
void read_index_into_tables(index_t *index) {
|
||||||
DIR *dir = opendir(index->path);
|
char file_path[PATH_MAX];
|
||||||
struct dirent *de;
|
READ_INDICES(file_path, index->path, read_index(file_path, index->desc.id, index->desc.type, fill_tables), {}, 1);
|
||||||
while ((de = readdir(dir)) != NULL) {
|
|
||||||
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
|
|
||||||
char file_path[PATH_MAX];
|
|
||||||
snprintf(file_path, PATH_MAX, "%s%s", index->path, de->d_name);
|
|
||||||
read_index(file_path, index->desc.id, index->desc.type, fill_tables);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
closedir(dir);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t rfind(const char *str, int c) {
|
static size_t rfind(const char *str, int c) {
|
||||||
|
@ -134,10 +134,11 @@ static int incremental_get_str(GHashTable *table, const char *path_md5) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Not thread safe!
|
* Marks a file by adding it to a table.
|
||||||
|
* !!Not thread safe.
|
||||||
*/
|
*/
|
||||||
__always_inline
|
__always_inline
|
||||||
static int incremental_mark_file_for_copy(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
|
static int incremental_mark_file(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
|
||||||
char *ptr = malloc(MD5_STR_LENGTH);
|
char *ptr = malloc(MD5_STR_LENGTH);
|
||||||
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
|
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
|
||||||
return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
|
return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
|
||||||
|
2
src/web/static_generated.c
vendored
2
src/web/static_generated.c
vendored
File diff suppressed because one or more lines are too long
@ -39,7 +39,7 @@ def sist2_index(files, *args):
|
|||||||
return iter(sist2_index_to_dict("test_i"))
|
return iter(sist2_index_to_dict("test_i"))
|
||||||
|
|
||||||
|
|
||||||
def sist2_incremental_index(files, func=None, *args):
|
def sist2_incremental_index(files, func=None, incremental_index=False, *args):
|
||||||
path = copy_files(files)
|
path = copy_files(files)
|
||||||
|
|
||||||
if func:
|
if func:
|
||||||
@ -47,11 +47,13 @@ def sist2_incremental_index(files, func=None, *args):
|
|||||||
|
|
||||||
shutil.rmtree("test_i_inc", ignore_errors=True)
|
shutil.rmtree("test_i_inc", ignore_errors=True)
|
||||||
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
|
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
|
||||||
return iter(sist2_index_to_dict("test_i_inc"))
|
return iter(sist2_index_to_dict("test_i_inc", incremental_index))
|
||||||
|
|
||||||
|
|
||||||
def sist2_index_to_dict(index):
|
def sist2_index_to_dict(index, incremental_index=False):
|
||||||
res = sist2("index", "--print", index)
|
args = ["--incremental-index"] if incremental_index else []
|
||||||
|
|
||||||
|
res = sist2("index", "--print", "--very-verbose", *args, index)
|
||||||
|
|
||||||
for line in res.splitlines():
|
for line in res.splitlines():
|
||||||
if line:
|
if line:
|
||||||
@ -75,6 +77,7 @@ class ScanTest(unittest.TestCase):
|
|||||||
|
|
||||||
file_count = sum(1 for _ in sist2_index(TEST_FILES))
|
file_count = sum(1 for _ in sist2_index(TEST_FILES))
|
||||||
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
|
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
|
||||||
|
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)), 3)
|
||||||
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
|
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user