mirror of
https://github.com/simon987/sist2.git
synced 2025-12-11 14:38:54 +00:00
Rework document IDs
This commit is contained in:
@@ -45,7 +45,7 @@ void elastic_cleanup() {
|
||||
destroy_indexer(Indexer);
|
||||
}
|
||||
|
||||
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
|
||||
void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
|
||||
|
||||
cJSON *line = cJSON_CreateObject();
|
||||
|
||||
@@ -72,19 +72,19 @@ void delete_document(const char* document_id_str, void* UNUSED(_data)) {
|
||||
bulk_line->type = ES_BULK_LINE_DELETE;
|
||||
bulk_line->next = NULL;
|
||||
|
||||
memcpy(bulk_line->path_md5_str, document_id_str, MD5_STR_LENGTH);
|
||||
strcpy(bulk_line->doc_id, document_id_str);
|
||||
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
|
||||
}
|
||||
|
||||
|
||||
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
|
||||
void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
|
||||
char *json = cJSON_PrintUnformatted(document);
|
||||
|
||||
size_t json_len = strlen(json);
|
||||
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
|
||||
bulk_line->type = ES_BULK_LINE_INDEX;
|
||||
memcpy(bulk_line->line, json, json_len);
|
||||
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
|
||||
strcpy(bulk_line->doc_id, doc_id);
|
||||
*(bulk_line->line + json_len) = '\n';
|
||||
*(bulk_line->line + json_len + 1) = '\0';
|
||||
bulk_line->next = NULL;
|
||||
@@ -93,7 +93,7 @@ void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
|
||||
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
|
||||
}
|
||||
|
||||
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
|
||||
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) {
|
||||
|
||||
if (Indexer == NULL) {
|
||||
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
|
||||
@@ -167,7 +167,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
|
||||
snprintf(
|
||||
action_str, sizeof(action_str),
|
||||
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
|
||||
line->path_md5_str, Indexer->es_index
|
||||
line->doc_id, Indexer->es_index
|
||||
);
|
||||
|
||||
size_t action_str_len = strlen(action_str);
|
||||
@@ -184,7 +184,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
|
||||
snprintf(
|
||||
action_str, sizeof(action_str),
|
||||
"{\"delete\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
|
||||
line->path_md5_str, Indexer->es_index
|
||||
line->doc_id, Indexer->es_index
|
||||
);
|
||||
|
||||
size_t action_str_len = strlen(action_str);
|
||||
@@ -263,7 +263,7 @@ void _elastic_flush(int max) {
|
||||
if (r->status_code == 413) {
|
||||
|
||||
if (max <= 1) {
|
||||
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
|
||||
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id)
|
||||
free_response(r);
|
||||
free(buf);
|
||||
free_queue(1);
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
typedef struct es_bulk_line {
|
||||
struct es_bulk_line *next;
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
char doc_id[SIST_DOC_ID_LEN];
|
||||
int type;
|
||||
char line[0];
|
||||
} es_bulk_line_t;
|
||||
@@ -40,9 +40,9 @@ typedef struct es_indexer es_indexer_t;
|
||||
|
||||
void elastic_index_line(es_bulk_line_t *line);
|
||||
|
||||
void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
|
||||
void print_json(cJSON *document, const char index_id_str[SIST_INDEX_ID_LEN]);
|
||||
|
||||
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
|
||||
void index_json(cJSON *document, const char doc_id[SIST_INDEX_ID_LEN]);
|
||||
|
||||
void delete_document(const char *document_id_str, void* data);
|
||||
|
||||
@@ -59,6 +59,6 @@ char *elastic_get_status();
|
||||
|
||||
es_version_t *elastic_get_version(const char *es_url);
|
||||
|
||||
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
|
||||
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]);
|
||||
|
||||
#endif
|
||||
|
||||
2
src/index/static_generated.c
vendored
2
src/index/static_generated.c
vendored
File diff suppressed because one or more lines are too long
@@ -124,9 +124,7 @@ char *build_json_string(document_t *doc) {
|
||||
cJSON_AddStringToObject(json, "path", "");
|
||||
}
|
||||
|
||||
char md5_str[MD5_STR_LENGTH];
|
||||
buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, md5_str);
|
||||
cJSON_AddStringToObject(json, "_id", md5_str);
|
||||
cJSON_AddStringToObject(json, "_id", doc->doc_id);
|
||||
|
||||
// Metadata
|
||||
meta_line_t *meta = doc->meta_head;
|
||||
@@ -452,32 +450,31 @@ void read_lines(const char *path, const line_processor_t processor) {
|
||||
|
||||
dyn_buffer_destroy(&buf);
|
||||
fclose(file);
|
||||
|
||||
}
|
||||
|
||||
void read_index_ndjson(const char *line, void* _data) {
|
||||
void** data = _data;
|
||||
const char* index_id = data[0];
|
||||
void read_index_ndjson(const char *line, void *_data) {
|
||||
void **data = _data;
|
||||
const char *index_id = data[0];
|
||||
index_func func = data[1];
|
||||
read_index_bin_handle_line(line, index_id, func);
|
||||
}
|
||||
|
||||
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
|
||||
void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func func) {
|
||||
if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
|
||||
read_lines(path, (line_processor_t) {
|
||||
.data = (void*[2]){(void*)index_id, func} ,
|
||||
.func = read_index_ndjson,
|
||||
.data = (void *[2]) {(void *) index_id, func},
|
||||
.func = read_index_ndjson,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
static __thread GHashTable *IncrementalReadTable = NULL;
|
||||
|
||||
void json_put_incremental(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
|
||||
void json_put_incremental(cJSON *document, UNUSED(const char doc_id[SIST_DOC_ID_LEN])) {
|
||||
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
|
||||
const int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
|
||||
|
||||
incremental_put_str(IncrementalReadTable, path_md5_str, mtime);
|
||||
incremental_put(IncrementalReadTable, path_md5_str, mtime);
|
||||
}
|
||||
|
||||
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc) {
|
||||
@@ -490,13 +487,11 @@ static __thread GHashTable *IncrementalNewTable = NULL;
|
||||
static __thread store_t *IncrementalCopySourceStore = NULL;
|
||||
static __thread store_t *IncrementalCopyDestinationStore = NULL;
|
||||
|
||||
void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
|
||||
void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
|
||||
|
||||
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
|
||||
unsigned char path_md5[MD5_DIGEST_LENGTH];
|
||||
hex2buf(path_md5_str, MD5_STR_LENGTH - 1, path_md5);
|
||||
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
|
||||
|
||||
if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get_str(IncrementalCopyTable, path_md5_str)) {
|
||||
if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get(IncrementalCopyTable, doc_id)) {
|
||||
// Copy index line
|
||||
cJSON_DeleteItemFromObject(document, "index");
|
||||
char *json_str = cJSON_PrintUnformatted(document);
|
||||
@@ -510,9 +505,9 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_S
|
||||
|
||||
// Copy tn store contents
|
||||
size_t buf_len;
|
||||
char *buf = store_read(IncrementalCopySourceStore, (char *) path_md5, sizeof(path_md5), &buf_len);
|
||||
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, sizeof(doc_id), &buf_len);
|
||||
if (buf_len != 0) {
|
||||
store_write(IncrementalCopyDestinationStore, (char *) path_md5, sizeof(path_md5), buf, buf_len);
|
||||
store_write(IncrementalCopyDestinationStore, (char *) doc_id, sizeof(doc_id), buf, buf_len);
|
||||
free(buf);
|
||||
}
|
||||
}
|
||||
@@ -536,24 +531,24 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
||||
read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
|
||||
}
|
||||
|
||||
void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
|
||||
void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
|
||||
|
||||
char path_md5_n[MD5_STR_LENGTH + 1];
|
||||
path_md5_n[MD5_STR_LENGTH] = '\0';
|
||||
path_md5_n[MD5_STR_LENGTH - 1] = '\n';
|
||||
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
|
||||
char doc_id_n[SIST_DOC_ID_LEN + 1];
|
||||
doc_id_n[SIST_DOC_ID_LEN] = '\0';
|
||||
doc_id_n[SIST_DOC_ID_LEN - 1] = '\n';
|
||||
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
|
||||
|
||||
// do not delete archive virtual entries
|
||||
if (cJSON_GetObjectItem(document, "parent") == NULL
|
||||
&& !incremental_get_str(IncrementalCopyTable, path_md5_str)
|
||||
&& !incremental_get_str(IncrementalNewTable, path_md5_str)
|
||||
&& !incremental_get(IncrementalCopyTable, doc_id)
|
||||
&& !incremental_get(IncrementalNewTable, doc_id)
|
||||
) {
|
||||
memcpy(path_md5_n, path_md5_str, MD5_STR_LENGTH - 1);
|
||||
zstd_write_string(path_md5_n, MD5_STR_LENGTH);
|
||||
memcpy(doc_id_n, doc_id, SIST_DOC_ID_LEN - 1);
|
||||
zstd_write_string(doc_id, sizeof(doc_id_n));
|
||||
}
|
||||
}
|
||||
|
||||
void incremental_delete(const char *del_filepath, const char* index_filepath,
|
||||
void incremental_delete(const char *del_filepath, const char *index_filepath,
|
||||
GHashTable *copy_table, GHashTable *new_table) {
|
||||
|
||||
if (WriterCtx.out_file == NULL) {
|
||||
|
||||
@@ -12,7 +12,7 @@ typedef struct line_processor {
|
||||
void (*func)(const char*, void*);
|
||||
} line_processor_t;
|
||||
|
||||
typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
|
||||
typedef void(*index_func)(cJSON *, const char[SIST_DOC_ID_LEN]);
|
||||
|
||||
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
|
||||
const char *dst_filepath, GHashTable *copy_table);
|
||||
@@ -24,7 +24,7 @@ void write_document(document_t *doc);
|
||||
|
||||
void read_lines(const char *path, const line_processor_t processor);
|
||||
|
||||
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
|
||||
void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func);
|
||||
|
||||
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);
|
||||
|
||||
|
||||
@@ -52,22 +52,7 @@ void store_flush(store_t *store) {
|
||||
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
|
||||
|
||||
if (LogCtx.very_verbose) {
|
||||
if (key_len == MD5_DIGEST_LENGTH) {
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
|
||||
|
||||
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
|
||||
|
||||
} else if (key_len == MD5_DIGEST_LENGTH + sizeof(int)) {
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
|
||||
|
||||
LOG_DEBUGF("store.c", "Store write {%s/%d} %lu bytes",
|
||||
path_md5_str, *(int *) (key + MD5_DIGEST_LENGTH), buf_len);
|
||||
|
||||
} else {
|
||||
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
|
||||
}
|
||||
LOG_DEBUGF("store.c", "Store write %s@{%s} %lu bytes", store->path, key, buf_len)
|
||||
}
|
||||
|
||||
#if (SIST_FAKE_STORE != 1)
|
||||
|
||||
@@ -22,7 +22,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
|
||||
|
||||
job->vfile.info = *info;
|
||||
|
||||
memset(job->parent, 0, MD5_DIGEST_LENGTH);
|
||||
job->parent[0] = '\0';
|
||||
|
||||
job->vfile.filepath = job->filepath;
|
||||
job->vfile.read = fs_read;
|
||||
|
||||
@@ -118,7 +118,7 @@ void init_dir(const char *dirpath, scan_args_t* args) {
|
||||
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
|
||||
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
|
||||
} else {
|
||||
// genreate new index id based on timestamp
|
||||
// generate new index id based on timestamp
|
||||
unsigned char index_md5[MD5_DIGEST_LENGTH];
|
||||
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
|
||||
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
|
||||
|
||||
@@ -69,7 +69,7 @@ void parse(void *arg) {
|
||||
doc->base = (short) job->base;
|
||||
|
||||
char *rel_path = doc->filepath + ScanCtx.index.desc.root_len;
|
||||
MD5((unsigned char *) rel_path, strlen(rel_path), doc->path_md5);
|
||||
generate_doc_id(rel_path, doc->doc_id);
|
||||
|
||||
doc->meta_head = NULL;
|
||||
doc->meta_tail = NULL;
|
||||
@@ -77,10 +77,10 @@ void parse(void *arg) {
|
||||
doc->size = job->vfile.info.st_size;
|
||||
doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;
|
||||
|
||||
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
|
||||
int inc_ts = incremental_get(ScanCtx.original_table, doc->doc_id);
|
||||
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
|
||||
pthread_mutex_lock(&ScanCtx.copy_table_mu);
|
||||
incremental_mark_file(ScanCtx.copy_table, doc->path_md5);
|
||||
incremental_mark_file(ScanCtx.copy_table, doc->doc_id);
|
||||
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
|
||||
|
||||
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
|
||||
@@ -96,16 +96,14 @@ void parse(void *arg) {
|
||||
|
||||
if (ScanCtx.new_table != NULL) {
|
||||
pthread_mutex_lock(&ScanCtx.copy_table_mu);
|
||||
incremental_mark_file(ScanCtx.new_table, doc->path_md5);
|
||||
incremental_mark_file(ScanCtx.new_table, doc->doc_id);
|
||||
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
|
||||
}
|
||||
|
||||
char *buf[MAGIC_BUF_SIZE];
|
||||
|
||||
if (LogCtx.very_verbose) {
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, path_md5_str);
|
||||
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
|
||||
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id)
|
||||
}
|
||||
|
||||
if (job->vfile.info.st_size == 0) {
|
||||
@@ -218,10 +216,10 @@ void parse(void *arg) {
|
||||
abort:
|
||||
|
||||
//Parent meta
|
||||
if (!md5_digest_is_null(job->parent)) {
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
|
||||
if (job->parent[0] != '\0') {
|
||||
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
|
||||
meta_parent->key = MetaParent;
|
||||
buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
|
||||
strcpy(meta_parent->str_val, job->parent);
|
||||
APPEND_META((doc), meta_parent)
|
||||
|
||||
doc->has_parent = TRUE;
|
||||
|
||||
@@ -23,16 +23,19 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
|
||||
}
|
||||
char *json_str = cJSON_PrintUnformatted(json);
|
||||
|
||||
unsigned char path_md5[MD5_DIGEST_LENGTH];
|
||||
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
|
||||
path_md5);
|
||||
char assoc_doc_id[SIST_DOC_ID_LEN];
|
||||
|
||||
char path_md5_str[MD5_STR_LENGTH];
|
||||
buf2hex(path_md5, MD5_DIGEST_LENGTH, path_md5_str);
|
||||
char rel_path[PATH_MAX];
|
||||
size_t rel_path_len = doc->ext - 1 - ScanCtx.index.desc.root_len;
|
||||
memcpy(rel_path, vfile->filepath + ScanCtx.index.desc.root_len, rel_path_len);
|
||||
*(rel_path + rel_path_len) = '\0';
|
||||
|
||||
store_write(ScanCtx.index.meta_store, path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
|
||||
generate_doc_id(rel_path, assoc_doc_id);
|
||||
|
||||
store_write(ScanCtx.index.meta_store, assoc_doc_id, sizeof(assoc_doc_id), json_str,
|
||||
strlen(json_str) + 1);
|
||||
|
||||
cJSON_Delete(json);
|
||||
free(json_str);
|
||||
free(buf);
|
||||
}
|
||||
}
|
||||
@@ -53,7 +53,7 @@
|
||||
#include <ctype.h>
|
||||
#include "git_hash.h"
|
||||
|
||||
#define VERSION "2.11.7"
|
||||
#define VERSION "2.12.0"
|
||||
static const char *const Version = VERSION;
|
||||
|
||||
#ifndef SIST_PLATFORM
|
||||
|
||||
@@ -20,7 +20,7 @@ typedef struct {
|
||||
long count;
|
||||
} agg_t;
|
||||
|
||||
void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
|
||||
void fill_tables(cJSON *document, UNUSED(const char index_id[SIST_INDEX_ID_LEN])) {
|
||||
|
||||
if (cJSON_GetObjectItem(document, "parent") != NULL) {
|
||||
return;
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
#define INDEX_TYPE_NDJSON "ndjson"
|
||||
|
||||
typedef struct index_descriptor {
|
||||
char id[MD5_STR_LENGTH];
|
||||
char id[SIST_INDEX_ID_LEN];
|
||||
char version[64];
|
||||
long timestamp;
|
||||
char root[PATH_MAX];
|
||||
|
||||
44
src/util.h
44
src/util.h
@@ -10,8 +10,6 @@
|
||||
#include "third-party/utf8.h/utf8.h"
|
||||
#include "libscan/scan.h"
|
||||
|
||||
#define MD5_STR_LENGTH 33
|
||||
|
||||
|
||||
char *abspath(const char *path);
|
||||
|
||||
@@ -94,40 +92,24 @@ static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
|
||||
|
||||
|
||||
__always_inline
|
||||
static int md5_digest_is_null(const unsigned char digest[MD5_DIGEST_LENGTH]) {
|
||||
return (*(int64_t *) digest) == 0 && (*((int64_t *) digest + 1)) == 0;
|
||||
static void generate_doc_id(const char *rel_path, char *doc_id) {
|
||||
unsigned char md[MD5_DIGEST_LENGTH];
|
||||
|
||||
MD5((unsigned char *) rel_path, strlen(rel_path), md);
|
||||
buf2hex(md, sizeof(md), doc_id);
|
||||
}
|
||||
|
||||
|
||||
__always_inline
|
||||
static void incremental_put(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH], int mtime) {
|
||||
char *ptr = malloc(MD5_STR_LENGTH);
|
||||
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
|
||||
static void incremental_put(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN], int mtime) {
|
||||
char *ptr = malloc(SIST_DOC_ID_LEN);
|
||||
strcpy(ptr, doc_id);
|
||||
g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static void incremental_put_str(GHashTable *table, const char *path_md5, int mtime) {
|
||||
char *ptr = malloc(MD5_STR_LENGTH);
|
||||
strcpy(ptr, path_md5);
|
||||
g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int incremental_get(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
|
||||
static int incremental_get(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
|
||||
if (table != NULL) {
|
||||
char md5_str[MD5_STR_LENGTH];
|
||||
buf2hex(path_md5, MD5_DIGEST_LENGTH, md5_str);
|
||||
return GPOINTER_TO_INT(g_hash_table_lookup(table, md5_str));
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
__always_inline
|
||||
static int incremental_get_str(GHashTable *table, const char *path_md5) {
|
||||
if (table != NULL) {
|
||||
return GPOINTER_TO_INT(g_hash_table_lookup(table, path_md5));
|
||||
return GPOINTER_TO_INT(g_hash_table_lookup(table, doc_id));
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
@@ -138,9 +120,9 @@ static int incremental_get_str(GHashTable *table, const char *path_md5) {
|
||||
* !!Not thread safe.
|
||||
*/
|
||||
__always_inline
|
||||
static int incremental_mark_file(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
|
||||
char *ptr = malloc(MD5_STR_LENGTH);
|
||||
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
|
||||
static int incremental_mark_file(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
|
||||
char *ptr = malloc(SIST_DOC_ID_LEN);
|
||||
strcpy(ptr, doc_id);
|
||||
return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
|
||||
}
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ static void send_response_line(struct mg_connection *nc, int status_code, size_t
|
||||
|
||||
index_t *get_index_by_id(const char *index_id) {
|
||||
for (int i = WebCtx.index_count; i >= 0; i--) {
|
||||
if (strncmp(index_id, WebCtx.indices[i].desc.id, MD5_STR_LENGTH) == 0) {
|
||||
if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) {
|
||||
return &WebCtx.indices[i];
|
||||
}
|
||||
}
|
||||
@@ -70,23 +70,23 @@ void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (hm->uri.len != MD5_STR_LENGTH + 4) {
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_md5[MD5_STR_LENGTH];
|
||||
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
|
||||
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
|
||||
char arg_index_id[SIST_INDEX_ID_LEN];
|
||||
memcpy(arg_index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
|
||||
*(arg_index_id + SIST_INDEX_ID_LEN - 1) = '\0';
|
||||
|
||||
index_t *index = get_index_by_id(arg_md5);
|
||||
index_t *index = get_index_by_id(arg_index_id);
|
||||
if (index == NULL) {
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
const char *file;
|
||||
switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) {
|
||||
switch (atoi(hm->uri.ptr + 3 + SIST_INDEX_ID_LEN)) {
|
||||
case 1:
|
||||
file = "treemap.csv";
|
||||
break;
|
||||
@@ -150,28 +150,25 @@ void style_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
int parse_tn_num = FALSE;
|
||||
int has_thumbnail_index = FALSE;
|
||||
|
||||
if (hm->uri.len != 68) {
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {
|
||||
|
||||
if (hm->uri.len != 68 + 4) {
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 4) {
|
||||
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
parse_tn_num = TRUE;
|
||||
has_thumbnail_index = TRUE;
|
||||
}
|
||||
|
||||
char arg_file_md5[MD5_STR_LENGTH];
|
||||
char arg_index[MD5_STR_LENGTH];
|
||||
char arg_doc_id[SIST_DOC_ID_LEN];
|
||||
char arg_index[SIST_INDEX_ID_LEN];
|
||||
|
||||
memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH);
|
||||
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
|
||||
memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
|
||||
*(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
|
||||
|
||||
unsigned char md5_buf[MD5_DIGEST_LENGTH];
|
||||
hex2buf(arg_file_md5, MD5_STR_LENGTH - 1, md5_buf);
|
||||
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
|
||||
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
|
||||
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
|
||||
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
|
||||
|
||||
store_t *store = get_store(arg_index);
|
||||
if (store == NULL) {
|
||||
@@ -183,16 +180,17 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
char *data;
|
||||
size_t data_len = 0;
|
||||
|
||||
if (parse_tn_num) {
|
||||
int tn_num = atoi(hm->uri.ptr + 68);
|
||||
if (has_thumbnail_index) {
|
||||
const char *tn_index = hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2;
|
||||
|
||||
char tn_key[sizeof(md5_buf) + sizeof(int)];
|
||||
memcpy(tn_key, md5_buf, sizeof(md5_buf));
|
||||
memcpy(tn_key + sizeof(md5_buf), &tn_num, sizeof(tn_num));
|
||||
char tn_key[sizeof(arg_doc_id) + sizeof(char) * 4];
|
||||
|
||||
memcpy(tn_key, arg_doc_id, sizeof(arg_doc_id));
|
||||
memcpy(tn_key + sizeof(arg_doc_id) - 1, tn_index, sizeof(char) * 4);
|
||||
|
||||
data = store_read(store, (char *) tn_key, sizeof(tn_key), &data_len);
|
||||
} else {
|
||||
data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
|
||||
data = store_read(store, (char *) arg_doc_id, sizeof(arg_doc_id), &data_len);
|
||||
}
|
||||
|
||||
if (data_len != 0) {
|
||||
@@ -357,17 +355,17 @@ void index_info(struct mg_connection *nc) {
|
||||
|
||||
void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (hm->uri.len != MD5_STR_LENGTH + 2) {
|
||||
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
|
||||
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_md5[MD5_STR_LENGTH];
|
||||
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
|
||||
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
|
||||
char arg_doc_id[SIST_DOC_ID_LEN];
|
||||
memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
|
||||
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
|
||||
|
||||
cJSON *doc = elastic_get_document(arg_md5);
|
||||
cJSON *doc = elastic_get_document(arg_doc_id);
|
||||
cJSON *source = cJSON_GetObjectItem(doc, "_source");
|
||||
|
||||
cJSON *index_id = cJSON_GetObjectItem(source, "index");
|
||||
@@ -393,17 +391,17 @@ void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
void file(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
|
||||
if (hm->uri.len != MD5_STR_LENGTH + 2) {
|
||||
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
|
||||
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_md5[MD5_STR_LENGTH];
|
||||
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
|
||||
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
|
||||
char arg_doc_id[SIST_DOC_ID_LEN];
|
||||
memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
|
||||
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
|
||||
|
||||
const char *next = arg_md5;
|
||||
const char *next = arg_doc_id;
|
||||
cJSON *doc = NULL;
|
||||
cJSON *index_id = NULL;
|
||||
cJSON *source = NULL;
|
||||
@@ -454,7 +452,6 @@ void status(struct mg_connection *nc) {
|
||||
typedef struct {
|
||||
char *name;
|
||||
int delete;
|
||||
char *path_md5_str;
|
||||
char *doc_id;
|
||||
} tag_req_t;
|
||||
|
||||
@@ -474,12 +471,6 @@ tag_req_t *parse_tag_request(cJSON *json) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cJSON *arg_path_md5 = cJSON_GetObjectItem(json, "path_md5");
|
||||
if (arg_path_md5 == NULL || !cJSON_IsString(arg_path_md5) ||
|
||||
strlen(arg_path_md5->valuestring) != MD5_STR_LENGTH - 1) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cJSON *arg_doc_id = cJSON_GetObjectItem(json, "doc_id");
|
||||
if (arg_doc_id == NULL || !cJSON_IsString(arg_doc_id)) {
|
||||
return NULL;
|
||||
@@ -488,22 +479,21 @@ tag_req_t *parse_tag_request(cJSON *json) {
|
||||
tag_req_t *req = malloc(sizeof(tag_req_t));
|
||||
req->delete = arg_delete->valueint;
|
||||
req->name = arg_name->valuestring;
|
||||
req->path_md5_str = arg_path_md5->valuestring;
|
||||
req->doc_id = arg_doc_id->valuestring;
|
||||
|
||||
return req;
|
||||
}
|
||||
|
||||
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
if (hm->uri.len != MD5_STR_LENGTH + 4) {
|
||||
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
|
||||
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
|
||||
HTTP_REPLY_NOT_FOUND
|
||||
return;
|
||||
}
|
||||
|
||||
char arg_index[MD5_STR_LENGTH];
|
||||
memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH);
|
||||
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
|
||||
char arg_index[SIST_INDEX_ID_LEN];
|
||||
memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
|
||||
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
|
||||
|
||||
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
|
||||
LOG_DEBUG("serve.c", "Invalid tag request")
|
||||
@@ -535,7 +525,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
cJSON *arr = NULL;
|
||||
|
||||
size_t data_len = 0;
|
||||
const char *data = store_read(store, arg_req->path_md5_str, MD5_STR_LENGTH, &data_len);
|
||||
const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len);
|
||||
if (data_len == 0) {
|
||||
arr = cJSON_CreateArray();
|
||||
} else {
|
||||
@@ -595,7 +585,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
|
||||
}
|
||||
|
||||
char *json_str = cJSON_PrintUnformatted(arr);
|
||||
store_write(store, arg_req->path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
|
||||
store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1);
|
||||
store_flush(store);
|
||||
|
||||
free(arg_req);
|
||||
|
||||
2
src/web/static_generated.c
vendored
2
src/web/static_generated.c
vendored
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user