mirror of
https://github.com/simon987/sist2.git
use sqlite to save index, major thread pool refactor

@@ -1,9 +1,7 @@
#include "src/ctx.h"
#include "serialize.h"
#include "src/parsing/parse.h"
#include "src/parsing/mime.h"

#include <zstd.h>

char *get_meta_key_text(enum metakey meta_key) {

@@ -79,7 +77,7 @@ char *get_meta_key_text(enum metakey meta_key) {
        case MetaChecksum:
            return "checksum";
        default:
            LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
            LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key);
    }
}

@@ -175,7 +173,7 @@ char *build_json_string(document_t *doc) {
            break;
        }
        default:
            LOG_FATALF("serialize.c", "Invalid meta key: %x %s", meta->key, get_meta_key_text(meta->key))
            LOG_FATALF("serialize.c", "Invalid meta key: %x %s", meta->key, get_meta_key_text(meta->key));
    }

    meta_line_t *tmp = meta;

@@ -189,394 +187,10 @@ char *build_json_string(document_t *doc) {
    return json_str;
}

static struct {
    FILE *out_file;
    size_t buf_out_size;

    void *buf_out;

    ZSTD_CCtx *cctx;
} WriterCtx = {
        .out_file = NULL
};

#define ZSTD_COMPRESSION_LEVEL 10

void initialize_writer_ctx(const char *file_path) {
    WriterCtx.out_file = fopen(file_path, "wb");

    WriterCtx.buf_out_size = ZSTD_CStreamOutSize();
    WriterCtx.buf_out = malloc(WriterCtx.buf_out_size);

    WriterCtx.cctx = ZSTD_createCCtx();

    ZSTD_CCtx_setParameter(WriterCtx.cctx, ZSTD_c_compressionLevel, ZSTD_COMPRESSION_LEVEL);
    ZSTD_CCtx_setParameter(WriterCtx.cctx, ZSTD_c_checksumFlag, FALSE);

    LOG_DEBUGF("serialize.c", "Open index file for writing %s", file_path)
}

void zstd_write_string(const char *string, const size_t len) {
    ZSTD_inBuffer input = {string, len, 0};

    do {
        ZSTD_outBuffer output = {WriterCtx.buf_out, WriterCtx.buf_out_size, 0};
        ZSTD_compressStream2(WriterCtx.cctx, &output, &input, ZSTD_e_continue);

        if (output.pos > 0) {
            ScanCtx.stat_index_size += fwrite(WriterCtx.buf_out, 1, output.pos, WriterCtx.out_file);
        }
    } while (input.pos != input.size);
}
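
zstd_write_string() compresses with ZSTD_e_continue, which only emits output once the
library has buffered a full block, so the file on disk is not a valid zstd frame until
zstd_close() below drains the stream. A minimal standalone sketch of the same pattern
(hypothetical output path and sample lines; error handling omitted):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <zstd.h>

    int main(void) {
        FILE *out = fopen("/tmp/example.ndjson.zst", "wb");
        size_t buf_size = ZSTD_CStreamOutSize();
        void *buf = malloc(buf_size);
        ZSTD_CCtx *cctx = ZSTD_createCCtx();
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 10);

        const char *lines[] = {"{\"name\":\"a.txt\"}\n", "{\"name\":\"b.txt\"}\n"};
        for (int i = 0; i < 2; i++) {
            ZSTD_inBuffer input = {lines[i], strlen(lines[i]), 0};
            do {
                ZSTD_outBuffer output = {buf, buf_size, 0};
                // ZSTD_e_continue: the library may keep data buffered internally
                ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_continue);
                fwrite(buf, 1, output.pos, out);
            } while (input.pos != input.size);
        }

        // Same role as zstd_close(): ZSTD_e_end flushes and writes the frame epilogue
        size_t remaining;
        do {
            ZSTD_outBuffer output = {buf, buf_size, 0};
            ZSTD_inBuffer input = {NULL, 0, 0};
            remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
            fwrite(buf, 1, output.pos, out);
        } while (remaining != 0);

        ZSTD_freeCCtx(cctx);
        free(buf);
        fclose(out);
        return 0;
    }
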
void write_document_func(tpool_work_arg_shm_t *arg) {

    const char *json_str = arg->arg;

    if (WriterCtx.out_file == NULL) {
        char dstfile[PATH_MAX];
        snprintf(dstfile, PATH_MAX, "%s_index_main.ndjson.zst", ScanCtx.index.path);
        initialize_writer_ctx(dstfile);
    }

    zstd_write_string(json_str, arg->arg_size);
}

void zstd_close() {
    if (WriterCtx.out_file == NULL) {
        LOG_DEBUG("serialize.c", "No zstd stream to close, skipping cleanup")
        return;
    }

    size_t remaining;
    do {
        ZSTD_outBuffer output = {WriterCtx.buf_out, WriterCtx.buf_out_size, 0};
        remaining = ZSTD_endStream(WriterCtx.cctx, &output);

        if (output.pos > 0) {
            ScanCtx.stat_index_size += fwrite(WriterCtx.buf_out, 1, output.pos, WriterCtx.out_file);
        }
    } while (remaining != 0);

    ZSTD_freeCCtx(WriterCtx.cctx);
    free(WriterCtx.buf_out);
    fclose(WriterCtx.out_file);

    LOG_DEBUG("serialize.c", "End zstd stream & close index file")
}

void writer_cleanup() {
    zstd_close();
    WriterCtx.out_file = NULL;
}

void write_index_descriptor(char *path, index_descriptor_t *desc) {
    cJSON *json = cJSON_CreateObject();
    cJSON_AddStringToObject(json, "id", desc->id);
    cJSON_AddStringToObject(json, "version", desc->version);
    cJSON_AddStringToObject(json, "root", desc->root);
    cJSON_AddStringToObject(json, "name", desc->name);
    cJSON_AddStringToObject(json, "type", desc->type);
    cJSON_AddStringToObject(json, "rewrite_url", desc->rewrite_url);
    cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);

    int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
    if (fd < 0) {
        LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
    }
    char *str = cJSON_Print(json);
    size_t ret = write(fd, str, strlen(str));
    if (ret == -1) {
        LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
    }
    free(str);
    close(fd);

    cJSON_Delete(json);
}

index_descriptor_t read_index_descriptor(char *path) {

    struct stat info;
    stat(path, &info);
    int fd = open(path, O_RDONLY);

    if (fd == -1) {
        LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
    }

    char *buf = malloc(info.st_size + 1);
    size_t ret = read(fd, buf, info.st_size);
    if (ret == -1) {
        LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
    }
    *(buf + info.st_size) = '\0';
    close(fd);

    cJSON *json = cJSON_Parse(buf);

    index_descriptor_t descriptor;
    descriptor.timestamp = (long) cJSON_GetObjectItem(json, "timestamp")->valuedouble;
    strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring);
    strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring);
    strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
    descriptor.root_len = (short) strlen(descriptor.root);
    strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
    strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
    if (cJSON_GetObjectItem(json, "type") == NULL) {
        strcpy(descriptor.type, INDEX_TYPE_NDJSON);
    } else {
        strcpy(descriptor.type, cJSON_GetObjectItem(json, "type")->valuestring);
    }

    cJSON_Delete(json);
    free(buf);

    return descriptor;
}
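
For reference, the descriptor that write_index_descriptor() emits and this function parses
back is a single flat JSON object. A plausible example (all values hypothetical; "type" may
be absent in old indices, in which case it defaults to INDEX_TYPE_NDJSON):

    {
        "id": "4b825dc6-aaaa-bbbb-cccc-000000000000",
        "version": "2.12.0",
        "root": "/mnt/data/",
        "name": "my-files",
        "type": "ndjson",
        "rewrite_url": "",
        "timestamp": 1659220000
    }
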
void write_document(document_t *doc) {
    char *json_str = build_json_string(doc);

    database_write_document(ProcData.index_db, doc, json_str);
    free(doc);

    const size_t json_str_len = strlen(json_str);

    json_str = realloc(json_str, json_str_len + 1);
    *(json_str + json_str_len) = '\n';

    tpool_work_arg_t arg = {
            .arg_size = json_str_len + 1,
            .arg = json_str
    };

    tpool_add_work(ScanCtx.writer_pool, write_document_func, &arg);
}
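
This is the core of the commit: documents now go through database_write_document() into
sqlite instead of the zstd-compressed ndjson stream alone. The database module itself is
not part of this excerpt, so the following is purely an illustrative sketch of what such a
write could look like; the table name, columns, and helper name are all assumptions:

    #include <sqlite3.h>

    // Hypothetical sketch only; the real schema lives in the (not shown) database module.
    void example_write_document(sqlite3 *db, const char *doc_id, const char *json_str) {
        sqlite3_stmt *stmt;
        sqlite3_prepare_v2(db,
                           "INSERT INTO document (id, json_data) VALUES (?, ?);",
                           -1, &stmt, NULL);
        sqlite3_bind_text(stmt, 1, doc_id, -1, SQLITE_STATIC);
        sqlite3_bind_text(stmt, 2, json_str, -1, SQLITE_STATIC);
        sqlite3_step(stmt);
        sqlite3_finalize(stmt);
    }
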
void thread_cleanup() {
    cleanup_parse();
    cleanup_font();
}

void read_index_bin_handle_line(const char *line, const char *index_id, index_func func) {

    cJSON *document = cJSON_Parse(line);
    const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;

    cJSON_AddStringToObject(document, "index", index_id);

    // Load meta from sidecar files
    cJSON *meta_obj = NULL;
    if (IndexCtx.meta != NULL) {
        const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
        if (meta_string != NULL) {
            meta_obj = cJSON_Parse(meta_string);

            cJSON *child;
            for (child = meta_obj->child; child != NULL; child = child->next) {
                char meta_key[4096];
                strcpy(meta_key, child->string);
                cJSON_DeleteItemFromObject(document, meta_key);
                cJSON_AddItemReferenceToObject(document, meta_key, child);
            }
        }
    }

    // Load tags from tags DB
    if (IndexCtx.tags != NULL) {
        const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
        if (tags_string != NULL) {
            cJSON *tags_arr = cJSON_Parse(tags_string);
            cJSON_DeleteItemFromObject(document, "tag");
            cJSON_AddItemToObject(document, "tag", tags_arr);
        }
    }

    func(document, path_md5_str);
    cJSON_DeleteItemFromObject(document, "_id");
    cJSON_Delete(document);
    if (meta_obj) {
        cJSON_Delete(meta_obj);
    }
}
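
The sidecar merge above relies on a cJSON ownership rule that is easy to miss: items added
with cJSON_AddItemReferenceToObject() are not owned by the target object, so deleting
`document` does not free them, and `meta_obj` (the real owner) is deleted separately
afterwards. A contrived illustration of the same rule (hypothetical key and value):

    cJSON *owner = cJSON_Parse("{\"author\":\"someone\"}");
    cJSON *doc = cJSON_CreateObject();

    // doc receives a non-owning reference to owner's first child
    cJSON_AddItemReferenceToObject(doc, "author", owner->child);

    cJSON_Delete(doc);   // frees doc and the reference node, not the real item
    cJSON_Delete(owner); // frees the actual "author" item exactly once
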
void read_lines(const char *path, const line_processor_t processor) {
    dyn_buffer_t buf = dyn_buffer_create();

    // Initialize zstd things
    FILE *file = fopen(path, "rb");

    size_t const buf_in_size = ZSTD_DStreamInSize();
    void *const buf_in = malloc(buf_in_size);

    size_t const buf_out_size = ZSTD_DStreamOutSize();
    void *const buf_out = malloc(buf_out_size);

    ZSTD_DCtx *const dctx = ZSTD_createDCtx();

    size_t read;
    size_t last_ret = 0;
    while ((read = fread(buf_in, 1, buf_in_size, file))) {
        ZSTD_inBuffer input = {buf_in, read, 0};

        while (input.pos < input.size) {
            ZSTD_outBuffer output = {buf_out, buf_out_size, 0};

            size_t const ret = ZSTD_decompressStream(dctx, &output, &input);

            for (int i = 0; i < output.pos; i++) {
                char c = ((char *) output.dst)[i];

                if (c == '\n') {
                    dyn_buffer_write_char(&buf, '\0');
                    processor.func(buf.buf, processor.data);
                    buf.cur = 0;
                } else {
                    dyn_buffer_write_char(&buf, c);
                }
            }

            last_ret = ret;
        }
    }

    if (last_ret != 0) {
        /* The last return value from ZSTD_decompressStream did not end on a
         * frame, but we reached the end of the file! We assume this is an
         * error, and the input was truncated.
         */
        LOG_FATALF("serialize.c", "EOF before end of stream: %zu", last_ret)
    }

    ZSTD_freeDCtx(dctx);
    free(buf_in);
    free(buf_out);

    dyn_buffer_destroy(&buf);
    fclose(file);
}
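
A small usage sketch of read_lines() with the line_processor_t callback interface declared
in serialize.h (the callback name and index path here are hypothetical):

    static void count_line(const char *line, void *data) {
        (void) line;
        *(int *) data += 1;
    }

    void example_count_index_lines() {
        int count = 0;
        read_lines("idx_index_main.ndjson.zst", (line_processor_t) {
                .data = &count,
                .func = count_line,
        });
        printf("%d documents\n", count);
    }
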
void read_index_ndjson(const char *line, void *_data) {
    void **data = _data;
    const char *index_id = data[0];
    index_func func = data[1];
    read_index_bin_handle_line(line, index_id, func);
}

void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func func) {
    if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
        read_lines(path, (line_processor_t) {
                .data = (void *[2]) {(void *) index_id, func},
                .func = read_index_ndjson,
        });
    }
}

static __thread GHashTable *IncrementalReadTable = NULL;

void json_put_incremental(cJSON *document, UNUSED(const char doc_id[SIST_DOC_ID_LEN])) {
    const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
    const int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;

    incremental_put(IncrementalReadTable, path_md5_str, mtime);
}

void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc) {
    IncrementalReadTable = table;
    read_index(filepath, desc->id, desc->type, json_put_incremental);
}
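
incremental_put() and incremental_get() are defined elsewhere in the tree. Given how they
are called here (doc id string and mtime in, truthiness out), a plausible GHashTable-backed
implementation would look like the following; this is an assumption for illustration, not
the project's actual code:

    void incremental_put(GHashTable *table, const char *doc_id, int mtime) {
        g_hash_table_insert(table, g_strdup(doc_id), GINT_TO_POINTER(mtime));
    }

    int incremental_get(GHashTable *table, const char *doc_id) {
        // Returns the stored mtime, or 0 when the id was never seen
        return GPOINTER_TO_INT(g_hash_table_lookup(table, doc_id));
    }
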
static __thread GHashTable *IncrementalCopyTable = NULL;
static __thread GHashTable *IncrementalNewTable = NULL;
static __thread store_t *IncrementalCopySourceStore = NULL;
static __thread store_t *IncrementalCopyDestinationStore = NULL;

void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {

    const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;

    if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get(IncrementalCopyTable, doc_id)) {
        // Copy index line
        cJSON_DeleteItemFromObject(document, "index");
        char *json_str = cJSON_PrintUnformatted(document);
        const size_t json_str_len = strlen(json_str);

        json_str = realloc(json_str, json_str_len + 1);
        *(json_str + json_str_len) = '\n';

        // Copy tn store contents
        size_t buf_len;
        char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
        if (buf_len != 0) {
            store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
            free(buf);
        }

        // Also copy additional thumbnails
        if (cJSON_GetObjectItem(document, "thumbnail") != NULL) {
            const int thumbnail_count = cJSON_GetObjectItem(document, "thumbnail")->valueint;

            for (int i = 1; i < thumbnail_count; i++) {
                char tn_key[SIST_DOC_ID_LEN + sizeof(char) * 4];

                snprintf(tn_key, sizeof(tn_key), "%s%04d", doc_id, i);

                buf = store_read(IncrementalCopySourceStore, tn_key, sizeof(tn_key), &buf_len);
                if (buf_len != 0) {
                    store_write(IncrementalCopyDestinationStore, tn_key, sizeof(tn_key), buf, buf_len);
                    free(buf);
                }
            }
        }

        zstd_write_string(json_str, json_str_len + 1);
        free(json_str);
    }
}
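
The thumbnail key scheme concatenates the document id with a zero-padded sequence number;
thumbnail 0 is stored under the bare id, so a document with three thumbnails occupies keys
like these (hypothetical id):

    d41d8cd98f00b204e9800998ecf8427e        thumbnail 0 (the bare doc id)
    d41d8cd98f00b204e9800998ecf8427e0001    thumbnail 1
    d41d8cd98f00b204e9800998ecf8427e0002    thumbnail 2
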
/**
 * Copy items from an index that are in the copy_table. Also copies from
 * the store.
 */
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                      const char *dst_filepath, GHashTable *copy_table) {

    if (WriterCtx.out_file == NULL) {
        initialize_writer_ctx(dst_filepath);
    }

    IncrementalCopyTable = copy_table;
    IncrementalCopySourceStore = store;
    IncrementalCopyDestinationStore = dst_store;

    read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
}

void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {

    char doc_id_n[SIST_DOC_ID_LEN + 1];
    doc_id_n[SIST_DOC_ID_LEN] = '\0';
    doc_id_n[SIST_DOC_ID_LEN - 1] = '\n';
    const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;

    // do not delete archive virtual entries
    if (cJSON_GetObjectItem(document, "parent") == NULL
        && !incremental_get(IncrementalCopyTable, doc_id)
        && !incremental_get(IncrementalNewTable, doc_id)
            ) {
        memcpy(doc_id_n, doc_id, SIST_DOC_ID_LEN - 1);
        // write the newline-terminated copy, not the original NUL-terminated id
        zstd_write_string(doc_id_n, sizeof(doc_id_n));
    }
}

void incremental_delete(const char *del_filepath, const char *index_filepath,
                        GHashTable *copy_table, GHashTable *new_table) {

    if (WriterCtx.out_file == NULL) {
        initialize_writer_ctx(del_filepath);
    }

    IncrementalCopyTable = copy_table;
    IncrementalNewTable = new_table;

    read_index(index_filepath, "", INDEX_TYPE_NDJSON, incremental_delete_handle_doc);
}

free(json_str);
}

@@ -2,55 +2,7 @@
#define SIST2_SERIALIZE_H

#include "src/sist.h"
#include "store.h"

#include <sys/syscall.h>
#include <glib.h>

typedef struct line_processor {
    void* data;
    void (*func)(const char*, void*);
} line_processor_t;

typedef void(*index_func)(cJSON *, const char[SIST_DOC_ID_LEN]);

void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                      const char *dst_filepath, GHashTable *copy_table);

void incremental_delete(const char *del_filepath, const char* index_filepath,
                        GHashTable *copy_table, GHashTable *new_table);

void write_document(document_t *doc);

void read_lines(const char *path, const line_processor_t processor);

void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func);

void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);

/**
 * Must be called after write_document
 */
void thread_cleanup();

void writer_cleanup();

void write_index_descriptor(char *path, index_descriptor_t *desc);

index_descriptor_t read_index_descriptor(char *path);

// caller ensures char file_path[PATH_MAX]
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
    snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
    if (access(file_path, R_OK) == 0) { \
        action_ok; \
    } else { \
        action_main_fail; \
    } \
    snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
    if ((cond_original) && access(file_path, R_OK) == 0) { \
        action_ok; \
    } \
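
READ_INDICES pastes its arguments as bare statements, so the caller supplies the buffer and
both actions inline. A hypothetical call site (assumes a `processor` built as in
read_index()):

    char file_path[PATH_MAX];
    READ_INDICES(file_path, index_path,
                 read_lines(file_path, processor),
                 LOG_FATALF("example.c", "Could not find index: %s", index_path),
                 TRUE /* also visit the _index_original file, if present */);
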
#endif

232 src/io/store.c

@@ -1,232 +0,0 @@
#include <sys/mman.h>
#include "store.h"
#include "src/ctx.h"

//#define SIST_FAKE_STORE 1

void open_env(const char *path, MDB_env **env, MDB_dbi *dbi) {
    mdb_env_create(env);

    int open_ret = mdb_env_open(*env,
                                path,
                                MDB_WRITEMAP | MDB_MAPASYNC,
                                S_IRUSR | S_IWUSR
    );

    if (open_ret != 0) {
        LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
    }

    MDB_txn *txn;
    mdb_txn_begin(*env, NULL, 0, &txn);
    mdb_dbi_open(txn, NULL, 0, dbi);
    mdb_txn_commit(txn);
}

store_t *store_create(const char *path, size_t chunk_size) {
    store_t *store = calloc(1, sizeof(struct store_t));
    mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR);
    strcpy(store->path, path);

    MDB_env *env;
    MDB_dbi dbi;

#if (SIST_FAKE_STORE != 1)
    store->chunk_size = chunk_size;

    store->shm = mmap(NULL, sizeof(*store->shm), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);

    open_env(path, &env, &dbi);

    store->shm->size = (size_t) store->chunk_size;
    mdb_env_set_mapsize(env, store->shm->size);

    // Close, child processes will open the environment again
    mdb_env_close(env);
#endif

    return store;
}

void store_destroy(store_t *store) {

    LOG_DEBUG("store.c", "store_destroy()")
#if (SIST_FAKE_STORE != 1)
    munmap(store->shm, sizeof(*store->shm));

    mdb_dbi_close(store->proc.env, store->proc.dbi);
    mdb_env_close(store->proc.env);
#endif
    free(store);
}

void store_flush(store_t *store) {
    mdb_env_sync(store->proc.env, TRUE);
}

void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {

    ScanCtx.stat_tn_size += buf_len;

    if (LogCtx.very_verbose) {
        LOG_DEBUGF("store.c", "Store write %s@{%s} %lu bytes", store->path, key, buf_len)
    }

#if (SIST_FAKE_STORE != 1)

    if (store->proc.env == NULL) {
        open_env(store->path, &store->proc.env, &store->proc.dbi);
        LOG_DEBUGF("store.c", "Opening mdb environment %s", store->path)
    }

    MDB_val mdb_key;
    mdb_key.mv_data = key;
    mdb_key.mv_size = key_len;

    MDB_val mdb_value;
    mdb_value.mv_data = buf;
    mdb_value.mv_size = buf_len;

    MDB_txn *txn;

    int db_full = FALSE;
    int put_ret = 0;
    int should_abort_transaction = FALSE;
    int should_increase_size = TRUE;

    int begin_ret = mdb_txn_begin(store->proc.env, NULL, 0, &txn);

    if (begin_ret == MDB_MAP_RESIZED) {
        // mapsize was increased by another process. We don't need to increase the size again, but we need
        // to update the size of the environment for the current process.
        db_full = TRUE;
        should_increase_size = FALSE;
    } else {
        put_ret = mdb_put(txn, store->proc.dbi, &mdb_key, &mdb_value, 0);

        if (put_ret == MDB_MAP_FULL) {
            // Database is full, we need to increase the environment size
            db_full = TRUE;
            should_abort_transaction = TRUE;
        } else {
            int commit_ret = mdb_txn_commit(txn);

            if (commit_ret == MDB_MAP_FULL) {
                db_full = TRUE;
            }
        }
    }

    if (db_full) {
        LOG_DEBUGF("store.c", "Updating mdb mapsize to %lu bytes", store->shm->size)

        if (should_abort_transaction) {
            mdb_txn_abort(txn);
        }

        // Cannot resize when there is an opened transaction in this process.
        // Resize take effect on the next commit.
        if (should_increase_size) {
            store->shm->size += store->chunk_size;
        }
        int resize_ret = mdb_env_set_mapsize(store->proc.env, store->shm->size);
        if (resize_ret != 0) {
            LOG_ERRORF("store.c", "mdb_env_set_mapsize() failed: %s", mdb_strerror(resize_ret))
        }
        mdb_txn_begin(store->proc.env, NULL, 0, &txn);
        int put_ret_retry = mdb_put(txn, store->proc.dbi, &mdb_key, &mdb_value, 0);

        if (put_ret_retry != 0) {
            LOG_ERRORF("store.c", "mdb_put() (retry) failed: %s", mdb_strerror(put_ret_retry))
        }

        int ret = mdb_txn_commit(txn);
        if (ret != 0) {
            LOG_FATALF("store.c", "FIXME: Could not commit to store %s: %s (%d), %d, %d %d",
                       store->path, mdb_strerror(ret), ret,
                       ret, put_ret_retry)
        }
        LOG_DEBUGF("store.c", "Updated mdb mapsize to %lu bytes", store->shm->size)
    } else if (put_ret != 0) {
        LOG_ERRORF("store.c", "mdb_put() failed: %s", mdb_strerror(put_ret))
    }

#endif
}

char *store_read(store_t *store, char *key, size_t key_len, size_t *return_value_len) {
    char *buf = NULL;

#if (SIST_FAKE_STORE != 1)
    if (store->proc.env == NULL) {
        open_env(store->path, &store->proc.env, &store->proc.dbi);
    }

    MDB_val mdb_key;
    mdb_key.mv_data = key;
    mdb_key.mv_size = key_len;

    MDB_val mdb_value;

    MDB_txn *txn;
    mdb_txn_begin(store->proc.env, NULL, MDB_RDONLY, &txn);

    int get_ret = mdb_get(txn, store->proc.dbi, &mdb_key, &mdb_value);

    if (get_ret == MDB_NOTFOUND) {
        *return_value_len = 0;
    } else {
        *return_value_len = mdb_value.mv_size;
        buf = malloc(mdb_value.mv_size);
        memcpy(buf, mdb_value.mv_data, mdb_value.mv_size);
    }

    mdb_txn_abort(txn);
#endif
    return buf;
}

GHashTable *store_read_all(store_t *store) {

    if (store->proc.env == NULL) {
        open_env(store->path, &store->proc.env, &store->proc.dbi);
        LOG_DEBUGF("store.c", "Opening mdb environment %s", store->path)
    }

    int count = 0;

    GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);

    MDB_txn *txn = NULL;
    mdb_txn_begin(store->proc.env, NULL, MDB_RDONLY, &txn);

    MDB_cursor *cur = NULL;
    mdb_cursor_open(txn, store->proc.dbi, &cur);

    MDB_val key;
    MDB_val value;

    while (mdb_cursor_get(cur, &key, &value, MDB_NEXT) == 0) {
        char *key_str = malloc(key.mv_size);
        memcpy(key_str, key.mv_data, key.mv_size);
        char *val_str = malloc(value.mv_size);
        memcpy(val_str, value.mv_data, value.mv_size);

        g_hash_table_insert(table, key_str, val_str);
        count += 1;
    }

    const char *path;
    mdb_env_get_path(store->proc.env, &path);
    LOG_DEBUGF("store.c", "Read %d entries from %s", count, path)

    mdb_cursor_close(cur);
    mdb_txn_abort(txn);
    return table;
}

void store_copy(store_t *store, const char *destination) {
    mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR);
    mdb_env_copy(store->proc.env, destination);
}
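
Taken together, the store API that this commit removes was used roughly as follows — a
hypothetical round trip with an invented key and buffer; in sist2 the keys are document
ids and the values are thumbnail bytes:

    store_t *store = store_create("/tmp/example_store", STORE_SIZE_TN);

    char key[] = "d41d8cd98f00b204e9800998ecf8427e";
    store_write(store, key, sizeof(key), thumbnail_buf, thumbnail_len);
    store_flush(store);

    size_t len;
    char *data = store_read(store, key, sizeof(key), &len);
    if (len != 0) {
        // ... use data ...
        free(data);
    }

    store_destroy(store);
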
@@ -1,42 +0,0 @@
#ifndef SIST2_STORE_H
#define SIST2_STORE_H

#include <pthread.h>
#include <lmdb.h>

#include <glib.h>

#define STORE_SIZE_TN (1024 * 1024 * 5)
#define STORE_SIZE_TAG (1024 * 1024)
#define STORE_SIZE_META STORE_SIZE_TAG

typedef struct store_t {
    char path[PATH_MAX];
    size_t chunk_size;

    struct {
        MDB_dbi dbi;
        MDB_env *env;
    } proc;

    struct {
        size_t size;
    } *shm;
} store_t;

store_t *store_create(const char *path, size_t chunk_size);

void store_destroy(store_t *store);

void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len);

void store_flush(store_t *store);

char *store_read(store_t *store, char *key, size_t key_len, size_t *return_value_len);

GHashTable *store_read_all(store_t *store);

void store_copy(store_t *store, const char *destination);

#endif

@@ -1,46 +1,12 @@
#include "walk.h"
#include "src/ctx.h"
#include "src/parsing/parse.h"
#include "src/parsing/fs_util.h"

#include <ftw.h>
#include <pthread.h>

#define STR_STARTS_WITH(x, y) (strncmp(y, x, strlen(y) - 1) == 0)

__always_inline
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
    int len = (int) strlen(filepath);
    parse_job_t *job = malloc(sizeof(parse_job_t));

    strcpy(job->filepath, filepath);
    job->base = base;
    char *p = strrchr(filepath + base, '.');
    if (p != NULL) {
        job->ext = (int) (p - filepath + 1);
    } else {
        job->ext = len;
    }

    job->vfile.st_size = info->st_size;
    job->vfile.st_mode = info->st_mode;
    job->vfile.mtime = (int) info->st_mtim.tv_sec;

    job->parent[0] = '\0';

    memcpy(job->vfile.filepath, job->filepath, sizeof(job->vfile.filepath));
    job->vfile.read = fs_read;
    // Filesystem reads are always rewindable
    job->vfile.read_rewindable = fs_read;
    job->vfile.reset = fs_reset;
    job->vfile.close = fs_close;
    job->vfile.fd = -1;
    job->vfile.is_fs_file = TRUE;
    job->vfile.has_checksum = FALSE;
    job->vfile.rewind_buffer_size = 0;
    job->vfile.rewind_buffer = NULL;
    job->vfile.calculate_checksum = ScanCtx.calculate_checksums;

    return job;
}
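
job->ext is stored as an integer offset into filepath rather than as a pointer, so the
struct stays meaningful after the thread pool copies it. A worked example with a
hypothetical path:

    // filepath = "/mnt/data/photos/cat.jpg", base = 17 (offset of "cat.jpg")
    // strrchr(filepath + base, '.') points at ".jpg" (offset 20), so
    // job->ext = 20 + 1 = 21, and filepath + 21 is "jpg"
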
int sub_strings[30];
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, str, strlen(str), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)

@@ -55,7 +21,7 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
    }

    if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
        LOG_DEBUGF("walk.c", "Excluded: %s", filepath)
        LOG_DEBUGF("walk.c", "Excluded: %s", filepath);

        if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
            pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);

@@ -69,13 +35,13 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
    }

    if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
        parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
        parse_job_t *job = create_parse_job(filepath, (int) info->st_mtim.tv_sec, info->st_size);

        tpool_work_arg_t arg = {
                .arg_size = sizeof(parse_job_t),
                .arg = job
        };
        tpool_add_work(ScanCtx.pool, parse, &arg);
        tpool_add_work(ScanCtx.pool, &(job_t) {
                .type = JOB_PARSE_JOB,
                .parse_job = job
        });
        free(job);
    }

    return FTW_CONTINUE;

@@ -116,7 +82,7 @@ int iterate_file_list(void *input_file) {
    }

    if (ScanCtx.exclude != NULL && EXCLUDED(absolute_path)) {
        LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path)
        LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path);

        if (S_ISREG(info.st_mode)) {
            pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);

@@ -131,16 +97,14 @@ int iterate_file_list(void *input_file) {
        LOG_FATALF("walk.c", "File is not a children of root folder (%s): %s", ScanCtx.index.desc.root, buf);
    }

    int base = (int) (strrchr(buf, '/') - buf) + 1;

    parse_job_t *job = create_fs_parse_job(absolute_path, &info, base);
    parse_job_t *job = create_parse_job(absolute_path, (int) info.st_mtim.tv_sec, info.st_size);
    free(absolute_path);

    tpool_work_arg_t arg = {
            .arg = job,
            .arg_size = sizeof(parse_job_t)
    };
    tpool_add_work(ScanCtx.pool, parse, &arg);
    tpool_add_work(ScanCtx.pool, &(job_t) {
            .type = JOB_PARSE_JOB,
            .parse_job = job
    });
    free(job);
    }

    return 0;