process pool mostly works, still WIP

This commit is contained in:
2023-03-09 22:11:21 -05:00
parent 8c662bb8f8
commit f8abffba81
25 changed files with 1219 additions and 267 deletions

View File

@@ -197,7 +197,7 @@ static struct {
ZSTD_CCtx *cctx;
} WriterCtx = {
.out_file = NULL
.out_file = NULL
};
#define ZSTD_COMPRESSION_LEVEL 10
@@ -229,7 +229,9 @@ void zstd_write_string(const char *string, const size_t len) {
} while (input.pos != input.size);
}
void write_document_func(void *arg) {
void write_document_func(tpool_work_arg_shm_t *arg) {
const char *json_str = arg->arg;
if (WriterCtx.out_file == NULL) {
char dstfile[PATH_MAX];
@@ -237,17 +239,7 @@ void write_document_func(void *arg) {
initialize_writer_ctx(dstfile);
}
document_t *doc = arg;
char *json_str = build_json_string(doc);
const size_t json_str_len = strlen(json_str);
json_str = realloc(json_str, json_str_len + 1);
*(json_str + json_str_len) = '\n';
zstd_write_string(json_str, json_str_len + 1);
free(json_str);
zstd_write_string(json_str, arg->arg_size);
}
void zstd_close() {
@@ -345,7 +337,19 @@ index_descriptor_t read_index_descriptor(char *path) {
void write_document(document_t *doc) {
tpool_add_work(ScanCtx.writer_pool, write_document_func, doc);
char *json_str = build_json_string(doc);
free(doc);
const size_t json_str_len = strlen(json_str);
json_str = realloc(json_str, json_str_len + 1);
*(json_str + json_str_len) = '\n';
tpool_work_arg_t arg = {
.arg_size = json_str_len + 1,
.arg = json_str
};
tpool_add_work(ScanCtx.writer_pool, write_document_func, &arg);
}
void thread_cleanup() {

View File

@@ -1,18 +1,13 @@
#include <sys/mman.h>
#include "store.h"
#include "src/ctx.h"
store_t *store_create(const char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t));
mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR);
strcpy(store->path, path);
//#define SIST_FAKE_STORE 1
#if (SIST_FAKE_STORE != 1)
store->chunk_size = chunk_size;
pthread_rwlock_init(&store->lock, NULL);
void open_env(const char *path, MDB_env **env, MDB_dbi *dbi) {
mdb_env_create(env);
mdb_env_create(&store->env);
int open_ret = mdb_env_open(store->env,
int open_ret = mdb_env_open(*env,
path,
MDB_WRITEMAP | MDB_MAPASYNC,
S_IRUSR | S_IWUSR
@@ -22,14 +17,33 @@ store_t *store_create(const char *path, size_t chunk_size) {
LOG_FATALF("store.c", "Error while opening store: %s (%s)\n", mdb_strerror(open_ret), path)
}
store->size = (size_t) store->chunk_size;
mdb_env_set_mapsize(store->env, store->size);
// Open dbi
MDB_txn *txn;
mdb_txn_begin(store->env, NULL, 0, &txn);
mdb_dbi_open(txn, NULL, 0, &store->dbi);
mdb_txn_begin(*env, NULL, 0, &txn);
mdb_dbi_open(txn, NULL, 0, dbi);
mdb_txn_commit(txn);
}
store_t *store_create(const char *path, size_t chunk_size) {
store_t *store = calloc(1, sizeof(struct store_t));
mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR);
strcpy(store->path, path);
MDB_env *env;
MDB_dbi dbi;
#if (SIST_FAKE_STORE != 1)
store->chunk_size = chunk_size;
store->shared_memory = mmap(NULL, sizeof(*store->shm), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
store->shm = store->shared_memory;
open_env(path, &env, &dbi);
store->shm->size = (size_t) store->chunk_size;
mdb_env_set_mapsize(env, store->shm->size);
// Close, child processes will open the environment again
mdb_env_close(env);
#endif
return store;
@@ -37,26 +51,35 @@ store_t *store_create(const char *path, size_t chunk_size) {
void store_destroy(store_t *store) {
LOG_DEBUG("store.c", "store_destroy()")
#if (SIST_FAKE_STORE != 1)
pthread_rwlock_destroy(&store->lock);
mdb_dbi_close(store->env, store->dbi);
mdb_env_close(store->env);
munmap(store->shared_memory, sizeof(*store->shm));
mdb_dbi_close(store->proc.env, store->proc.dbi);
mdb_env_close(store->proc.env);
#endif
free(store);
}
void store_flush(store_t *store) {
mdb_env_sync(store->env, TRUE);
mdb_env_sync(store->proc.env, TRUE);
}
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
ScanCtx.stat_tn_size += buf_len;
if (LogCtx.very_verbose) {
LOG_DEBUGF("store.c", "Store write %s@{%s} %lu bytes", store->path, key, buf_len)
}
#if (SIST_FAKE_STORE != 1)
if (store->proc.env == NULL) {
open_env(store->path, &store->proc.env, &store->proc.dbi);
LOG_DEBUGF("store.c", "Opening mdb environment %s", store->path)
}
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
@@ -66,70 +89,80 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
mdb_value.mv_size = buf_len;
MDB_txn *txn;
pthread_rwlock_rdlock(&store->lock);
mdb_txn_begin(store->env, NULL, 0, &txn);
int put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
ScanCtx.stat_tn_size += buf_len;
int db_full = FALSE;
int put_ret = 0;
int should_abort_transaction = FALSE;
int should_increase_size = TRUE;
if (put_ret == MDB_MAP_FULL) {
int begin_ret = mdb_txn_begin(store->proc.env, NULL, 0, &txn);
if (begin_ret == MDB_MAP_RESIZED) {
// mapsize was increased by another process. We don't need to increase the size again, but we need
// to update the size of the environment for the current process.
db_full = TRUE;
should_abort_transaction = TRUE;
should_increase_size = FALSE;
} else {
int commit_ret = mdb_txn_commit(txn);
put_ret = mdb_put(txn, store->proc.dbi, &mdb_key, &mdb_value, 0);
if (commit_ret == MDB_MAP_FULL) {
if (put_ret == MDB_MAP_FULL) {
// Database is full, we need to increase the environment size
db_full = TRUE;
should_abort_transaction = TRUE;
} else {
int commit_ret = mdb_txn_commit(txn);
if (commit_ret == MDB_MAP_FULL) {
db_full = TRUE;
}
}
}
if (db_full) {
LOG_DEBUGF("store.c", "Updating mdb mapsize to %lu bytes", store->size)
LOG_DEBUGF("store.c", "Updating mdb mapsize to %lu bytes", store->shm->size)
if (should_abort_transaction) {
mdb_txn_abort(txn);
}
pthread_rwlock_unlock(&store->lock);
// Cannot resize when there is a opened transaction.
// Cannot resize when there is an opened transaction in this process.
// Resize take effect on the next commit.
pthread_rwlock_wrlock(&store->lock);
store->size += store->chunk_size;
int resize_ret = mdb_env_set_mapsize(store->env, store->size);
if (resize_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
if (should_increase_size) {
store->shm->size += store->chunk_size;
}
mdb_txn_begin(store->env, NULL, 0, &txn);
int put_ret_retry = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
int resize_ret = mdb_env_set_mapsize(store->proc.env, store->shm->size);
if (resize_ret != 0) {
LOG_ERRORF("store.c", "mdb_env_set_mapsize() failed: %s", mdb_strerror(resize_ret))
}
mdb_txn_begin(store->proc.env, NULL, 0, &txn);
int put_ret_retry = mdb_put(txn, store->proc.dbi, &mdb_key, &mdb_value, 0);
if (put_ret_retry != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
LOG_ERRORF("store.c", "mdb_put() (retry) failed: %s", mdb_strerror(put_ret_retry))
}
int ret = mdb_txn_commit(txn);
if (ret != 0) {
LOG_FATALF("store.c", "FIXME: Could not commit to store %s: %s (%d), %d, %d %d",
store->path, mdb_strerror(ret), ret,
put_ret, put_ret_retry);
ret, put_ret_retry)
}
LOG_DEBUGF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
LOG_DEBUGF("store.c", "Updated mdb mapsize to %lu bytes", store->shm->size)
} else if (put_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
LOG_ERRORF("store.c", "mdb_put() failed: %s", mdb_strerror(put_ret))
}
pthread_rwlock_unlock(&store->lock);
#endif
}
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen) {
char *store_read(store_t *store, char *key, size_t key_len, size_t *return_value_len) {
char *buf = NULL;
#if (SIST_FAKE_STORE != 1)
if (store->proc.env == NULL) {
open_env(store->path, &store->proc.env, &store->proc.dbi);
}
MDB_val mdb_key;
mdb_key.mv_data = key;
mdb_key.mv_size = key_len;
@@ -137,14 +170,14 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
MDB_val mdb_value;
MDB_txn *txn;
mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
mdb_txn_begin(store->proc.env, NULL, MDB_RDONLY, &txn);
int get_ret = mdb_get(txn, store->dbi, &mdb_key, &mdb_value);
int get_ret = mdb_get(txn, store->proc.dbi, &mdb_key, &mdb_value);
if (get_ret == MDB_NOTFOUND) {
*ret_vallen = 0;
*return_value_len = 0;
} else {
*ret_vallen = mdb_value.mv_size;
*return_value_len = mdb_value.mv_size;
buf = malloc(mdb_value.mv_size);
memcpy(buf, mdb_value.mv_data, mdb_value.mv_size);
}
@@ -156,15 +189,20 @@ char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen)
GHashTable *store_read_all(store_t *store) {
if (store->proc.env == NULL) {
open_env(store->path, &store->proc.env, &store->proc.dbi);
LOG_DEBUGF("store.c", "Opening mdb environment %s", store->path)
}
int count = 0;
GHashTable *table = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);
MDB_txn *txn = NULL;
mdb_txn_begin(store->env, NULL, MDB_RDONLY, &txn);
mdb_txn_begin(store->proc.env, NULL, MDB_RDONLY, &txn);
MDB_cursor *cur = NULL;
mdb_cursor_open(txn, store->dbi, &cur);
mdb_cursor_open(txn, store->proc.dbi, &cur);
MDB_val key;
MDB_val value;
@@ -180,8 +218,8 @@ GHashTable *store_read_all(store_t *store) {
}
const char *path;
mdb_env_get_path(store->env, &path);
LOG_DEBUGF("store.c", "Read %d entries from %s", count, path);
mdb_env_get_path(store->proc.env, &path);
LOG_DEBUGF("store.c", "Read %d entries from %s", count, path)
mdb_cursor_close(cur);
mdb_txn_abort(txn);
@@ -191,5 +229,5 @@ GHashTable *store_read_all(store_t *store) {
void store_copy(store_t *store, const char *destination) {
mkdir(destination, S_IWUSR | S_IRUSR | S_IXUSR);
mdb_env_copy(store->env, destination);
mdb_env_copy(store->proc.env, destination);
}

View File

@@ -10,14 +10,20 @@
#define STORE_SIZE_TAG (1024 * 1024)
#define STORE_SIZE_META STORE_SIZE_TAG
typedef struct store_t {
char path[PATH_MAX];
char *tmp_path;
MDB_dbi dbi;
MDB_env *env;
size_t size;
size_t chunk_size;
pthread_rwlock_t lock;
void *shared_memory;
struct {
MDB_dbi dbi;
MDB_env *env;
} proc;
struct {
size_t size;
} *shm;
} store_t;
store_t *store_create(const char *path, size_t chunk_size);
@@ -28,7 +34,7 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
void store_flush(store_t *store);
char *store_read(store_t *store, char *key, size_t key_len, size_t *ret_vallen);
char *store_read(store_t *store, char *key, size_t key_len, size_t *return_value_len);
GHashTable *store_read_all(store_t *store);

View File

@@ -20,11 +20,13 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->ext = len;
}
job->vfile.info = *info;
job->vfile.st_size = info->st_size;
job->vfile.st_mode = info->st_mode;
job->vfile.mtime = (int) info->st_mtim.tv_sec;
job->parent[0] = '\0';
job->vfile.filepath = job->filepath;
memcpy(job->vfile.filepath, job->filepath, sizeof(job->vfile.filepath));
job->vfile.read = fs_read;
// Filesystem reads are always rewindable
job->vfile.read_rewindable = fs_read;
@@ -68,7 +70,12 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
parse_job_t *job = create_fs_parse_job(filepath, info, ftw->base);
tpool_add_work(ScanCtx.pool, parse, job);
tpool_work_arg_t arg = {
.arg_size = sizeof(parse_job_t),
.arg = job
};
tpool_add_work(ScanCtx.pool, parse, &arg);
}
return FTW_CONTINUE;
@@ -128,7 +135,12 @@ int iterate_file_list(void *input_file) {
parse_job_t *job = create_fs_parse_job(absolute_path, &info, base);
free(absolute_path);
tpool_add_work(ScanCtx.pool, parse, job);
tpool_work_arg_t arg = {
.arg = job,
.arg_size = sizeof(parse_job_t)
};
tpool_add_work(ScanCtx.pool, parse, &arg);
}
return 0;