web UI rewrite, switch to ndjson.zst index format

This commit is contained in:
2021-09-05 09:49:07 -04:00
parent 391d8ed9d9
commit f4e1d90a6b
133 changed files with 34220 additions and 4988 deletions

View File

@@ -3,101 +3,7 @@
#include "src/parsing/parse.h"
#include "src/parsing/mime.h"
static __thread int index_fd = -1;
typedef struct {
unsigned char path_md5[MD5_DIGEST_LENGTH];
unsigned long size;
unsigned int mime;
int mtime;
short base;
short ext;
char has_parent;
} line_t;
#define META_NEXT 0xFFFF
void skip_meta(FILE *file) {
enum metakey key = 0;
fread(&key, sizeof(uint16_t), 1, file);
while (key != META_NEXT) {
if (IS_META_INT(key)) {
fseek(file, sizeof(int), SEEK_CUR);
} else if (IS_META_LONG(key)) {
fseek(file, sizeof(long), SEEK_CUR);
} else {
while ((getc(file))) {}
}
fread(&key, sizeof(uint16_t), 1, file);
}
}
void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON *json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "id", desc->id);
cJSON_AddStringToObject(json, "version", desc->version);
cJSON_AddStringToObject(json, "root", desc->root);
cJSON_AddStringToObject(json, "name", desc->name);
cJSON_AddStringToObject(json, "type", desc->type);
cJSON_AddStringToObject(json, "rewrite_url", desc->rewrite_url);
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
}
char *str = cJSON_Print(json);
int ret = write(fd, str, strlen(str));
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
}
free(str);
close(fd);
cJSON_Delete(json);
}
index_descriptor_t read_index_descriptor(char *path) {
struct stat info;
stat(path, &info);
int fd = open(path, O_RDONLY);
if (fd == -1) {
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
}
char *buf = malloc(info.st_size + 1);
size_t ret = read(fd, buf, info.st_size);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
}
*(buf + info.st_size) = '\0';
close(fd);
cJSON *json = cJSON_Parse(buf);
index_descriptor_t descriptor;
descriptor.timestamp = (long) cJSON_GetObjectItem(json, "timestamp")->valuedouble;
strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring);
strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring);
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (short) strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
if (cJSON_GetObjectItem(json, "type") == NULL) {
strcpy(descriptor.type, INDEX_TYPE_BIN);
} else {
strcpy(descriptor.type, cJSON_GetObjectItem(json, "type")->valuestring);
}
cJSON_Delete(json);
free(buf);
return descriptor;
}
#include <zstd.h>
char *get_meta_key_text(enum metakey meta_key) {
@@ -173,318 +79,426 @@ char *get_meta_key_text(enum metakey meta_key) {
}
}
char *build_json_string(document_t *doc) {
cJSON *json = cJSON_CreateObject();
int buffer_size_guess = 8192;
void write_document(document_t *doc) {
if (index_fd == -1) {
char dstfile[PATH_MAX];
pthread_t self = pthread_self();
snprintf(dstfile, PATH_MAX, "%s_index_%lu", ScanCtx.index.path, self);
index_fd = open(dstfile, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
if (index_fd == -1) {
perror("open");
}
const char *mime_text = mime_get_mime_text(doc->mime);
if (mime_text == NULL) {
cJSON_AddNullToObject(json, "mime");
} else {
cJSON_AddStringToObject(json, "mime", mime_text);
}
dyn_buffer_t buf = dyn_buffer_create();
cJSON_AddNumberToObject(json, "size", (double) doc->size);
cJSON_AddNumberToObject(json, "mtime", doc->mtime);
// Ignore root directory in the file path
doc->ext = (short) (doc->ext - ScanCtx.index.desc.root_len);
doc->base = (short) (doc->base - ScanCtx.index.desc.root_len);
doc->filepath += ScanCtx.index.desc.root_len;
char *filepath = doc->filepath + ScanCtx.index.desc.root_len;
dyn_buffer_write(&buf, doc, sizeof(line_t));
dyn_buffer_write_str(&buf, doc->filepath);
cJSON_AddStringToObject(json, "extension", filepath + doc->ext);
// Remove extension
if (*(filepath + doc->ext - 1) == '.') {
*(filepath + doc->ext - 1) = '\0';
} else {
*(filepath + doc->ext) = '\0';
}
char filepath_escaped[PATH_MAX * 3];
str_escape(filepath_escaped, filepath + doc->base);
cJSON_AddStringToObject(json, "name", filepath_escaped);
if (doc->base > 0) {
*(filepath + doc->base - 1) = '\0';
str_escape(filepath_escaped, filepath);
cJSON_AddStringToObject(json, "path", filepath_escaped);
} else {
cJSON_AddStringToObject(json, "path", "");
}
char md5_str[MD5_STR_LENGTH];
buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, md5_str);
cJSON_AddStringToObject(json, "_id", md5_str);
// Metadata
meta_line_t *meta = doc->meta_head;
while (meta != NULL) {
dyn_buffer_write_short(&buf, (uint16_t) meta->key);
if (IS_META_INT(meta->key)) {
dyn_buffer_write_int(&buf, meta->int_val);
} else if (IS_META_LONG(meta->key)) {
dyn_buffer_write_long(&buf, meta->long_val);
} else {
dyn_buffer_write_str(&buf, meta->str_val);
switch (meta->key) {
case MetaPages:
case MetaWidth:
case MetaHeight:
case MetaMediaDuration:
case MetaMediaBitrate: {
cJSON_AddNumberToObject(json, get_meta_key_text(meta->key), (double) meta->long_val);
buffer_size_guess += 20;
break;
}
case MetaMediaAudioCodec:
case MetaMediaVideoCodec:
case MetaContent:
case MetaArtist:
case MetaAlbum:
case MetaAlbumArtist:
case MetaGenre:
case MetaFontName:
case MetaParent:
case MetaExifMake:
case MetaExifSoftware:
case MetaExifExposureTime:
case MetaExifFNumber:
case MetaExifFocalLength:
case MetaExifUserComment:
case MetaExifIsoSpeedRatings:
case MetaExifDateTime:
case MetaExifModel:
case MetaAuthor:
case MetaModifiedBy:
case MetaThumbnail:
case MetaExifGpsLongitudeDMS:
case MetaExifGpsLongitudeDec:
case MetaExifGpsLongitudeRef:
case MetaExifGpsLatitudeDMS:
case MetaExifGpsLatitudeDec:
case MetaExifGpsLatitudeRef:
case MetaTitle: {
cJSON_AddStringToObject(json, get_meta_key_text(meta->key), meta->str_val);
buffer_size_guess += (int) strlen(meta->str_val);
break;
}
default:
LOG_FATALF("serialize.c", "Invalid meta key: %x %s", meta->key, get_meta_key_text(meta->key))
}
meta_line_t *tmp = meta;
meta = meta->next;
free(tmp);
}
dyn_buffer_write_short(&buf, META_NEXT);
int res = write(index_fd, buf.buf, buf.cur);
if (res == -1) {
LOG_FATALF("serialize.c", "Could not write document: %s", strerror(errno))
char *json_str = cJSON_PrintBuffered(json, buffer_size_guess, FALSE);
cJSON_Delete(json);
return json_str;
}
static struct {
FILE *out_file;
size_t buf_out_size;
void *buf_out;
ZSTD_CCtx *cctx;
} WriterCtx = {
.out_file = NULL
};
#define ZSTD_COMPRESSION_LEVEL 10
void initialize_writer_ctx(const char *file_path) {
WriterCtx.out_file = fopen(file_path, "wb");
WriterCtx.buf_out_size = ZSTD_CStreamOutSize();
WriterCtx.buf_out = malloc(WriterCtx.buf_out_size);
WriterCtx.cctx = ZSTD_createCCtx();
ZSTD_CCtx_setParameter(WriterCtx.cctx, ZSTD_c_compressionLevel, ZSTD_COMPRESSION_LEVEL);
ZSTD_CCtx_setParameter(WriterCtx.cctx, ZSTD_c_checksumFlag, FALSE);
LOG_DEBUGF("serialize.c", "Open index file for writing %s", file_path)
}
void zstd_write_string(const char *string, const size_t len) {
ZSTD_inBuffer input = {string, len, 0};
do {
ZSTD_outBuffer output = {WriterCtx.buf_out, WriterCtx.buf_out_size, 0};
ZSTD_compressStream2(WriterCtx.cctx, &output, &input, ZSTD_e_continue);
if (output.pos > 0) {
ScanCtx.stat_index_size += fwrite(WriterCtx.buf_out, 1, output.pos, WriterCtx.out_file);
}
} while (input.pos != input.size);
}
void write_document_func(void *arg) {
if (WriterCtx.out_file == NULL) {
char dstfile[PATH_MAX];
snprintf(dstfile, PATH_MAX, "%s_index_main.ndjson.zst", ScanCtx.index.path);
initialize_writer_ctx(dstfile);
}
ScanCtx.stat_index_size += buf.cur;
dyn_buffer_destroy(&buf);
document_t *doc = arg;
char *json_str = build_json_string(doc);
const size_t json_str_len = strlen(json_str);
json_str = realloc(json_str, json_str_len + 1);
*(json_str + json_str_len) = '\n';
zstd_write_string(json_str, json_str_len + 1);
free(json_str);
free(doc->filepath);
}
void zstd_close() {
if (WriterCtx.out_file == NULL) {
LOG_DEBUG("serialize.c", "No zstd stream to close, skipping cleanup")
return;
}
size_t remaining;
do {
ZSTD_outBuffer output = {WriterCtx.buf_out, WriterCtx.buf_out_size, 0};
remaining = ZSTD_endStream(WriterCtx.cctx, &output);
if (output.pos > 0) {
ScanCtx.stat_index_size += fwrite(WriterCtx.buf_out, 1, output.pos, WriterCtx.out_file);
}
} while (remaining != 0);
ZSTD_freeCCtx(WriterCtx.cctx);
free(WriterCtx.buf_out);
fclose(WriterCtx.out_file);
LOG_DEBUG("serialize.c", "End zstd stream & close index file")
}
void writer_cleanup() {
zstd_close();
WriterCtx.out_file = NULL;
}
void write_index_descriptor(char *path, index_descriptor_t *desc) {
cJSON *json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "id", desc->id);
cJSON_AddStringToObject(json, "version", desc->version);
cJSON_AddStringToObject(json, "root", desc->root);
cJSON_AddStringToObject(json, "name", desc->name);
cJSON_AddStringToObject(json, "type", desc->type);
cJSON_AddStringToObject(json, "rewrite_url", desc->rewrite_url);
cJSON_AddNumberToObject(json, "timestamp", (double) desc->timestamp);
int fd = open(path, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
LOG_FATALF("serialize.c", "Could not open index descriptor: %s", strerror(errno));
}
char *str = cJSON_Print(json);
size_t ret = write(fd, str, strlen(str));
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not write index descriptor: %s", strerror(errno));
}
free(str);
close(fd);
cJSON_Delete(json);
}
index_descriptor_t read_index_descriptor(char *path) {
struct stat info;
stat(path, &info);
int fd = open(path, O_RDONLY);
if (fd == -1) {
LOG_FATALF("serialize.c", "Invalid/corrupt index (Could not find descriptor): %s: %s\n", path, strerror(errno))
}
char *buf = malloc(info.st_size + 1);
size_t ret = read(fd, buf, info.st_size);
if (ret == -1) {
LOG_FATALF("serialize.c", "Could not read index descriptor: %s", strerror(errno));
}
*(buf + info.st_size) = '\0';
close(fd);
cJSON *json = cJSON_Parse(buf);
index_descriptor_t descriptor;
descriptor.timestamp = (long) cJSON_GetObjectItem(json, "timestamp")->valuedouble;
strcpy(descriptor.root, cJSON_GetObjectItem(json, "root")->valuestring);
strcpy(descriptor.name, cJSON_GetObjectItem(json, "name")->valuestring);
strcpy(descriptor.rewrite_url, cJSON_GetObjectItem(json, "rewrite_url")->valuestring);
descriptor.root_len = (short) strlen(descriptor.root);
strcpy(descriptor.version, cJSON_GetObjectItem(json, "version")->valuestring);
strcpy(descriptor.id, cJSON_GetObjectItem(json, "id")->valuestring);
if (cJSON_GetObjectItem(json, "type") == NULL) {
strcpy(descriptor.type, INDEX_TYPE_NDJSON);
} else {
strcpy(descriptor.type, cJSON_GetObjectItem(json, "type")->valuestring);
}
cJSON_Delete(json);
free(buf);
return descriptor;
}
void write_document(document_t *doc) {
tpool_add_work(ScanCtx.writer_pool, write_document_func, doc);
}
void thread_cleanup() {
close(index_fd);
cleanup_parse();
cleanup_font();
}
void read_index_bin_handle_line(const char *line, const char *index_id, index_func func) {
void read_index_bin(const char *path, const char *index_id, index_func func) {
line_t line;
dyn_buffer_t buf = dyn_buffer_create();
cJSON *document = cJSON_Parse(line);
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
FILE *file = fopen(path, "rb");
while (TRUE) {
buf.cur = 0;
size_t _ = fread((void *) &line, sizeof(line_t), 1, file);
if (feof(file)) {
break;
}
cJSON_AddStringToObject(document, "index", index_id);
cJSON *document = cJSON_CreateObject();
cJSON_AddStringToObject(document, "index", index_id);
// Load meta from sidecar files
cJSON *meta_obj = NULL;
if (IndexCtx.meta != NULL) {
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
if (meta_string != NULL) {
meta_obj = cJSON_Parse(meta_string);
char path_md5_str[MD5_STR_LENGTH];
buf2hex(line.path_md5, sizeof(line.path_md5), path_md5_str);
const char *mime_text = mime_get_mime_text(line.mime);
if (mime_text == NULL) {
cJSON_AddNullToObject(document, "mime");
} else {
cJSON_AddStringToObject(document, "mime", mime_get_mime_text(line.mime));
}
cJSON_AddNumberToObject(document, "size", (double) line.size);
cJSON_AddNumberToObject(document, "mtime", line.mtime);
int c = 0;
while ((c = getc(file)) != 0) {
dyn_buffer_write_char(&buf, (char) c);
}
dyn_buffer_write_char(&buf, '\0');
cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
if (*(buf.buf + line.ext - 1) == '.') {
*(buf.buf + line.ext - 1) = '\0';
} else {
*(buf.buf + line.ext) = '\0';
}
char tmp[PATH_MAX * 3];
str_escape(tmp, buf.buf + line.base);
cJSON_AddStringToObject(document, "name", tmp);
if (line.base > 0) {
*(buf.buf + line.base - 1) = '\0';
str_escape(tmp, buf.buf);
cJSON_AddStringToObject(document, "path", tmp);
} else {
cJSON_AddStringToObject(document, "path", "");
}
enum metakey key = 0;
fread(&key, sizeof(uint16_t), 1, file);
size_t ret;
while (key != META_NEXT) {
switch (key) {
case MetaPages:
case MetaWidth:
case MetaHeight: {
int value;
ret = fread(&value, sizeof(int), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), value);
break;
}
case MetaMediaDuration:
case MetaMediaBitrate: {
long value;
ret = fread(&value, sizeof(long), 1, file);
cJSON_AddNumberToObject(document, get_meta_key_text(key), (double) value);
break;
}
case MetaMediaAudioCodec:
case MetaMediaVideoCodec:
case MetaContent:
case MetaArtist:
case MetaAlbum:
case MetaAlbumArtist:
case MetaGenre:
case MetaFontName:
case MetaParent:
case MetaExifMake:
case MetaExifSoftware:
case MetaExifExposureTime:
case MetaExifFNumber:
case MetaExifFocalLength:
case MetaExifUserComment:
case MetaExifIsoSpeedRatings:
case MetaExifDateTime:
case MetaExifModel:
case MetaAuthor:
case MetaModifiedBy:
case MetaThumbnail:
case MetaExifGpsLongitudeDMS:
case MetaExifGpsLongitudeDec:
case MetaExifGpsLongitudeRef:
case MetaExifGpsLatitudeDMS:
case MetaExifGpsLatitudeDec:
case MetaExifGpsLatitudeRef:
case MetaTitle: {
buf.cur = 0;
while ((c = getc(file)) != 0) {
if (SHOULD_KEEP_CHAR(c) || c == ' ') {
dyn_buffer_write_char(&buf, (char) c);
}
}
dyn_buffer_write_char(&buf, '\0');
cJSON_AddStringToObject(document, get_meta_key_text(key), buf.buf);
break;
}
default:
LOG_FATALF("serialize.c", "Invalid meta key (corrupt index): %x", key)
cJSON *child;
for (child = meta_obj->child; child != NULL; child = child->next) {
char meta_key[4096];
strcpy(meta_key, child->string);
cJSON_DeleteItemFromObject(document, meta_key);
cJSON_AddItemReferenceToObject(document, meta_key, child);
}
fread(&key, sizeof(uint16_t), 1, file);
}
cJSON *meta_obj = NULL;
if (IndexCtx.meta != NULL) {
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, path_md5_str);
if (meta_string != NULL) {
meta_obj = cJSON_Parse(meta_string);
cJSON *child;
for (child = meta_obj->child; child != NULL; child = child->next) {
char meta_key[4096];
strcpy(meta_key, child->string);
cJSON_DeleteItemFromObject(document, meta_key);
cJSON_AddItemReferenceToObject(document, meta_key, child);
}
}
}
if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_DeleteItemFromObject(document, "tag");
cJSON_AddItemToObject(document, "tag", tags_arr);
}
}
func(document, path_md5_str);
cJSON_Delete(document);
if (meta_obj) {
cJSON_Delete(meta_obj);
}
}
dyn_buffer_destroy(&buf);
fclose(file);
// Load tags from tags DB
if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, path_md5_str);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_DeleteItemFromObject(document, "tag");
cJSON_AddItemToObject(document, "tag", tags_arr);
}
}
func(document, path_md5_str);
cJSON_DeleteItemFromObject(document, "_id");
cJSON_Delete(document);
if (meta_obj) {
cJSON_Delete(meta_obj);
}
}
const char *json_type_copy_fields[] = {
"mime", "name", "path", "extension", "index", "size", "mtime", "parent",
void read_index_ndjson(const char *path, const char *index_id, index_func func) {
dyn_buffer_t buf = dyn_buffer_create();
// Meta
"title", "content", "width", "height", "duration", "audioc", "videoc",
"bitrate", "artist", "album", "album_artist", "genre", "title", "font_name",
// Initialize zstd things
FILE *file = fopen(path, "rb");
// Special
"tag", "_url"
};
size_t const buf_in_size = ZSTD_DStreamInSize();
void *const buf_in = malloc(buf_in_size);
const char *json_type_array_fields[] = {
"_keyword", "_text"
};
size_t const buf_out_size = ZSTD_DStreamOutSize();
void *const buf_out = malloc(buf_out_size);
void read_index_json(const char *path, UNUSED(const char *index_id), index_func func) {
ZSTD_DCtx *const dctx = ZSTD_createDCtx();
FILE *file = fopen(path, "r");
while (TRUE) {
char *line = NULL;
size_t len;
size_t read = getline(&line, &len, file);
if (read < 0) {
if (line) {
free(line);
}
break;
}
size_t read;
size_t last_ret = 0;
while ((read = fread(buf_in, 1, buf_in_size, file))) {
ZSTD_inBuffer input = {buf_in, read, 0};
cJSON *input = cJSON_Parse(line);
if (input == NULL) {
LOG_FATALF("serialize.c", "Could not parse JSON line: \n%s", line)
}
if (line) {
free(line);
}
while (input.pos < input.size) {
ZSTD_outBuffer output = {buf_out, buf_out_size, 0};
cJSON *document = cJSON_CreateObject();
const char *id_str = cJSON_GetObjectItem(input, "_id")->valuestring;
size_t const ret = ZSTD_decompressStream(dctx, &output, &input);
for (int i = 0; i < (sizeof(json_type_copy_fields) / sizeof(json_type_copy_fields[0])); i++) {
cJSON *value = cJSON_GetObjectItem(input, json_type_copy_fields[i]);
if (value != NULL) {
cJSON_AddItemReferenceToObject(document, json_type_copy_fields[i], value);
}
}
for (int i = 0; i < output.pos; i++) {
char c = ((char *) output.dst)[i];
for (int i = 0; i < (sizeof(json_type_array_fields) / sizeof(json_type_array_fields[0])); i++) {
cJSON *arr = cJSON_GetObjectItem(input, json_type_array_fields[i]);
if (arr != NULL) {
cJSON *obj;
cJSON_ArrayForEach(obj, arr) {
char key[1024];
cJSON *k = cJSON_GetObjectItem(obj, "k");
cJSON *v = cJSON_GetObjectItem(obj, "v");
if (k == NULL || v == NULL || !cJSON_IsString(k) || !cJSON_IsString(v)) {
char *str = cJSON_Print(obj);
LOG_FATALF("serialize.c", "Invalid %s member: must contain .k and .v string fields: \n%s",
json_type_array_fields[i], str)
}
snprintf(key, sizeof(key), "%s.%s", json_type_array_fields[i], k->valuestring);
cJSON_AddStringToObject(document, key, v->valuestring);
if (c == '\n') {
dyn_buffer_write_char(&buf, '\0');
read_index_bin_handle_line(buf.buf, index_id, func);
buf.cur = 0;
} else {
dyn_buffer_write_char(&buf, c);
}
}
last_ret = ret;
}
func(document, id_str);
cJSON_Delete(document);
cJSON_Delete(input);
}
if (last_ret != 0) {
/* The last return value from ZSTD_decompressStream did not end on a
* frame, but we reached the end of the file! We assume this is an
* error, and the input was truncated.
*/
LOG_FATALF("serialize.c", "EOF before end of stream: %zu", last_ret)
}
ZSTD_freeDCtx(dctx);
free(buf_in);
free(buf_out);
dyn_buffer_destroy(&buf);
fclose(file);
}
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
if (strcmp(type, INDEX_TYPE_BIN) == 0) {
read_index_bin(path, index_id, func);
} else if (strcmp(type, INDEX_TYPE_JSON) == 0) {
read_index_json(path, index_id, func);
if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
read_index_ndjson(path, index_id, func);
}
}
void incremental_read(GHashTable *table, const char *filepath) {
FILE *file = fopen(filepath, "rb");
line_t line;
static __thread GHashTable *IncrementalReadTable = NULL;
LOG_DEBUGF("serialize.c", "Incremental read %s", filepath)
void json_put_incremental(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
const int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
while (1) {
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
if (ret != 1 || feof(file)) {
break;
incremental_put_str(IncrementalReadTable, path_md5_str, mtime);
}
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc) {
IncrementalReadTable = table;
read_index(filepath, desc->id, desc->type, json_put_incremental);
}
static __thread GHashTable *IncrementalCopyTable = NULL;
static __thread store_t *IncrementalCopySourceStore = NULL;
static __thread store_t *IncrementalCopyDestinationStore = NULL;
void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
unsigned char path_md5[MD5_DIGEST_LENGTH];
hex2buf(path_md5_str, MD5_STR_LENGTH - 1, path_md5);
if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get_str(IncrementalCopyTable, path_md5_str)) {
// Copy index line
cJSON_DeleteItemFromObject(document, "index");
char *json_str = cJSON_PrintUnformatted(document);
const size_t json_str_len = strlen(json_str);
json_str = realloc(json_str, json_str_len + 1);
*(json_str + json_str_len) = '\n';
zstd_write_string(json_str, json_str_len + 1);
free(json_str);
// Copy tn store contents
size_t buf_len;
char *buf = store_read(IncrementalCopySourceStore, (char *) path_md5, sizeof(path_md5), &buf_len);
if (buf_len != 0) {
store_write(IncrementalCopyDestinationStore, (char *) path_md5, sizeof(path_md5), buf, buf_len);
free(buf);
}
incremental_put(table, line.path_md5, line.mtime);
while ((getc(file)) != 0) {}
skip_meta(file);
}
fclose(file);
}
/**
@@ -493,72 +507,14 @@ void incremental_read(GHashTable *table, const char *filepath) {
*/
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table) {
FILE *file = fopen(filepath, "rb");
FILE *dst_file = fopen(dst_filepath, "ab");
line_t line;
LOG_DEBUGF("serialize.c", "Incremental copy %s", filepath)
while (TRUE) {
size_t ret = fread((void *) &line, sizeof(line_t), 1, file);
if (ret != 1 || feof(file)) {
break;
}
// Assume that files with parents still exist.
// One way to "fix" this would be to check if the parent is marked for copy but it would consistently
// delete files with grandparents, which is a side-effect worse than having orphaned files
if (line.has_parent || incremental_get(copy_table, line.path_md5)) {
fwrite(&line, sizeof(line), 1, dst_file);
// Copy filepath
char filepath_buf[PATH_MAX];
char c;
char *ptr = filepath_buf;
while ((c = (char) getc(file))) {
*ptr++ = c;
}
*ptr = '\0';
fwrite(filepath_buf, (ptr - filepath_buf) + 1, 1, dst_file);
// Copy tn store contents
size_t buf_len;
char path_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) filepath_buf, (ptr - filepath_buf), (unsigned char *) path_md5);
char *buf = store_read(store, path_md5, sizeof(path_md5), &buf_len);
if (buf_len != 0) {
store_write(dst_store, path_md5, sizeof(path_md5), buf, buf_len);
free(buf);
}
enum metakey key = 0;
while (1) {
fread(&key, sizeof(uint16_t), 1, file);
fwrite(&key, sizeof(uint16_t), 1, dst_file);
if (key == META_NEXT) {
break;
}
if (IS_META_INT(key)) {
int val;
ret = fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file);
} else if (IS_META_LONG(key)) {
long val;
ret = fread(&val, sizeof(val), 1, file);
fwrite(&val, sizeof(val), 1, dst_file);
} else {
while ((c = (char) getc(file))) {
fwrite(&c, sizeof(c), 1, dst_file);
}
fwrite("\0", sizeof(c), 1, dst_file);
}
}
} else {
while ((getc(file))) {}
skip_meta(file);
}
if (WriterCtx.out_file == NULL) {
initialize_writer_ctx(dst_filepath);
}
fclose(file);
fclose(dst_file);
IncrementalCopyTable = copy_table;
IncrementalCopySourceStore = store;
IncrementalCopyDestinationStore = dst_store;
read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
}

View File

@@ -16,13 +16,15 @@ void write_document(document_t *doc);
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
void incremental_read(GHashTable *table, const char *filepath);
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);
/**
* Must be called after write_document
*/
void thread_cleanup();
void writer_cleanup();
void write_index_descriptor(char *path, index_descriptor_t *desc);
index_descriptor_t read_index_descriptor(char *path);

View File

@@ -1,9 +1,10 @@
#include "store.h"
#include "src/ctx.h"
store_t *store_create(char *path, size_t chunk_size) {
store_t *store_create(const char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t));
mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR);
#if (SIST_FAKE_STORE != 1)
store->chunk_size = chunk_size;
pthread_rwlock_init(&store->lock, NULL);
@@ -38,7 +39,7 @@ void store_destroy(store_t *store) {
#if (SIST_FAKE_STORE != 1)
pthread_rwlock_destroy(&store->lock);
mdb_close(store->env, store->dbi);
mdb_dbi_close(store->env, store->dbi);
mdb_env_close(store->env);
#endif
free(store);

View File

@@ -11,6 +11,8 @@
#define STORE_SIZE_META STORE_SIZE_TAG
typedef struct store_t {
char *path;
char *tmp_path;
MDB_dbi dbi;
MDB_env *env;
size_t size;
@@ -18,7 +20,7 @@ typedef struct store_t {
pthread_rwlock_t lock;
} store_t;
store_t *store_create(char *path, size_t chunk_size);
store_t *store_create(const char *path, size_t chunk_size);
void store_destroy(store_t *store);