From 8f7edf319044890b5e24bdd1d140e9ae581bf917 Mon Sep 17 00:00:00 2001 From: Yatao Li Date: Fri, 21 Jan 2022 03:30:07 +0800 Subject: [PATCH] incremental_delete: read from index file so that we have parent info --- src/io/serialize.c | 43 ++++++++++++++++++++++++++++--------------- src/io/serialize.h | 5 +++-- src/main.c | 29 +++++++++++------------------ 3 files changed, 42 insertions(+), 35 deletions(-) diff --git a/src/io/serialize.c b/src/io/serialize.c index 36c9e9e..c0bb9d1 100644 --- a/src/io/serialize.c +++ b/src/io/serialize.c @@ -486,6 +486,7 @@ void incremental_read(GHashTable *table, const char *filepath, index_descriptor_ } static __thread GHashTable *IncrementalCopyTable = NULL; +static __thread GHashTable *IncrementalNewTable = NULL; static __thread store_t *IncrementalCopySourceStore = NULL; static __thread store_t *IncrementalCopyDestinationStore = NULL; @@ -535,20 +536,32 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath, read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc); } -void incremental_delete(const char *del_filepath, GHashTable *orig_table, GHashTable *copy_table, GHashTable *new_table) { - GHashTableIter iter; - gpointer key, UNUSED(value); - char path_md5[MD5_STR_LENGTH + 1]; - path_md5[MD5_STR_LENGTH] = '\0'; - path_md5[MD5_STR_LENGTH - 1] = '\n'; - initialize_writer_ctx(del_filepath); - g_hash_table_iter_init(&iter, orig_table); - while(g_hash_table_iter_next(&iter, &key, &value)) { - if (NULL == g_hash_table_lookup(new_table, key) && - NULL == g_hash_table_lookup(copy_table, key)) { - memcpy(path_md5, key, MD5_STR_LENGTH - 1); - zstd_write_string(path_md5, MD5_STR_LENGTH); - } +void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) { + + char path_md5_n[MD5_STR_LENGTH + 1]; + path_md5_n[MD5_STR_LENGTH] = '\0'; + path_md5_n[MD5_STR_LENGTH - 1] = '\n'; + const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring; + + // do not delete archive virtual entries + if (cJSON_GetObjectItem(document, "parent") == NULL + && !incremental_get_str(IncrementalCopyTable, path_md5_str) + && !incremental_get_str(IncrementalNewTable, path_md5_str) + ) { + memcpy(path_md5_n, path_md5_str, MD5_STR_LENGTH - 1); + zstd_write_string(path_md5_n, MD5_STR_LENGTH); } - writer_cleanup(); +} + +void incremental_delete(const char *del_filepath, const char* index_filepath, + GHashTable *copy_table, GHashTable *new_table) { + + if (WriterCtx.out_file == NULL) { + initialize_writer_ctx(del_filepath); + } + + IncrementalCopyTable = copy_table; + IncrementalNewTable = new_table; + + read_index(index_filepath, "", INDEX_TYPE_NDJSON, incremental_delete_handle_doc); } diff --git a/src/io/serialize.h b/src/io/serialize.h index d3339df..0d8275e 100644 --- a/src/io/serialize.h +++ b/src/io/serialize.h @@ -17,7 +17,8 @@ typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]); void incremental_copy(store_t *store, store_t *dst_store, const char *filepath, const char *dst_filepath, GHashTable *copy_table); -void incremental_delete(const char *del_filepath, GHashTable *orig_table, GHashTable *copy_table, GHashTable *new_table); +void incremental_delete(const char *del_filepath, const char* index_filepath, + GHashTable *copy_table, GHashTable *new_table); void write_document(document_t *doc); @@ -47,7 +48,7 @@ index_descriptor_t read_index_descriptor(char *path); action_main_fail; \ } \ snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \ - if ((cond_original) && 0 == access(file_path, R_OK)) { \ + if ((cond_original) && (0 == access(file_path, R_OK))) { \ action_ok; \ } \ diff --git a/src/main.c b/src/main.c index d3958f1..f514569 100644 --- a/src/main.c +++ b/src/main.c @@ -299,11 +299,6 @@ void load_incremental_index(const scan_args_t *args) { ScanCtx.copy_table = incremental_get_table(); ScanCtx.new_table = incremental_get_table(); - DIR *dir = opendir(args->incremental); - if (dir == NULL) { - LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno)) - } - char descriptor_path[PATH_MAX]; snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental); index_descriptor_t original_desc = read_index_descriptor(descriptor_path); @@ -315,8 +310,6 @@ void load_incremental_index(const scan_args_t *args) { READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc), LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)), 1); - closedir(dir); - LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table)) } @@ -330,26 +323,26 @@ void save_incremental_index(scan_args_t* args) { char dst_path[PATH_MAX]; char store_path[PATH_MAX]; char file_path[PATH_MAX]; + char del_path[PATH_MAX]; snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental); snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path); store_t *source = store_create(store_path, STORE_SIZE_TN); - DIR *dir = opendir(args->incremental); - if (dir == NULL) { - perror("opendir"); - return; - } - - snprintf(file_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path); - incremental_delete(file_path, ScanCtx.original_table, ScanCtx.copy_table, ScanCtx.new_table); + LOG_INFOF("main.c", "incremental_delete: original size = %u, copy size = %u, new size = %u", + g_hash_table_size(ScanCtx.original_table), + g_hash_table_size(ScanCtx.copy_table), + g_hash_table_size(ScanCtx.new_table)); + snprintf(del_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path); + READ_INDICES(file_path, args->incremental, incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table), + perror("incremental_delete"), 1); + writer_cleanup(); READ_INDICES(file_path, args->incremental, incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table), perror("incremental_copy"), 1); - - closedir(dir); - store_destroy(source); writer_cleanup(); + store_destroy(source); + snprintf(store_path, PATH_MAX, "%stags", args->incremental); snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path); store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);