incremental_delete: read from index file so that we have parent info

Yatao Li 2022-01-21 03:30:07 +08:00
parent e65905a165
commit 8f7edf3190
3 changed files with 42 additions and 35 deletions
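Summary of the change: previously, incremental_delete walked ScanCtx.original_table (keyed by path MD5) and wrote out every key missing from both copy_table and new_table. Hash keys carry no document metadata, so the virtual entries generated for archive contents, which have a "parent" field but no file of their own on disk, could be wrongly queued for deletion. After this commit the delete pass re-reads the original NDJSON index through read_index, so the handler sees the full cJSON document and can skip anything with a "parent". Below is a minimal sketch (not sist2 code) of the two document shapes the new handler distinguishes; the MD5 values are made up, and only the "_id" and "parent" fields are taken from this commit.

    /* Illustrative only -- requires libcjson. */
    #include <cjson/cJSON.h>
    #include <stdio.h>

    int main(void) {
        /* A top-level file: no "parent", so it is a deletion candidate
         * when its MD5 is in neither the copy table nor the new table. */
        cJSON *file_doc = cJSON_Parse(
                "{\"_id\": \"d41d8cd98f00b204e9800998ecf8427e\"}");

        /* An archive virtual entry: "parent" points at the containing
         * archive, so the new handler never emits it for deletion. */
        cJSON *virtual_doc = cJSON_Parse(
                "{\"_id\": \"900150983cd24fb0d6963f7d28e17f72\","
                " \"parent\": \"d41d8cd98f00b204e9800998ecf8427e\"}");

        printf("file_doc deletable:    %s\n",
               cJSON_GetObjectItem(file_doc, "parent") == NULL ? "maybe" : "no");
        printf("virtual_doc deletable: %s\n",
               cJSON_GetObjectItem(virtual_doc, "parent") == NULL ? "maybe" : "no");

        cJSON_Delete(file_doc);
        cJSON_Delete(virtual_doc);
        return 0;
    }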

View File

@@ -486,6 +486,7 @@ void incremental_read(GHashTable *table, const char *filepath, index_descriptor_
 }
 
 static __thread GHashTable *IncrementalCopyTable = NULL;
+static __thread GHashTable *IncrementalNewTable = NULL;
 static __thread store_t *IncrementalCopySourceStore = NULL;
 static __thread store_t *IncrementalCopyDestinationStore = NULL;
@@ -535,20 +536,32 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
     read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
 }
 
-void incremental_delete(const char *del_filepath, GHashTable *orig_table, GHashTable *copy_table, GHashTable *new_table) {
-    GHashTableIter iter;
-    gpointer key, UNUSED(value);
-    char path_md5[MD5_STR_LENGTH + 1];
-    path_md5[MD5_STR_LENGTH] = '\0';
-    path_md5[MD5_STR_LENGTH - 1] = '\n';
+void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
+    char path_md5_n[MD5_STR_LENGTH + 1];
+    path_md5_n[MD5_STR_LENGTH] = '\0';
+    path_md5_n[MD5_STR_LENGTH - 1] = '\n';
+    const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
+
+    // do not delete archive virtual entries
+    if (cJSON_GetObjectItem(document, "parent") == NULL
+        && !incremental_get_str(IncrementalCopyTable, path_md5_str)
+        && !incremental_get_str(IncrementalNewTable, path_md5_str)
+    ) {
+        memcpy(path_md5_n, path_md5_str, MD5_STR_LENGTH - 1);
+        zstd_write_string(path_md5_n, MD5_STR_LENGTH);
+    }
+}
+
+void incremental_delete(const char *del_filepath, const char* index_filepath,
+                        GHashTable *copy_table, GHashTable *new_table) {
 
     if (WriterCtx.out_file == NULL) {
         initialize_writer_ctx(del_filepath);
     }
 
-    g_hash_table_iter_init(&iter, orig_table);
-    while(g_hash_table_iter_next(&iter, &key, &value)) {
-        if (NULL == g_hash_table_lookup(new_table, key) &&
-            NULL == g_hash_table_lookup(copy_table, key)) {
-            memcpy(path_md5, key, MD5_STR_LENGTH - 1);
-            zstd_write_string(path_md5, MD5_STR_LENGTH);
-        }
-    }
-
-    writer_cleanup();
+    IncrementalCopyTable = copy_table;
+    IncrementalNewTable = new_table;
+
+    read_index(index_filepath, "", INDEX_TYPE_NDJSON, incremental_delete_handle_doc);
 }
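A note on the plumbing above: the index_func callback type (declared in the header below) receives only the document and its id string, with no user-data argument, so incremental_delete hands copy_table and new_table to the handler through the thread-local statics IncrementalCopyTable and IncrementalNewTable, set just before read_index runs. A standalone toy sketch of that pattern, with invented names:

    /* Toy sketch of passing state to a context-free callback via a
     * thread-local static, as incremental_delete does above. */
    #include <stdio.h>

    typedef void (*doc_func)(const char *doc);   /* no void *ctx slot */

    static __thread const char *Needle = NULL;   /* stands in for the tables */

    static void handle_doc(const char *doc) {
        /* reads the state stashed by the driver, the way
         * incremental_delete_handle_doc reads IncrementalCopyTable */
        if (Needle != NULL && doc[0] == Needle[0]) {
            printf("matched: %s\n", doc);
        }
    }

    static void for_each_doc(doc_func f) {       /* read_index stand-in */
        const char *docs[] = {"alpha", "beta", "avocado"};
        for (int i = 0; i < 3; i++) {
            f(docs[i]);
        }
    }

    int main(void) {
        Needle = "a";             /* IncrementalCopyTable = copy_table; */
        for_each_doc(handle_doc); /* read_index(..., handle_doc);       */
        return 0;
    }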

View File

@@ -17,7 +17,8 @@ typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
 void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
                       const char *dst_filepath, GHashTable *copy_table);
-void incremental_delete(const char *del_filepath, GHashTable *orig_table, GHashTable *copy_table, GHashTable *new_table);
+void incremental_delete(const char *del_filepath, const char* index_filepath,
+                        GHashTable *copy_table, GHashTable *new_table);
 
 void write_document(document_t *doc);

@@ -47,7 +48,7 @@ index_descriptor_t read_index_descriptor(char *path);
         action_main_fail; \
     } \
     snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
-    if ((cond_original) && 0 == access(file_path, R_OK)) { \
+    if ((cond_original) && (0 == access(file_path, R_OK))) { \
         action_ok; \
     } \
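The second hunk here only wraps 0 == access(file_path, R_OK) in parentheses. Since == binds tighter than &&, the expansion is unchanged; the explicit grouping is defensive hygiene inside a multi-line macro. A two-line illustration (variable names invented):

    #include <assert.h>

    int main(void) {
        int cond_original = 1, access_rc = 0;
        /* == binds tighter than &&, so both groupings evaluate identically */
        assert((cond_original && 0 == access_rc)
               == (cond_original && (0 == access_rc)));
        return 0;
    }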

View File

@@ -299,11 +299,6 @@ void load_incremental_index(const scan_args_t *args) {
     ScanCtx.copy_table = incremental_get_table();
     ScanCtx.new_table = incremental_get_table();
 
-    DIR *dir = opendir(args->incremental);
-    if (dir == NULL) {
-        LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
-    }
-
     char descriptor_path[PATH_MAX];
     snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
     index_descriptor_t original_desc = read_index_descriptor(descriptor_path);

@@ -315,8 +310,6 @@
     READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc),
                  LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)), 1);
 
-    closedir(dir);
-
     LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
 }
@@ -330,26 +323,26 @@ void save_incremental_index(scan_args_t* args) {
     char dst_path[PATH_MAX];
     char store_path[PATH_MAX];
     char file_path[PATH_MAX];
+    char del_path[PATH_MAX];
     snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
     snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path);
     store_t *source = store_create(store_path, STORE_SIZE_TN);
 
-    DIR *dir = opendir(args->incremental);
-    if (dir == NULL) {
-        perror("opendir");
-        return;
-    }
-
-    snprintf(file_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path);
-    incremental_delete(file_path, ScanCtx.original_table, ScanCtx.copy_table, ScanCtx.new_table);
+    LOG_INFOF("main.c", "incremental_delete: original size = %u, copy size = %u, new size = %u",
+              g_hash_table_size(ScanCtx.original_table),
+              g_hash_table_size(ScanCtx.copy_table),
+              g_hash_table_size(ScanCtx.new_table));
+    snprintf(del_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path);
+    READ_INDICES(file_path, args->incremental, incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table),
+                 perror("incremental_delete"), 1);
+    writer_cleanup();
 
     READ_INDICES(file_path, args->incremental, incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table),
                  perror("incremental_copy"), 1);
-    closedir(dir);
-    store_destroy(source);
     writer_cleanup();
+    store_destroy(source);
 
     snprintf(store_path, PATH_MAX, "%stags", args->incremental);
     snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
     store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
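Net effect on save_incremental_index: the delete pass no longer iterates a hash table once in memory; it reuses READ_INDICES to visit each original index file, and writer_cleanup() now runs twice, once to flush the delete list and once after the copy pass, since both passes write through the shared WriterCtx. A condensed standalone sketch of that two-pass shape; the file names and helpers are hypothetical stand-ins:

    /* Hypothetical sketch of the two-pass structure, not sist2 code. */
    #include <stdio.h>

    static void scan_all(const char *pass, void (*per_file)(const char *)) {
        /* READ_INDICES stand-in: visit every original index file */
        const char *files[] = {"_index_main.ndjson.zst",
                               "_index_original.ndjson.zst"};
        for (int i = 0; i < 2; i++) {
            per_file(files[i]);
        }
        printf("writer flushed after %s pass\n", pass); /* writer_cleanup() */
    }

    static void delete_pass(const char *f) { printf("delete scan: %s\n", f); }
    static void copy_pass(const char *f)   { printf("copy scan:   %s\n", f); }

    int main(void) {
        scan_all("delete", delete_pass); /* emits _index_delete.list.zst */
        scan_all("copy", copy_pass);     /* copies surviving documents  */
        return 0;
    }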