From e65905a16553c8678bbda3c2576793d901a8b398 Mon Sep 17 00:00:00 2001 From: Yatao Li Date: Thu, 20 Jan 2022 23:39:38 +0800 Subject: [PATCH] only add new entries into new_table to save memory --- src/io/serialize.c | 5 +++-- src/io/serialize.h | 2 +- src/main.c | 2 +- src/parsing/parse.c | 1 - 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/io/serialize.c b/src/io/serialize.c index 5eb4f0c..36c9e9e 100644 --- a/src/io/serialize.c +++ b/src/io/serialize.c @@ -535,7 +535,7 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath, read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc); } -void incremental_delete(const char *del_filepath, GHashTable *orig_table, GHashTable *new_table) { +void incremental_delete(const char *del_filepath, GHashTable *orig_table, GHashTable *copy_table, GHashTable *new_table) { GHashTableIter iter; gpointer key, UNUSED(value); char path_md5[MD5_STR_LENGTH + 1]; @@ -544,7 +544,8 @@ void incremental_delete(const char *del_filepath, GHashTable *orig_table, GHashT initialize_writer_ctx(del_filepath); g_hash_table_iter_init(&iter, orig_table); while(g_hash_table_iter_next(&iter, &key, &value)) { - if (NULL == g_hash_table_lookup(new_table, key)) { + if (NULL == g_hash_table_lookup(new_table, key) && + NULL == g_hash_table_lookup(copy_table, key)) { memcpy(path_md5, key, MD5_STR_LENGTH - 1); zstd_write_string(path_md5, MD5_STR_LENGTH); } diff --git a/src/io/serialize.h b/src/io/serialize.h index 5ded868..d3339df 100644 --- a/src/io/serialize.h +++ b/src/io/serialize.h @@ -17,7 +17,7 @@ typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]); void incremental_copy(store_t *store, store_t *dst_store, const char *filepath, const char *dst_filepath, GHashTable *copy_table); -void incremental_delete(const char *del_filepath, GHashTable *orig_table, GHashTable *new_table); +void incremental_delete(const char *del_filepath, GHashTable *orig_table, GHashTable *copy_table, GHashTable *new_table); void write_document(document_t *doc); diff --git a/src/main.c b/src/main.c index 3bb9401..d3958f1 100644 --- a/src/main.c +++ b/src/main.c @@ -341,7 +341,7 @@ void save_incremental_index(scan_args_t* args) { } snprintf(file_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path); - incremental_delete(file_path, ScanCtx.original_table, ScanCtx.new_table); + incremental_delete(file_path, ScanCtx.original_table, ScanCtx.copy_table, ScanCtx.new_table); READ_INDICES(file_path, args->incremental, incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table), perror("incremental_copy"), 1); diff --git a/src/parsing/parse.c b/src/parsing/parse.c index 1b74cd4..7394c5f 100644 --- a/src/parsing/parse.c +++ b/src/parsing/parse.c @@ -81,7 +81,6 @@ void parse(void *arg) { if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) { pthread_mutex_lock(&ScanCtx.copy_table_mu); incremental_mark_file(ScanCtx.copy_table, doc->path_md5); - incremental_mark_file(ScanCtx.new_table, doc->path_md5); pthread_mutex_unlock(&ScanCtx.copy_table_mu); pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);