Add argument to calculate checksums

This commit is contained in:
2021-09-11 14:31:31 -04:00
parent 7267d4bd2c
commit c9aa4bed72
17 changed files with 80 additions and 22 deletions

View File

@@ -28,6 +28,7 @@ typedef struct scan_args {
int max_memory_buffer;
int read_subtitles;
int fast_epub;
int calculate_checksums;
} scan_args_t;
scan_args_t *scan_args_create();

View File

@@ -33,6 +33,7 @@ typedef struct {
int threads;
int depth;
int calculate_checksums;
size_t stat_tn_size;
size_t stat_index_size;

File diff suppressed because one or more lines are too long

View File

@@ -74,6 +74,8 @@ char *get_meta_key_text(enum metakey meta_key) {
return "exif_gps_latitude_dms";
case MetaExifGpsLatitudeDec:
return "exif_gps_latitude_dec";
case MetaChecksum:
return "checksum";
default:
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
}
@@ -165,6 +167,7 @@ char *build_json_string(document_t *doc) {
case MetaExifGpsLatitudeDMS:
case MetaExifGpsLatitudeDec:
case MetaExifGpsLatitudeRef:
case MetaChecksum:
case MetaTitle: {
cJSON_AddStringToObject(json, get_meta_key_text(meta->key), meta->str_val);
buffer_size_guess += (int) strlen(meta->str_val);

View File

@@ -28,6 +28,8 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->vfile.close = fs_close;
job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE;
job->vfile.has_checksum = FALSE;
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
return job;
}

View File

@@ -170,6 +170,8 @@ void initialize_scan_context(scan_args_t *args) {
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
ScanCtx.calculate_checksums = args->calculate_checksums;
// Archive
ScanCtx.arc_ctx.mode = args->archive_mode;
ScanCtx.arc_ctx.log = _log;
@@ -516,8 +518,8 @@ void sist2_web(web_args_t *args) {
int main(int argc, const char *argv[]) {
sigsegv_handler = signal(SIGSEGV, sig_handler);
sigabrt_handler = signal(SIGABRT, sig_handler);
// sigsegv_handler = signal(SIGSEGV, sig_handler);
// sigabrt_handler = signal(SIGABRT, sig_handler);
setlocale(LC_ALL, "");
@@ -574,6 +576,7 @@ int main(int argc, const char *argv[]) {
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
"Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
OPT_BOOLEAN(0, "checksum", &scan_args->calculate_checksums, "Calculate file checksums when scanning."),
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),

View File

@@ -10,25 +10,34 @@
#define MIN_VIDEO_SIZE (1024 * 64)
#define MIN_IMAGE_SIZE (1024 * 2)
#define MIN_IMAGE_SIZE (512)
/**
 * Read up to `size` bytes from the file backing `f` into `buf`.
 *
 * The file is opened lazily: while f->fd is -1 the SHA-1 context is
 * initialised and f->filepath is opened read-only. When
 * f->calculate_checksum is set, every chunk that was actually read is fed
 * into the running SHA-1 and f->has_checksum is raised.
 *
 * @param f    virtual file to read from
 * @param buf  destination buffer, at least `size` bytes long
 * @param size maximum number of bytes to read
 * @return the read() result narrowed to int (0 at end of file), or -1 when
 *         the file could not be opened (the failure is logged) or read()
 *         itself failed
 */
int fs_read(struct vfile *f, void *buf, size_t size) {
    if (f->fd == -1) {
        SHA1_Init(&f->sha1_ctx);

        f->fd = open(f->filepath, O_RDONLY);
        if (f->fd == -1) {
            LOG_ERRORF(f->filepath, "open(): [%d] %s", errno, strerror(errno))
            return -1;
        }
    }

    int ret = (int) read(f->fd, buf, size);

    // Hash only when bytes were actually read: read() returns -1 on error and
    // a negative length must never reach the hash update, nor should a failed
    // read mark the file as having a checksum.
    if (ret > 0 && f->calculate_checksum) {
        f->has_checksum = TRUE;
        safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, ret);
    }

    return ret;
}
// Invoke the vfile's close callback when one is set; a NULL callback is skipped.
// `f` is a struct lvalue (not a pointer) and is evaluated more than once, so it
// must be free of side effects. The expansion carries its own terminator, so
// call sites use it without a trailing semicolon.
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
/**
 * Close the file backing `f`, if it was ever opened.
 *
 * When f->fd is -1 there is nothing to do. Otherwise the SHA-1 digest is
 * finalised into f->sha1_digest (whether or not checksumming was requested)
 * and the descriptor is closed.
 */
void fs_close(struct vfile *f) {
    if (f->fd == -1) {
        return;
    }

    SHA1_Final(f->sha1_digest, &f->sha1_ctx);
    close(f->fd);
}
@@ -66,7 +75,7 @@ void parse(void *arg) {
doc->meta_tail = NULL;
doc->mime = 0;
doc->size = job->vfile.info.st_size;
doc->mtime = job->vfile.info.st_mtim.tv_sec;
doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
@@ -202,9 +211,15 @@ void parse(void *arg) {
doc->has_parent = FALSE;
}
write_document(doc);
CLOSE_FILE(job->vfile)
if (job->vfile.has_checksum) {
char sha1_digest_str[SHA1_STR_LENGTH];
buf2hex((unsigned char *) job->vfile.sha1_digest, SHA1_DIGEST_LENGTH, (char *) sha1_digest_str);
APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str);
}
write_document(doc);
}
void cleanup_parse() {

View File

@@ -26,6 +26,8 @@
#define UNUSED(x) __attribute__((__unused__)) x
#define MD5_STR_LENGTH 33
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
#include "util.h"
#include "log.h"

File diff suppressed because one or more lines are too long