Compare commits

..

4 Commits

Author SHA1 Message Date
17fda1e540 Support for rewind buffer 2021-09-11 20:46:40 -04:00
34b363bfd8 Add argument to calculate checksums 2021-09-11 14:31:48 -04:00
c9aa4bed72 Add argument to calculate checksums 2021-09-11 14:31:31 -04:00
7267d4bd2c Add basic JSON/NDJSON support 2021-09-07 08:14:32 -04:00
18 changed files with 92 additions and 29 deletions

View File

@ -4,6 +4,10 @@
"type": "keyword", "type": "keyword",
"doc_values": true "doc_values": true
}, },
"checksum": {
"type": "keyword",
"index": false
},
"_depth": { "_depth": {
"type": "integer" "type": "integer"
}, },

File diff suppressed because one or more lines are too long

View File

@ -50,6 +50,7 @@ export interface EsHit {
height: number height: number
duration: number duration: number
tag: string[] tag: string[]
checksum: string
} }
_props: { _props: {
isSubDocument: boolean isSubDocument: boolean

View File

@ -3,7 +3,7 @@
<template #cell(value)="data"> <template #cell(value)="data">
<span v-if="'html' in data.item" v-html="data.item.html"></span> <span v-if="'html' in data.item" v-html="data.item.html"></span>
<span v-else>{{data.value}}</span> <span v-else>{{ data.value }}</span>
</template> </template>
</b-table> </b-table>
</template> </template>
@ -57,7 +57,8 @@ export default {
"bitrate", "artist", "album", "album_artist", "genre", "font_name", "author", "bitrate", "artist", "album", "album_artist", "genre", "font_name", "author",
"modified_by", "pages", "tag", "modified_by", "pages", "tag",
"exif_make", "exif_software", "exif_exposure_time", "exif_fnumber", "exif_focal_length", "exif_make", "exif_software", "exif_exposure_time", "exif_fnumber", "exif_focal_length",
"exif_user_comment", "exif_iso_speed_ratings", "exif_model", "exif_datetime", "exif_user_comment", "exif_iso_speed_ratings", "exif_model", "exif_datetime",
"checksum"
]; ];
fields.forEach(field => { fields.forEach(field => {
@ -76,9 +77,9 @@ export default {
items.push({ items.push({
key: "Exif GPS", key: "Exif GPS",
html: makeGpsLink( html: makeGpsLink(
dmsToDecimal(src["exif_gps_latitude_dms"], src["exif_gps_latitude_ref"]), dmsToDecimal(src["exif_gps_latitude_dms"], src["exif_gps_latitude_ref"]),
dmsToDecimal(src["exif_gps_longitude_dms"], src["exif_gps_longitude_ref"]), dmsToDecimal(src["exif_gps_longitude_dms"], src["exif_gps_longitude_ref"]),
), ),
}); });
} }

View File

@ -62,7 +62,8 @@ export default {
lightboxLoadOnlyCurrent: "Do not preload full-size images for adjacent slides in image viewer.", lightboxLoadOnlyCurrent: "Do not preload full-size images for adjacent slides in image viewer.",
slideDuration: "Slide duration", slideDuration: "Slide duration",
resultSize: "Number of results per page", resultSize: "Number of results per page",
tagOrOperator: "Use OR operator when specifying multiple tags." tagOrOperator: "Use OR operator when specifying multiple tags.",
hideDuplicates: "Hide duplicate results based on checksum"
}, },
queryMode: { queryMode: {
simple: "Simple", simple: "Simple",
@ -209,7 +210,8 @@ export default {
lightboxLoadOnlyCurrent: "Désactiver le chargement des diapositives adjacentes pour le visualiseur d'images", lightboxLoadOnlyCurrent: "Désactiver le chargement des diapositives adjacentes pour le visualiseur d'images",
slideDuration: "Durée des diapositives", slideDuration: "Durée des diapositives",
resultSize: "Nombre de résultats par page", resultSize: "Nombre de résultats par page",
tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags" tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags",
hideDuplicates: "Masquer les résultats en double"
}, },
queryMode: { queryMode: {
simple: "Simple", simple: "Simple",

View File

@ -27,6 +27,7 @@ export default new Vuex.Store({
size: 60, size: 60,
optLang: "en", optLang: "en",
optHideDuplicates: true,
optTheme: "light", optTheme: "light",
optDisplay: "grid", optDisplay: "grid",
@ -79,6 +80,7 @@ export default new Vuex.Store({
setSizeMax: (state, val) => state.sizeMax = val, setSizeMax: (state, val) => state.sizeMax = val,
setSist2Info: (state, val) => state.sist2Info = val, setSist2Info: (state, val) => state.sist2Info = val,
setSeed: (state, val) => state.seed = val, setSeed: (state, val) => state.seed = val,
setOptHideDuplicates: (state, val) => state.optHideDuplicates = val,
setOptLang: (state, val) => state.optLang = val, setOptLang: (state, val) => state.optLang = val,
setSortMode: (state, val) => state.sortMode = val, setSortMode: (state, val) => state.sortMode = val,
setIndices: (state, val) => { setIndices: (state, val) => {
@ -317,6 +319,7 @@ export default new Vuex.Store({
uiLightboxKey: state => state.uiLightboxKey, uiLightboxKey: state => state.uiLightboxKey,
uiLightboxSlide: state => state.uiLightboxSlide, uiLightboxSlide: state => state.uiLightboxSlide,
optHideDuplicates: state => state.optHideDuplicates,
optLang: state => state.optLang, optLang: state => state.optLang,
optTheme: state => state.optTheme, optTheme: state => state.optTheme,
optDisplay: state => state.optDisplay, optDisplay: state => state.optDisplay,

View File

@ -35,6 +35,11 @@
<br/> <br/>
<h4>{{ $t("searchOptions") }}</h4> <h4>{{ $t("searchOptions") }}</h4>
<b-card> <b-card>
<b-form-checkbox :checked="optHideDuplicates" @input="setOptHideDuplicates">{{
$t("opt.hideDuplicates")
}}
</b-form-checkbox>
<b-form-checkbox :checked="optHighlight" @input="setOptHighlight">{{ $t("opt.highlight") }}</b-form-checkbox> <b-form-checkbox :checked="optHighlight" @input="setOptHighlight">{{ $t("opt.highlight") }}</b-form-checkbox>
<b-form-checkbox :checked="optTagOrOperator" @input="setOptTagOrOperator">{{ <b-form-checkbox :checked="optTagOrOperator" @input="setOptTagOrOperator">{{
$t("opt.tagOrOperator") $t("opt.tagOrOperator")
@ -206,10 +211,10 @@ export default {
"optTreemapSize", "optTreemapSize",
"optLightboxLoadOnlyCurrent", "optLightboxLoadOnlyCurrent",
"optLightboxSlideDuration", "optLightboxSlideDuration",
"optContainerWidth",
"optResultSize", "optResultSize",
"optTagOrOperator", "optTagOrOperator",
"optLang" "optLang",
"optHideDuplicates",
]), ]),
clientWidth() { clientWidth() {
return window.innerWidth; return window.innerWidth;
@ -248,7 +253,8 @@ export default {
"setOptContainerWidth", "setOptContainerWidth",
"setOptResultSize", "setOptResultSize",
"setOptTagOrOperator", "setOptTagOrOperator",
"setOptLang" "setOptLang",
"setOptHideDuplicates"
]), ]),
onResetClick() { onResetClick() {
localStorage.removeItem("sist2_configuration"); localStorage.removeItem("sist2_configuration");

View File

@ -91,6 +91,7 @@ export default Vue.extend({
search: undefined as any, search: undefined as any,
docs: [] as EsHit[], docs: [] as EsHit[],
docIds: new Set(), docIds: new Set(),
docChecksums: new Set(),
searchBusy: false, searchBusy: false,
Sist2Query: Sist2Query, Sist2Query: Sist2Query,
showHelp: false showHelp: false
@ -193,6 +194,7 @@ export default Vue.extend({
async clearResults() { async clearResults() {
this.docs = []; this.docs = [];
this.docIds.clear(); this.docIds.clear();
this.docChecksums.clear();
await this.$store.dispatch("clearResults"); await this.$store.dispatch("clearResults");
this.$store.commit("setUiReachedScrollEnd", false); this.$store.commit("setUiReachedScrollEnd", false);
}, },
@ -202,7 +204,19 @@ export default Vue.extend({
} }
resp.hits.hits = resp.hits.hits.filter(hit => !this.docIds.has(hit._id)); resp.hits.hits = resp.hits.hits.filter(hit => !this.docIds.has(hit._id));
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
// Optionally drop hits whose checksum matches a hit that was already accepted.
// docChecksums is component state that is only emptied by clearResults(), so
// a checksum recorded while handling one response is still remembered when the
// next response is filtered.
if (this.$store.state.optHideDuplicates) {
    resp.hits.hits = resp.hits.hits.filter(hit => {
        // Documents without a checksum cannot be compared; always keep them.
        if (!("checksum" in hit._source)) {
            return true;
        }
        // Keep a hit only the first time its checksum is encountered.
        const isFirstOccurrence = !this.docChecksums.has(hit._source.checksum);
        this.docChecksums.add(hit._source.checksum);
        return isFirstOccurrence;
    });
}
for (const hit of resp.hits.hits) { for (const hit of resp.hits.hits) {
if (hit._props.isPlayableImage || hit._props.isPlayableVideo) { if (hit._props.isPlayableImage || hit._props.isPlayableVideo) {

View File

@ -28,6 +28,7 @@ typedef struct scan_args {
int max_memory_buffer; int max_memory_buffer;
int read_subtitles; int read_subtitles;
int fast_epub; int fast_epub;
int calculate_checksums;
} scan_args_t; } scan_args_t;
scan_args_t *scan_args_create(); scan_args_t *scan_args_create();

View File

@ -33,6 +33,7 @@ typedef struct {
int threads; int threads;
int depth; int depth;
int calculate_checksums;
size_t stat_tn_size; size_t stat_tn_size;
size_t stat_index_size; size_t stat_index_size;

File diff suppressed because one or more lines are too long

View File

@ -74,6 +74,8 @@ char *get_meta_key_text(enum metakey meta_key) {
return "exif_gps_latitude_dms"; return "exif_gps_latitude_dms";
case MetaExifGpsLatitudeDec: case MetaExifGpsLatitudeDec:
return "exif_gps_latitude_dec"; return "exif_gps_latitude_dec";
case MetaChecksum:
return "checksum";
default: default:
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key) LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
} }
@ -165,6 +167,7 @@ char *build_json_string(document_t *doc) {
case MetaExifGpsLatitudeDMS: case MetaExifGpsLatitudeDMS:
case MetaExifGpsLatitudeDec: case MetaExifGpsLatitudeDec:
case MetaExifGpsLatitudeRef: case MetaExifGpsLatitudeRef:
case MetaChecksum:
case MetaTitle: { case MetaTitle: {
cJSON_AddStringToObject(json, get_meta_key_text(meta->key), meta->str_val); cJSON_AddStringToObject(json, get_meta_key_text(meta->key), meta->str_val);
buffer_size_guess += (int) strlen(meta->str_val); buffer_size_guess += (int) strlen(meta->str_val);

View File

@ -24,10 +24,16 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->vfile.filepath = job->filepath; job->vfile.filepath = job->filepath;
job->vfile.read = fs_read; job->vfile.read = fs_read;
// Filesystem reads are always rewindable
job->vfile.read_rewindable = fs_read;
job->vfile.reset = fs_reset; job->vfile.reset = fs_reset;
job->vfile.close = fs_close; job->vfile.close = fs_close;
job->vfile.fd = -1; job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE; job->vfile.is_fs_file = TRUE;
job->vfile.has_checksum = FALSE;
job->vfile.rewind_buffer_size = 0;
job->vfile.rewind_buffer = NULL;
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
return job; return job;
} }

View File

@ -170,6 +170,8 @@ void initialize_scan_context(scan_args_t *args) {
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL); pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL); pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
ScanCtx.calculate_checksums = args->calculate_checksums;
// Archive // Archive
ScanCtx.arc_ctx.mode = args->archive_mode; ScanCtx.arc_ctx.mode = args->archive_mode;
ScanCtx.arc_ctx.log = _log; ScanCtx.arc_ctx.log = _log;
@ -516,8 +518,8 @@ void sist2_web(web_args_t *args) {
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
sigsegv_handler = signal(SIGSEGV, sig_handler); // sigsegv_handler = signal(SIGSEGV, sig_handler);
sigabrt_handler = signal(SIGABRT, sig_handler); // sigabrt_handler = signal(SIGABRT, sig_handler);
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
@ -574,6 +576,7 @@ int main(int argc, const char *argv[]) {
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."), OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub, OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
"Faster but less accurate EPUB parsing (no thumbnails, metadata)"), "Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."),
OPT_GROUP("Index options"), OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),

View File

@ -10,25 +10,34 @@
#define MIN_VIDEO_SIZE (1024 * 64) #define MIN_VIDEO_SIZE (1024 * 64)
#define MIN_IMAGE_SIZE (1024 * 2) #define MIN_IMAGE_SIZE (512)
int fs_read(struct vfile *f, void *buf, size_t size) { int fs_read(struct vfile *f, void *buf, size_t size) {
if (f->fd == -1) { if (f->fd == -1) {
SHA1_Init(&f->sha1_ctx);
f->fd = open(f->filepath, O_RDONLY); f->fd = open(f->filepath, O_RDONLY);
if (f->fd == -1) { if (f->fd == -1) {
LOG_ERRORF(f->filepath, "open(): [%d] %s", errno, strerror(errno))
return -1; return -1;
} }
} }
return read(f->fd, buf, size); int ret = (int) read(f->fd, buf, size);
// read() yields -1 on failure and 0 at end of file; only a positive count
// means buf holds fresh bytes. Testing `ret != 0` would also accept -1,
// marking the file as checksummed and passing a negative length to the
// SHA-1 update.
if (ret > 0 && f->calculate_checksum) {
    f->has_checksum = TRUE;
    safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, ret);
}
return ret;
} }
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));}; #define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
void fs_close(struct vfile *f) { void fs_close(struct vfile *f) {
if (f->fd != -1) { if (f->fd != -1) {
SHA1_Final(f->sha1_digest, &f->sha1_ctx);
close(f->fd); close(f->fd);
} }
} }
@ -66,7 +75,7 @@ void parse(void *arg) {
doc->meta_tail = NULL; doc->meta_tail = NULL;
doc->mime = 0; doc->mime = 0;
doc->size = job->vfile.info.st_size; doc->size = job->vfile.info.st_size;
doc->mtime = job->vfile.info.st_mtim.tv_sec; doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5); int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) { if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
@ -93,18 +102,17 @@ void parse(void *arg) {
doc->mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext); doc->mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
} }
int bytes_read = 0;
if (doc->mime == 0 && !ScanCtx.fast) { if (doc->mime == 0 && !ScanCtx.fast) {
// Get mime type with libmagic // Get mime type with libmagic
if (!job->vfile.is_fs_file) { if (job->vfile.read_rewindable == NULL) {
LOG_WARNING(job->filepath, LOG_WARNING(job->filepath,
"Guessing mime type with libmagic inside archive files is not currently supported"); "File does not support rewindable reads, cannot guess Media type");
goto abort; goto abort;
} }
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE); int bytes_read = job->vfile.read_rewindable(&job->vfile, buf, MAGIC_BUF_SIZE);
if (bytes_read < 0) { if (bytes_read < 0) {
if (job->vfile.is_fs_file) { if (job->vfile.is_fs_file) {
@ -135,7 +143,9 @@ void parse(void *arg) {
} }
} }
job->vfile.reset(&job->vfile); if (job->vfile.reset != NULL) {
job->vfile.reset(&job->vfile);
}
magic_close(magic); magic_close(magic);
} }
@ -149,7 +159,7 @@ void parse(void *arg) {
} else if ((mmime == MimeVideo && doc->size >= MIN_VIDEO_SIZE) || } else if ((mmime == MimeVideo && doc->size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc->size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) { (mmime == MimeImage && doc->size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
parse_media(&ScanCtx.media_ctx, &job->vfile, doc); parse_media(&ScanCtx.media_ctx, &job->vfile, doc, mime_get_mime_text(doc->mime));
} else if (IS_PDF(doc->mime)) { } else if (IS_PDF(doc->mime)) {
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc); parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc);
@ -202,9 +212,15 @@ void parse(void *arg) {
doc->has_parent = FALSE; doc->has_parent = FALSE;
} }
write_document(doc);
CLOSE_FILE(job->vfile) CLOSE_FILE(job->vfile)
// Emit the checksum only if at least one read fed the hash (has_checksum).
// This must run after the file has been closed: for filesystem files the
// digest is finalized by SHA1_Final() inside fs_close(), so sha1_digest is
// not valid before that point.
if (job->vfile.has_checksum) {
// SHA1_STR_LENGTH is 41 and SHA1_DIGEST_LENGTH is 20 — presumably 40 hex
// characters plus a terminating NUL written by buf2hex(); TODO confirm.
char sha1_digest_str[SHA1_STR_LENGTH];
buf2hex((unsigned char *) job->vfile.sha1_digest, SHA1_DIGEST_LENGTH, (char *) sha1_digest_str);
APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str);
}
write_document(doc);
} }
void cleanup_parse() { void cleanup_parse() {

View File

@ -26,6 +26,8 @@
#define UNUSED(x) __attribute__((__unused__)) x #define UNUSED(x) __attribute__((__unused__)) x
#define MD5_STR_LENGTH 33 #define MD5_STR_LENGTH 33
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
#include "util.h" #include "util.h"
#include "log.h" #include "log.h"

File diff suppressed because one or more lines are too long

2
third-party/libscan vendored

@ -1 +1 @@
Subproject commit fe53e1a219246d829439bb26093713a415a58924 Subproject commit da172823745b67662846cf1970a47ebcea8fe50e