Compare commits

..

9 Commits

Author SHA1 Message Date
58741058cf Merge pull request #200 from simon987/dev
v2.11.3
2021-09-24 20:56:00 -04:00
0a7e59b646 Some documentation updates 2021-09-24 20:55:08 -04:00
43a566fe2f Version bump 2021-09-24 20:33:19 -04:00
b2631a86c8 Rework index picker 2021-09-24 20:31:11 -04:00
d0a1deca30 Fix thumbnail in DocInfoModal.vue 2021-09-24 19:40:06 -04:00
b03ce90a05 Fix max_analyzed_offset error 2021-09-20 21:01:23 -04:00
a5eacb4950 Set list item color for sub-documents 2021-09-20 20:40:48 -04:00
0887046b41 Fix sidecar files, better error handling in store_write 2021-09-20 20:34:05 -04:00
0331d46fff Merge pull request #186 from simon987/dev
v2.11.2
2021-09-06 14:14:51 -04:00
22 changed files with 148 additions and 107 deletions

View File

@@ -50,7 +50,8 @@ sist2 (Simple incremental search tool)
```
1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not recommended!)*
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
recommended!)*
1. *(or)* `docker pull simon987/sist2:2.11.2-x64-linux`
1. See [Usage guide](docs/USAGE.md)
@@ -70,19 +71,20 @@ See [Usage guide](docs/USAGE.md) for more details
File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
cbz,cbr | *(none)* | - | yes | - |
cbz,cbr | [libscan](https://github.com/simon987/libscan) | - | yes | - |
`audio/*` | ffmpeg | - | yes | ID3 tags |
`video/*` | ffmpeg | - | yes | title, comment, artist |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
html, xml | *(none)* | yes | no | - |
`text/plain` | [libscan](https://github.com/simon987/libscan) | yes | no | - |
html, xml | [libscan](https://github.com/simon987/libscan) | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
docx, xlsx, pptx | [libscan](https://github.com/simon987/libscan) | yes | if embedded | creator, modified_by, title |
doc (MS Word 97-2003) | antiword | yes | yes | author, title |
mobi, azw, azw3 | libmobi | yes | no | author, title |
wpd (WordPerfect) | libwpd | yes | no | *planned* |
json, jsonl, ndjson | [libscan](https://github.com/simon987/libscan) | yes | - | - |
\* *See [Archive files](#archive-files)*
@@ -135,7 +137,7 @@ docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux
```bash
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
```
1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
1. Install vcpkg dependencies

View File

@@ -49,6 +49,7 @@ Scan options
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
--read-subtitles Read subtitles from media files.
--fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata)
--checksums Calculate file checksums when scanning.
Index options
-t, --threads=<int> Number of threads. DEFAULT=1
@@ -129,6 +130,9 @@ Exec-script options
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
* `--checksums` Calculate file checksums (sha1) when scanning files. This option does not cause any additional read
operations. Checksums are not calculated for all file types, unless the file is inside an archive. When enabled, duplicate
files are hidden in the web UI (this behaviour can be toggled in the Configuration page).
### Scan examples

View File

@@ -2,7 +2,8 @@
"index": {
"refresh_interval": "30s",
"codec": "best_compression",
"number_of_replicas": 0
"number_of_replicas": 0,
"highlight.max_analyzed_offset": 10000000
},
"analysis": {
"tokenizer": {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -23,7 +23,6 @@
"vue-color": "^2.8.1",
"vue-i18n": "^8.24.4",
"vue-masonry-wall": "^0.3.2",
"vue-multiselect": "^2.1.6",
"vue-router": "^3.2.0",
"vue-simple-suggest": "^1.11.1",
"vuex": "^3.4.0"
@@ -13604,15 +13603,6 @@
"node": ">=10"
}
},
"node_modules/vue-multiselect": {
"version": "2.1.6",
"resolved": "https://registry.npmjs.org/vue-multiselect/-/vue-multiselect-2.1.6.tgz",
"integrity": "sha512-s7jmZPlm9FeueJg1RwJtnE9KNPtME/7C8uRWSfp9/yEN4M8XcS/d+bddoyVwVnvFyRh9msFo0HWeW0vTL8Qv+w==",
"engines": {
"node": ">= 4.0.0",
"npm": ">= 3.0.0"
}
},
"node_modules/vue-observe-visibility": {
"version": "0.4.6",
"resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz",
@@ -26376,11 +26366,6 @@
"vue-observe-visibility": "^0.4.6"
}
},
"vue-multiselect": {
"version": "2.1.6",
"resolved": "https://registry.npmjs.org/vue-multiselect/-/vue-multiselect-2.1.6.tgz",
"integrity": "sha512-s7jmZPlm9FeueJg1RwJtnE9KNPtME/7C8uRWSfp9/yEN4M8XcS/d+bddoyVwVnvFyRh9msFo0HWeW0vTL8Qv+w=="
},
"vue-observe-visibility": {
"version": "0.4.6",
"resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz",

View File

@@ -22,7 +22,6 @@
"vue-color": "^2.8.1",
"vue-i18n": "^8.24.4",
"vue-masonry-wall": "^0.3.2",
"vue-multiselect": "^2.1.6",
"vue-router": "^3.2.0",
"vue-simple-suggest": "^1.11.1",
"vuex": "^3.4.0"

View File

@@ -187,7 +187,8 @@ class Sist2Query {
"name.nGram": {},
"content.nGram": {},
font_name: {},
}
},
max_analyzed_offset: 9_999_999
};
if (getters.optSearchInPath) {
q.highlight.fields["path.text"] = {};

View File

@@ -4,7 +4,8 @@
<template #modal-title>
<h5 class="modal-title" :title="doc._source.name + ext(doc)">{{ doc._source.name + ext(doc) }}</h5>
</template>
<img :src="`t/${doc._source.index}/${doc._id}`" alt="" class="fit card-img-top">
<img v-if="doc._props.hasThumbnail" :src="`t/${doc._source.index}/${doc._id}`" alt="" class="fit card-img-top">
<InfoTable :doc="doc"></InfoTable>

View File

@@ -1,5 +1,5 @@
<template>
<b-list-group-item class="flex-column align-items-start mb-2">
<b-list-group-item class="flex-column align-items-start mb-2" :class="{'sub-document': doc._props.isSubDocument}">
<!-- Info modal-->
<DocInfoModal :show="showInfo" :doc="doc" @close="showInfo = false"></DocInfoModal>
@@ -40,9 +40,11 @@
</div>
<div v-if="doc._source.pages || doc._source.author" class="path-row text-muted">
<span v-if="doc._source.pages">{{ doc._source.pages }} {{ doc._source.pages > 1 ? $t("pages") : $t("page") }}</span>
<span v-if="doc._source.pages">{{ doc._source.pages }} {{
doc._source.pages > 1 ? $t("pages") : $t("page")
}}</span>
<span v-if="doc._source.author && doc._source.pages" class="mx-1">-</span>
<span v-if="doc._source.author">{{doc._source.author}}</span>
<span v-if="doc._source.author">{{ doc._source.author }}</span>
</div>
</div>
</div>
@@ -89,6 +91,14 @@ export default {
</script>
<style scoped>
.sub-document {
background: #AB47BC1F !important;
}
.theme-black .sub-document {
background: #37474F !important;
}
.list-group {
margin-top: 1em;
}

View File

@@ -1,93 +1,95 @@
<template>
<VueMultiselect
multiple
label="name"
:value="selectedIndices"
:options="indices"
:close-on-select="indices.length <= 1"
:placeholder="$t('indexPickerPlaceholder')"
@select="addItem"
@remove="removeItem">
<template slot="option" slot-scope="idx">
<b-row>
<b-col>
<span class="mr-1">{{ idx.option.name }}</span>
<SmallBadge pill :text="idx.option.version"></SmallBadge>
</b-col>
</b-row>
<b-row class="mt-1">
<b-col>
<span>{{ formatIdxDate(idx.option.timestamp) }}</span>
</b-col>
</b-row>
</template>
</VueMultiselect>
<div v-if="isMobile">
<b-form-select
:value="selectedIndicesIds"
@change="onSelect($event)"
:options="indices" multiple :select-size="6" text-field="name"
value-field="id"></b-form-select>
</div>
<div v-else>
<b-list-group id="index-picker-desktop">
<b-list-group-item
v-for="idx in indices"
@click="toggleIndex(idx)"
class="d-flex justify-content-between align-items-center list-group-item-action pointer">
<div class="d-flex">
<b-checkbox @change="toggleIndex(idx)" :checked="isSelected(idx)"></b-checkbox>
{{ idx.name }}
<span class="text-muted timestamp-text ml-2">{{ formatIdxDate(idx.timestamp) }}</span>
</div>
<b-badge class="version-badge">v{{ idx.version }}</b-badge>
</b-list-group-item>
</b-list-group>
</div>
</template>
<script lang="ts">
import VueMultiselect from "vue-multiselect"
import SmallBadge from "./SmallBadge.vue"
import {mapActions, mapGetters} from "vuex";
import {Index} from "@/Sist2Api";
import Vue from "vue";
import {format} from "date-fns";
export default Vue.extend({
components: {
VueMultiselect,
SmallBadge
},
data() {
return {
loading: true
loading: true,
}
},
computed: {
...mapGetters([
"indices", "selectedIndices"
]),
selectedIndicesIds() {
return this.selectedIndices.map(idx => idx.id)
},
isMobile() {
return window.innerWidth <= 650;
}
},
methods: {
...mapActions({
setSelectedIndices: "setSelectedIndices"
}),
removeItem(val: Index): void {
this.setSelectedIndices(this.selectedIndices.filter((item: Index) => item !== val))
},
addItem(val: Index): void {
this.setSelectedIndices([...this.selectedIndices, val])
onSelect(value) {
this.setSelectedIndices(this.indices.filter(idx => value.includes(idx.id)));
},
formatIdxDate(timestamp: number): string {
return format(new Date(timestamp * 1000), "yyyy-MM-dd");
},
toggleIndex(index) {
if (this.isSelected(index)) {
this.setSelectedIndices(this.selectedIndices.filter(idx => idx.id != index.id));
} else {
this.setSelectedIndices([index, ...this.selectedIndices]);
}
},
isSelected(index) {
return this.selectedIndices.find(idx => idx.id == index.id) != null;
}
},
})
</script>
<style src="vue-multiselect/dist/vue-multiselect.min.css"></style>
<style>
.multiselect__option {
padding: 5px 10px;
<style scoped>
.timestamp-text {
line-height: 24px;
font-size: 80%;
}
.multiselect__content-wrapper {
overflow: hidden;
.version-badge {
color: #222 !important;
background: none;
}
.theme-black .multiselect__tags {
background: #37474F;
border: 1px solid #616161 !important
.list-group-item {
padding: 0.2em 0.4em;
}
.theme-black .multiselect__input {
color: #dbdbdb;
background: #37474F;
}
.theme-black .multiselect__content-wrapper {
border: none
#index-picker-desktop {
overflow-y: auto;
max-height: 132px;
}
</style>

File diff suppressed because one or more lines are too long

View File

@@ -4,6 +4,7 @@
store_t *store_create(const char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t));
mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR);
strcpy(store->path, path);
#if (SIST_FAKE_STORE != 1)
store->chunk_size = chunk_size;
@@ -78,27 +79,57 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
int put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
ScanCtx.stat_tn_size += buf_len;
int db_full = FALSE;
int should_abort_transaction = FALSE;
if (put_ret == MDB_MAP_FULL) {
mdb_txn_abort(txn);
db_full = TRUE;
should_abort_transaction = TRUE;
} else {
int commit_ret = mdb_txn_commit(txn);
if (commit_ret == MDB_MAP_FULL) {
db_full = TRUE;
}
}
if (db_full) {
LOG_INFOF("store.c", "Updating mdb mapsize to %lu bytes", store->size)
if (should_abort_transaction) {
mdb_txn_abort(txn);
}
pthread_rwlock_unlock(&store->lock);
// Cannot resize when there is a opened transaction.
// Resize take effect on the next commit.
pthread_rwlock_wrlock(&store->lock);
store->size += store->chunk_size;
mdb_env_set_mapsize(store->env, store->size);
int resize_ret = mdb_env_set_mapsize(store->env, store->size);
if (resize_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
mdb_txn_begin(store->env, NULL, 0, &txn);
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
int put_ret_retry = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
if (put_ret_retry != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
int ret = mdb_txn_commit(txn);
if (ret != 0) {
LOG_FATALF("store.c", "FIXME: Could not commit to store %s: %s (%d), %d, %d %d",
store->path, mdb_strerror(ret), ret,
put_ret, put_ret_retry);
}
LOG_INFOF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
}
mdb_txn_commit(txn);
pthread_rwlock_unlock(&store->lock);
if (put_ret != 0) {
} else if (put_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
pthread_rwlock_unlock(&store->lock);
#endif
}

View File

@@ -6,12 +6,12 @@
#include <glib.h>
#define STORE_SIZE_TN 1024 * 1024 * 5
#define STORE_SIZE_TAG 1024 * 16
#define STORE_SIZE_TN (1024 * 1024 * 5)
#define STORE_SIZE_TAG (1024 * 1024)
#define STORE_SIZE_META STORE_SIZE_TAG
typedef struct store_t {
char *path;
char path[PATH_MAX];
char *tmp_path;
MDB_dbi dbi;
MDB_env *env;

View File

@@ -39,7 +39,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
}
int sub_strings[30];
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, filepath, strlen(filepath), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, str, strlen(str), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {

View File

@@ -179,7 +179,7 @@ void parse(void *arg) {
IS_ARC(doc->mime) ||
(IS_ARC_FILTER(doc->mime) && should_parse_filtered_file(doc->filepath, doc->ext))
)) {
parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc);
parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc, ScanCtx.exclude, ScanCtx.exclude_extra);
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc->mime)) {
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, doc);
} else if (is_cbr(&ScanCtx.comic_ctx, doc->mime) || is_cbz(&ScanCtx.comic_ctx, doc->mime)) {
@@ -189,6 +189,8 @@ void parse(void *arg) {
} else if (doc->mime == MIME_SIST2_SIDECAR) {
parse_sidecar(&job->vfile, doc);
CLOSE_FILE(job->vfile)
free(doc->filepath);
free(doc);
return;
} else if (is_msdoc(&ScanCtx.msdoc_ctx, doc->mime)) {
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc);

View File

@@ -27,7 +27,10 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
path_md5);
store_write(ScanCtx.index.meta_store, (char *) path_md5, sizeof(path_md5), json_str, strlen(json_str) + 1);
char path_md5_str[MD5_STR_LENGTH];
buf2hex(path_md5, MD5_DIGEST_LENGTH, path_md5_str);
store_write(ScanCtx.index.meta_store, path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
cJSON_Delete(json);
free(json_str);

View File

@@ -51,7 +51,7 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "2.11.2"
#define VERSION "2.11.3"
static const char *const Version = VERSION;
#ifndef SIST_PLATFORM

File diff suppressed because one or more lines are too long