Compare commits

..

13 Commits

Author SHA1 Message Date
58741058cf Merge pull request #200 from simon987/dev
v2.11.3
2021-09-24 20:56:00 -04:00
0a7e59b646 Some documentation updates 2021-09-24 20:55:08 -04:00
43a566fe2f Version bump 2021-09-24 20:33:19 -04:00
b2631a86c8 Rework index picker 2021-09-24 20:31:11 -04:00
d0a1deca30 Fix thumbnail in DocInfoModal.vue 2021-09-24 19:40:06 -04:00
b03ce90a05 Fix max_analyzed_offset error 2021-09-20 21:01:23 -04:00
a5eacb4950 Set list item color for sub-documents 2021-09-20 20:40:48 -04:00
0887046b41 Fix sidecar files, better error handling in store_write 2021-09-20 20:34:05 -04:00
17fda1e540 Support for rewind buffer 2021-09-11 20:46:40 -04:00
34b363bfd8 Add argument to calculate checksums 2021-09-11 14:31:48 -04:00
c9aa4bed72 Add argument to calculate checksums 2021-09-11 14:31:31 -04:00
7267d4bd2c Add basic JSON/NDJSON support 2021-09-07 08:14:32 -04:00
43470e9ce6 Add basic JSON/NDJSON support 2021-09-06 21:27:17 -04:00
36 changed files with 675 additions and 553 deletions

View File

@@ -50,7 +50,8 @@ sist2 (Simple incremental search tool)
```
1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not recommended!)*
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
recommended!)*
1. *(or)* `docker pull simon987/sist2:2.11.2-x64-linux`
1. See [Usage guide](docs/USAGE.md)
@@ -70,19 +71,20 @@ See [Usage guide](docs/USAGE.md) for more details
File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
cbz,cbr | *(none)* | - | yes | - |
cbz,cbr | [libscan](https://github.com/simon987/libscan) | - | yes | - |
`audio/*` | ffmpeg | - | yes | ID3 tags |
`video/*` | ffmpeg | - | yes | title, comment, artist |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - |
html, xml | *(none)* | yes | no | - |
`text/plain` | [libscan](https://github.com/simon987/libscan) | yes | no | - |
html, xml | [libscan](https://github.com/simon987/libscan) | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title |
docx, xlsx, pptx | [libscan](https://github.com/simon987/libscan) | yes | if embedded | creator, modified_by, title |
doc (MS Word 97-2003) | antiword | yes | yes | author, title |
mobi, azw, azw3 | libmobi | yes | no | author, title |
wpd (WordPerfect) | libwpd | yes | no | *planned* |
json, jsonl, ndjson | [libscan](https://github.com/simon987/libscan) | yes | - | - |
\* *See [Archive files](#archive-files)*
@@ -135,7 +137,7 @@ docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux
```bash
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git
```
1. Apply vcpkg patches, as per [sist2-build](https://github.com/simon987/sist2-build) Dockerfile
1. Install vcpkg dependencies

View File

@@ -49,6 +49,7 @@ Scan options
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
--read-subtitles Read subtitles from media files.
--fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata)
--checksums Calculate file checksums when scanning.
Index options
-t, --threads=<int> Number of threads. DEFAULT=1
@@ -129,6 +130,9 @@ Exec-script options
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
* `--checksums` Calculate file checksums (sha1) when scanning files. This option does not cause any additional read
operations. Checksums are not calculated for all file types, unless the file is inside an archive. When enabled, duplicate
files are hidden in the web UI (this behaviour can be toggled in the Configuration page).
### Scan examples

View File

@@ -4,6 +4,10 @@
"type": "keyword",
"doc_values": true
},
"checksum": {
"type": "keyword",
"index": false
},
"_depth": {
"type": "integer"
},

View File

@@ -2,7 +2,8 @@
"index": {
"refresh_interval": "30s",
"codec": "best_compression",
"number_of_replicas": 0
"number_of_replicas": 0,
"highlight.max_analyzed_offset": 10000000
},
"analysis": {
"tokenizer": {

View File

@@ -22,6 +22,7 @@ application/java-archive, jar
application/java, class
application/javascript,
application/json, json
application/ndjson, jsonl|ndjson
application/marc, mrc
application/mbedlet, mbd
application/mime, aps
1 application/arj arj
22 application/java class
23 application/javascript
24 application/json json
25 application/ndjson jsonl|ndjson
26 application/marc mrc
27 application/mbedlet mbd
28 application/mime aps

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -23,7 +23,6 @@
"vue-color": "^2.8.1",
"vue-i18n": "^8.24.4",
"vue-masonry-wall": "^0.3.2",
"vue-multiselect": "^2.1.6",
"vue-router": "^3.2.0",
"vue-simple-suggest": "^1.11.1",
"vuex": "^3.4.0"
@@ -13604,15 +13603,6 @@
"node": ">=10"
}
},
"node_modules/vue-multiselect": {
"version": "2.1.6",
"resolved": "https://registry.npmjs.org/vue-multiselect/-/vue-multiselect-2.1.6.tgz",
"integrity": "sha512-s7jmZPlm9FeueJg1RwJtnE9KNPtME/7C8uRWSfp9/yEN4M8XcS/d+bddoyVwVnvFyRh9msFo0HWeW0vTL8Qv+w==",
"engines": {
"node": ">= 4.0.0",
"npm": ">= 3.0.0"
}
},
"node_modules/vue-observe-visibility": {
"version": "0.4.6",
"resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz",
@@ -26376,11 +26366,6 @@
"vue-observe-visibility": "^0.4.6"
}
},
"vue-multiselect": {
"version": "2.1.6",
"resolved": "https://registry.npmjs.org/vue-multiselect/-/vue-multiselect-2.1.6.tgz",
"integrity": "sha512-s7jmZPlm9FeueJg1RwJtnE9KNPtME/7C8uRWSfp9/yEN4M8XcS/d+bddoyVwVnvFyRh9msFo0HWeW0vTL8Qv+w=="
},
"vue-observe-visibility": {
"version": "0.4.6",
"resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz",

View File

@@ -22,7 +22,6 @@
"vue-color": "^2.8.1",
"vue-i18n": "^8.24.4",
"vue-masonry-wall": "^0.3.2",
"vue-multiselect": "^2.1.6",
"vue-router": "^3.2.0",
"vue-simple-suggest": "^1.11.1",
"vuex": "^3.4.0"

View File

@@ -50,6 +50,7 @@ export interface EsHit {
height: number
duration: number
tag: string[]
checksum: string
}
_props: {
isSubDocument: boolean

View File

@@ -187,7 +187,8 @@ class Sist2Query {
"name.nGram": {},
"content.nGram": {},
font_name: {},
}
},
max_analyzed_offset: 9_999_999
};
if (getters.optSearchInPath) {
q.highlight.fields["path.text"] = {};

View File

@@ -4,7 +4,8 @@
<template #modal-title>
<h5 class="modal-title" :title="doc._source.name + ext(doc)">{{ doc._source.name + ext(doc) }}</h5>
</template>
<img :src="`t/${doc._source.index}/${doc._id}`" alt="" class="fit card-img-top">
<img v-if="doc._props.hasThumbnail" :src="`t/${doc._source.index}/${doc._id}`" alt="" class="fit card-img-top">
<InfoTable :doc="doc"></InfoTable>

View File

@@ -1,5 +1,5 @@
<template>
<b-list-group-item class="flex-column align-items-start mb-2">
<b-list-group-item class="flex-column align-items-start mb-2" :class="{'sub-document': doc._props.isSubDocument}">
<!-- Info modal-->
<DocInfoModal :show="showInfo" :doc="doc" @close="showInfo = false"></DocInfoModal>
@@ -40,9 +40,11 @@
</div>
<div v-if="doc._source.pages || doc._source.author" class="path-row text-muted">
<span v-if="doc._source.pages">{{ doc._source.pages }} {{ doc._source.pages > 1 ? $t("pages") : $t("page") }}</span>
<span v-if="doc._source.pages">{{ doc._source.pages }} {{
doc._source.pages > 1 ? $t("pages") : $t("page")
}}</span>
<span v-if="doc._source.author && doc._source.pages" class="mx-1">-</span>
<span v-if="doc._source.author">{{doc._source.author}}</span>
<span v-if="doc._source.author">{{ doc._source.author }}</span>
</div>
</div>
</div>
@@ -89,6 +91,14 @@ export default {
</script>
<style scoped>
.sub-document {
background: #AB47BC1F !important;
}
.theme-black .sub-document {
background: #37474F !important;
}
.list-group {
margin-top: 1em;
}

View File

@@ -1,93 +1,95 @@
<template>
<VueMultiselect
multiple
label="name"
:value="selectedIndices"
:options="indices"
:close-on-select="indices.length <= 1"
:placeholder="$t('indexPickerPlaceholder')"
@select="addItem"
@remove="removeItem">
<template slot="option" slot-scope="idx">
<b-row>
<b-col>
<span class="mr-1">{{ idx.option.name }}</span>
<SmallBadge pill :text="idx.option.version"></SmallBadge>
</b-col>
</b-row>
<b-row class="mt-1">
<b-col>
<span>{{ formatIdxDate(idx.option.timestamp) }}</span>
</b-col>
</b-row>
</template>
</VueMultiselect>
<div v-if="isMobile">
<b-form-select
:value="selectedIndicesIds"
@change="onSelect($event)"
:options="indices" multiple :select-size="6" text-field="name"
value-field="id"></b-form-select>
</div>
<div v-else>
<b-list-group id="index-picker-desktop">
<b-list-group-item
v-for="idx in indices"
@click="toggleIndex(idx)"
class="d-flex justify-content-between align-items-center list-group-item-action pointer">
<div class="d-flex">
<b-checkbox @change="toggleIndex(idx)" :checked="isSelected(idx)"></b-checkbox>
{{ idx.name }}
<span class="text-muted timestamp-text ml-2">{{ formatIdxDate(idx.timestamp) }}</span>
</div>
<b-badge class="version-badge">v{{ idx.version }}</b-badge>
</b-list-group-item>
</b-list-group>
</div>
</template>
<script lang="ts">
import VueMultiselect from "vue-multiselect"
import SmallBadge from "./SmallBadge.vue"
import {mapActions, mapGetters} from "vuex";
import {Index} from "@/Sist2Api";
import Vue from "vue";
import {format} from "date-fns";
export default Vue.extend({
components: {
VueMultiselect,
SmallBadge
},
data() {
return {
loading: true
loading: true,
}
},
computed: {
...mapGetters([
"indices", "selectedIndices"
]),
selectedIndicesIds() {
return this.selectedIndices.map(idx => idx.id)
},
isMobile() {
return window.innerWidth <= 650;
}
},
methods: {
...mapActions({
setSelectedIndices: "setSelectedIndices"
}),
removeItem(val: Index): void {
this.setSelectedIndices(this.selectedIndices.filter((item: Index) => item !== val))
},
addItem(val: Index): void {
this.setSelectedIndices([...this.selectedIndices, val])
onSelect(value) {
this.setSelectedIndices(this.indices.filter(idx => value.includes(idx.id)));
},
formatIdxDate(timestamp: number): string {
return format(new Date(timestamp * 1000), "yyyy-MM-dd");
},
toggleIndex(index) {
if (this.isSelected(index)) {
this.setSelectedIndices(this.selectedIndices.filter(idx => idx.id != index.id));
} else {
this.setSelectedIndices([index, ...this.selectedIndices]);
}
},
isSelected(index) {
return this.selectedIndices.find(idx => idx.id == index.id) != null;
}
},
})
</script>
<style src="vue-multiselect/dist/vue-multiselect.min.css"></style>
<style>
.multiselect__option {
padding: 5px 10px;
<style scoped>
.timestamp-text {
line-height: 24px;
font-size: 80%;
}
.multiselect__content-wrapper {
overflow: hidden;
.version-badge {
color: #222 !important;
background: none;
}
.theme-black .multiselect__tags {
background: #37474F;
border: 1px solid #616161 !important
.list-group-item {
padding: 0.2em 0.4em;
}
.theme-black .multiselect__input {
color: #dbdbdb;
background: #37474F;
}
.theme-black .multiselect__content-wrapper {
border: none
#index-picker-desktop {
overflow-y: auto;
max-height: 132px;
}
</style>

View File

@@ -3,7 +3,7 @@
<template #cell(value)="data">
<span v-if="'html' in data.item" v-html="data.item.html"></span>
<span v-else>{{data.value}}</span>
<span v-else>{{ data.value }}</span>
</template>
</b-table>
</template>
@@ -57,7 +57,8 @@ export default {
"bitrate", "artist", "album", "album_artist", "genre", "font_name", "author",
"modified_by", "pages", "tag",
"exif_make", "exif_software", "exif_exposure_time", "exif_fnumber", "exif_focal_length",
"exif_user_comment", "exif_iso_speed_ratings", "exif_model", "exif_datetime",
"exif_user_comment", "exif_iso_speed_ratings", "exif_model", "exif_datetime",
"checksum"
];
fields.forEach(field => {
@@ -76,9 +77,9 @@ export default {
items.push({
key: "Exif GPS",
html: makeGpsLink(
dmsToDecimal(src["exif_gps_latitude_dms"], src["exif_gps_latitude_ref"]),
dmsToDecimal(src["exif_gps_longitude_dms"], src["exif_gps_longitude_ref"]),
),
dmsToDecimal(src["exif_gps_latitude_dms"], src["exif_gps_latitude_ref"]),
dmsToDecimal(src["exif_gps_longitude_dms"], src["exif_gps_longitude_ref"]),
),
});
}

View File

@@ -62,7 +62,8 @@ export default {
lightboxLoadOnlyCurrent: "Do not preload full-size images for adjacent slides in image viewer.",
slideDuration: "Slide duration",
resultSize: "Number of results per page",
tagOrOperator: "Use OR operator when specifying multiple tags."
tagOrOperator: "Use OR operator when specifying multiple tags.",
hideDuplicates: "Hide duplicate results based on checksum"
},
queryMode: {
simple: "Simple",
@@ -209,7 +210,8 @@ export default {
lightboxLoadOnlyCurrent: "Désactiver le chargement des diapositives adjacentes pour le visualiseur d'images",
slideDuration: "Durée des diapositives",
resultSize: "Nombre de résultats par page",
tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags"
tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags",
hideDuplicates: "Masquer les résultats en double"
},
queryMode: {
simple: "Simple",

View File

@@ -27,6 +27,7 @@ export default new Vuex.Store({
size: 60,
optLang: "en",
optHideDuplicates: true,
optTheme: "light",
optDisplay: "grid",
@@ -79,6 +80,7 @@ export default new Vuex.Store({
setSizeMax: (state, val) => state.sizeMax = val,
setSist2Info: (state, val) => state.sist2Info = val,
setSeed: (state, val) => state.seed = val,
setOptHideDuplicates: (state, val) => state.optHideDuplicates = val,
setOptLang: (state, val) => state.optLang = val,
setSortMode: (state, val) => state.sortMode = val,
setIndices: (state, val) => {
@@ -317,6 +319,7 @@ export default new Vuex.Store({
uiLightboxKey: state => state.uiLightboxKey,
uiLightboxSlide: state => state.uiLightboxSlide,
optHideDuplicates: state => state.optHideDuplicates,
optLang: state => state.optLang,
optTheme: state => state.optTheme,
optDisplay: state => state.optDisplay,

View File

@@ -35,6 +35,11 @@
<br/>
<h4>{{ $t("searchOptions") }}</h4>
<b-card>
<b-form-checkbox :checked="optHideDuplicates" @input="setOptHideDuplicates">{{
$t("opt.hideDuplicates")
}}
</b-form-checkbox>
<b-form-checkbox :checked="optHighlight" @input="setOptHighlight">{{ $t("opt.highlight") }}</b-form-checkbox>
<b-form-checkbox :checked="optTagOrOperator" @input="setOptTagOrOperator">{{
$t("opt.tagOrOperator")
@@ -206,10 +211,10 @@ export default {
"optTreemapSize",
"optLightboxLoadOnlyCurrent",
"optLightboxSlideDuration",
"optContainerWidth",
"optResultSize",
"optTagOrOperator",
"optLang"
"optLang",
"optHideDuplicates",
]),
clientWidth() {
return window.innerWidth;
@@ -248,7 +253,8 @@ export default {
"setOptContainerWidth",
"setOptResultSize",
"setOptTagOrOperator",
"setOptLang"
"setOptLang",
"setOptHideDuplicates"
]),
onResetClick() {
localStorage.removeItem("sist2_configuration");

View File

@@ -91,6 +91,7 @@ export default Vue.extend({
search: undefined as any,
docs: [] as EsHit[],
docIds: new Set(),
docChecksums: new Set(),
searchBusy: false,
Sist2Query: Sist2Query,
showHelp: false
@@ -193,6 +194,7 @@ export default Vue.extend({
async clearResults() {
this.docs = [];
this.docIds.clear();
this.docChecksums.clear();
await this.$store.dispatch("clearResults");
this.$store.commit("setUiReachedScrollEnd", false);
},
@@ -202,7 +204,19 @@ export default Vue.extend({
}
resp.hits.hits = resp.hits.hits.filter(hit => !this.docIds.has(hit._id));
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
if (this.$store.state.optHideDuplicates) {
resp.hits.hits = resp.hits.hits.filter(hit => {
if (!("checksum" in hit._source)) {
return true;
}
const isDupe = !this.docChecksums.has(hit._source.checksum);
this.docChecksums.add(hit._source.checksum);
return isDupe;
});
}
for (const hit of resp.hits.hits) {
if (hit._props.isPlayableImage || hit._props.isPlayableVideo) {

View File

@@ -28,6 +28,7 @@ typedef struct scan_args {
int max_memory_buffer;
int read_subtitles;
int fast_epub;
int calculate_checksums;
} scan_args_t;
scan_args_t *scan_args_create();

View File

@@ -15,6 +15,7 @@
#include "libscan/raw/raw.h"
#include "libscan/msdoc/msdoc.h"
#include "libscan/wpd/wpd.h"
#include "libscan/json/json.h"
#include "src/io/store.h"
#include <glib.h>
@@ -32,6 +33,7 @@ typedef struct {
int threads;
int depth;
int calculate_checksums;
size_t stat_tn_size;
size_t stat_index_size;
@@ -62,6 +64,7 @@ typedef struct {
scan_raw_ctx_t raw_ctx;
scan_msdoc_ctx_t msdoc_ctx;
scan_wpd_ctx_t wpd_ctx;
scan_json_ctx_t json_ctx;
} ScanCtx_t;
typedef struct {

File diff suppressed because one or more lines are too long

View File

@@ -74,6 +74,8 @@ char *get_meta_key_text(enum metakey meta_key) {
return "exif_gps_latitude_dms";
case MetaExifGpsLatitudeDec:
return "exif_gps_latitude_dec";
case MetaChecksum:
return "checksum";
default:
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
}
@@ -165,6 +167,7 @@ char *build_json_string(document_t *doc) {
case MetaExifGpsLatitudeDMS:
case MetaExifGpsLatitudeDec:
case MetaExifGpsLatitudeRef:
case MetaChecksum:
case MetaTitle: {
cJSON_AddStringToObject(json, get_meta_key_text(meta->key), meta->str_val);
buffer_size_guess += (int) strlen(meta->str_val);

View File

@@ -4,6 +4,7 @@
store_t *store_create(const char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t));
mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR);
strcpy(store->path, path);
#if (SIST_FAKE_STORE != 1)
store->chunk_size = chunk_size;
@@ -78,27 +79,57 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
int put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
ScanCtx.stat_tn_size += buf_len;
int db_full = FALSE;
int should_abort_transaction = FALSE;
if (put_ret == MDB_MAP_FULL) {
mdb_txn_abort(txn);
db_full = TRUE;
should_abort_transaction = TRUE;
} else {
int commit_ret = mdb_txn_commit(txn);
if (commit_ret == MDB_MAP_FULL) {
db_full = TRUE;
}
}
if (db_full) {
LOG_INFOF("store.c", "Updating mdb mapsize to %lu bytes", store->size)
if (should_abort_transaction) {
mdb_txn_abort(txn);
}
pthread_rwlock_unlock(&store->lock);
// Cannot resize when there is a opened transaction.
// Resize take effect on the next commit.
pthread_rwlock_wrlock(&store->lock);
store->size += store->chunk_size;
mdb_env_set_mapsize(store->env, store->size);
int resize_ret = mdb_env_set_mapsize(store->env, store->size);
if (resize_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
mdb_txn_begin(store->env, NULL, 0, &txn);
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
int put_ret_retry = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
if (put_ret_retry != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
int ret = mdb_txn_commit(txn);
if (ret != 0) {
LOG_FATALF("store.c", "FIXME: Could not commit to store %s: %s (%d), %d, %d %d",
store->path, mdb_strerror(ret), ret,
put_ret, put_ret_retry);
}
LOG_INFOF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
}
mdb_txn_commit(txn);
pthread_rwlock_unlock(&store->lock);
if (put_ret != 0) {
} else if (put_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
pthread_rwlock_unlock(&store->lock);
#endif
}

View File

@@ -6,12 +6,12 @@
#include <glib.h>
#define STORE_SIZE_TN 1024 * 1024 * 5
#define STORE_SIZE_TAG 1024 * 16
#define STORE_SIZE_TN (1024 * 1024 * 5)
#define STORE_SIZE_TAG (1024 * 1024)
#define STORE_SIZE_META STORE_SIZE_TAG
typedef struct store_t {
char *path;
char path[PATH_MAX];
char *tmp_path;
MDB_dbi dbi;
MDB_env *env;

View File

@@ -24,16 +24,22 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;
// Filesystem reads are always rewindable
job->vfile.read_rewindable = fs_read;
job->vfile.reset = fs_reset;
job->vfile.close = fs_close;
job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE;
job->vfile.has_checksum = FALSE;
job->vfile.rewind_buffer_size = 0;
job->vfile.rewind_buffer = NULL;
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
return job;
}
int sub_strings[30];
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, filepath, strlen(filepath), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, str, strlen(str), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {

View File

@@ -170,6 +170,8 @@ void initialize_scan_context(scan_args_t *args) {
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
ScanCtx.calculate_checksums = args->calculate_checksums;
// Archive
ScanCtx.arc_ctx.mode = args->archive_mode;
ScanCtx.arc_ctx.log = _log;
@@ -259,10 +261,18 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.store = _store;
// Wpd
ScanCtx.wpd_ctx.content_size = args->content_size;
ScanCtx.wpd_ctx.log = _log;
ScanCtx.wpd_ctx.logf = _logf;
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/wordperfect");
// Json
ScanCtx.json_ctx.content_size = args->content_size;
ScanCtx.json_ctx.log = _log;
ScanCtx.json_ctx.logf = _logf;
ScanCtx.json_ctx.json_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/json");
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson");
}
@@ -508,8 +518,8 @@ void sist2_web(web_args_t *args) {
int main(int argc, const char *argv[]) {
sigsegv_handler = signal(SIGSEGV, sig_handler);
sigabrt_handler = signal(SIGABRT, sig_handler);
// sigsegv_handler = signal(SIGSEGV, sig_handler);
// sigabrt_handler = signal(SIGABRT, sig_handler);
setlocale(LC_ALL, "");
@@ -566,6 +576,7 @@ int main(int argc, const char *argv[]) {
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
"Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."),
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),

View File

@@ -35,425 +35,426 @@ enum mime {
application_mime=655387,
application_mspowerpoint=655388,
application_msword=655389,
application_netmc=655390,
application_octet_stream=655391,
application_oda=655392,
application_ogg=655393,
application_pdf=655394 | 0x40000000,
application_pgp_keys=655395,
application_pgp_signature=655396,
application_pkcs7_signature=655397,
application_pkix_cert=655398,
application_postscript=655399,
application_pro_eng=655400,
application_ringing_tones=655401,
application_smil=655402,
application_solids=655403,
application_sounder=655404,
application_step=655405,
application_streamingmedia=655406,
application_vda=655407,
application_vnd_amazon_mobi8_ebook=655408 | 0x02000000,
application_vnd_coffeescript=655409,
application_vnd_fdf=655410,
application_vnd_font_fontforge_sfd=655411,
application_vnd_hp_hpgl=655412,
application_vnd_iccprofile=655413,
application_vnd_lotus_1_2_3=655414,
application_vnd_ms_cab_compressed=655415,
application_vnd_ms_excel=655416,
application_vnd_ms_fontobject=655417,
application_vnd_ms_opentype=655418 | 0x20000000,
application_vnd_ms_outlook=655419,
application_vnd_ms_pki_certstore=655420,
application_vnd_ms_pki_pko=655421,
application_vnd_ms_pki_seccat=655422,
application_vnd_ms_powerpoint=655423,
application_vnd_ms_project=655424,
application_vnd_oasis_opendocument_base=655425,
application_vnd_oasis_opendocument_formula=655426,
application_vnd_oasis_opendocument_graphics=655427,
application_vnd_oasis_opendocument_presentation=655428,
application_vnd_oasis_opendocument_spreadsheet=655429,
application_vnd_oasis_opendocument_text=655430,
application_vnd_openxmlformats_officedocument_presentationml_presentation=655431 | 0x04000000,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655432 | 0x04000000,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655433 | 0x04000000,
application_vnd_symbian_install=655434,
application_vnd_tcpdump_pcap=655435,
application_vnd_wap_wmlc=655436,
application_vnd_wap_wmlscriptc=655437,
application_vnd_xara=655438,
application_vocaltec_media_desc=655439,
application_vocaltec_media_file=655440,
application_warc=655441,
application_winhelp=655442,
application_wordperfect=655443,
application_x_123=655444,
application_x_7z_compressed=655445 | 0x10000000,
application_x_aim=655446,
application_x_apple_diskimage=655447,
application_x_arc=655448 | 0x10000000,
application_x_archive=655449,
application_x_atari_7800_rom=655450,
application_x_authorware_bin=655451,
application_x_authorware_map=655452,
application_x_authorware_seg=655453,
application_x_avira_qua=655454,
application_x_bcpio=655455,
application_x_bittorrent=655456,
application_x_bsh=655457,
application_x_bytecode_python=655458,
application_x_bzip=655459,
application_x_bzip2=655460 | 0x08000000,
application_x_cbr=655461,
application_x_cbz=655462,
application_x_cdlink=655463,
application_x_chat=655464,
application_x_chrome_extension=655465,
application_x_cocoa=655466,
application_x_conference=655467,
application_x_coredump=655468,
application_x_cpio=655469,
application_x_dbf=655470,
application_x_dbt=655471,
application_x_debian_package=655472,
application_x_deepv=655473,
application_x_director=655474,
application_x_dmp=655475,
application_x_dosdriver=655476,
application_x_dosexec=655477,
application_x_dvi=655478,
application_x_elc=655479,
application_ndjson=655390,
application_netmc=655391,
application_octet_stream=655392,
application_oda=655393,
application_ogg=655394,
application_pdf=655395 | 0x40000000,
application_pgp_keys=655396,
application_pgp_signature=655397,
application_pkcs7_signature=655398,
application_pkix_cert=655399,
application_postscript=655400,
application_pro_eng=655401,
application_ringing_tones=655402,
application_smil=655403,
application_solids=655404,
application_sounder=655405,
application_step=655406,
application_streamingmedia=655407,
application_vda=655408,
application_vnd_amazon_mobi8_ebook=655409 | 0x02000000,
application_vnd_coffeescript=655410,
application_vnd_fdf=655411,
application_vnd_font_fontforge_sfd=655412,
application_vnd_hp_hpgl=655413,
application_vnd_iccprofile=655414,
application_vnd_lotus_1_2_3=655415,
application_vnd_ms_cab_compressed=655416,
application_vnd_ms_excel=655417,
application_vnd_ms_fontobject=655418,
application_vnd_ms_opentype=655419 | 0x20000000,
application_vnd_ms_outlook=655420,
application_vnd_ms_pki_certstore=655421,
application_vnd_ms_pki_pko=655422,
application_vnd_ms_pki_seccat=655423,
application_vnd_ms_powerpoint=655424,
application_vnd_ms_project=655425,
application_vnd_oasis_opendocument_base=655426,
application_vnd_oasis_opendocument_formula=655427,
application_vnd_oasis_opendocument_graphics=655428,
application_vnd_oasis_opendocument_presentation=655429,
application_vnd_oasis_opendocument_spreadsheet=655430,
application_vnd_oasis_opendocument_text=655431,
application_vnd_openxmlformats_officedocument_presentationml_presentation=655432 | 0x04000000,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655433 | 0x04000000,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655434 | 0x04000000,
application_vnd_symbian_install=655435,
application_vnd_tcpdump_pcap=655436,
application_vnd_wap_wmlc=655437,
application_vnd_wap_wmlscriptc=655438,
application_vnd_xara=655439,
application_vocaltec_media_desc=655440,
application_vocaltec_media_file=655441,
application_warc=655442,
application_winhelp=655443,
application_wordperfect=655444,
application_x_123=655445,
application_x_7z_compressed=655446 | 0x10000000,
application_x_aim=655447,
application_x_apple_diskimage=655448,
application_x_arc=655449 | 0x10000000,
application_x_archive=655450,
application_x_atari_7800_rom=655451,
application_x_authorware_bin=655452,
application_x_authorware_map=655453,
application_x_authorware_seg=655454,
application_x_avira_qua=655455,
application_x_bcpio=655456,
application_x_bittorrent=655457,
application_x_bsh=655458,
application_x_bytecode_python=655459,
application_x_bzip=655460,
application_x_bzip2=655461 | 0x08000000,
application_x_cbr=655462,
application_x_cbz=655463,
application_x_cdlink=655464,
application_x_chat=655465,
application_x_chrome_extension=655466,
application_x_cocoa=655467,
application_x_conference=655468,
application_x_coredump=655469,
application_x_cpio=655470,
application_x_dbf=655471,
application_x_dbt=655472,
application_x_debian_package=655473,
application_x_deepv=655474,
application_x_director=655475,
application_x_dmp=655476,
application_x_dosdriver=655477,
application_x_dosexec=655478,
application_x_dvi=655479,
application_x_elc=655480,
application_x_empty=1,
application_x_envoy=655480,
application_x_esrehber=655481,
application_x_excel=655482,
application_x_executable=655483,
application_x_font_gdos=655484,
application_x_font_pf2=655485,
application_x_font_pfm=655486,
application_x_font_sfn=655487,
application_x_font_ttf=655488 | 0x20000000,
application_x_fptapplication_x_dbt=655489,
application_x_freelance=655490,
application_x_gamecube_rom=655491,
application_x_gdbm=655492,
application_x_gettext_translation=655493,
application_x_git=655494,
application_x_gsp=655495,
application_x_gss=655496,
application_x_gtar=655497,
application_x_gzip=655498,
application_x_hdf=655499,
application_x_helpfile=655500,
application_x_httpd_imap=655501,
application_x_ima=655502,
application_x_innosetup=655503,
application_x_internett_signup=655504,
application_x_inventor=655505,
application_x_ip2=655506,
application_x_java_applet=655507,
application_x_java_commerce=655508,
application_x_java_image=655509,
application_x_java_jmod=655510,
application_x_java_keystore=655511,
application_x_kdelnk=655512,
application_x_koan=655513,
application_x_latex=655514,
application_x_livescreen=655515,
application_x_lotus=655516,
application_x_lz4=655517 | 0x08000000,
application_x_lz4_json=655518,
application_x_lzh=655519,
application_x_lzh_compressed=655520,
application_x_lzip=655521 | 0x08000000,
application_x_lzma=655522 | 0x08000000,
application_x_lzop=655523 | 0x08000000,
application_x_lzx=655524,
application_x_mach_binary=655525,
application_x_mach_executable=655526,
application_x_magic_cap_package_1_0=655527,
application_x_mathcad=655528,
application_x_maxis_dbpf=655529,
application_x_meme=655530,
application_x_midi=655531,
application_x_mif=655532,
application_x_mix_transfer=655533,
application_x_mobipocket_ebook=655534 | 0x02000000,
application_x_ms_compress_szdd=655535,
application_x_ms_pdb=655536,
application_x_ms_reader=655537,
application_x_msaccess=655538,
application_x_n64_rom=655539,
application_x_navi_animation=655540,
application_x_navidoc=655541,
application_x_navimap=655542,
application_x_navistyle=655543,
application_x_nes_rom=655544,
application_x_netcdf=655545,
application_x_newton_compatible_pkg=655546,
application_x_nintendo_ds_rom=655547,
application_x_object=655548,
application_x_omc=655549,
application_x_omcdatamaker=655550,
application_x_omcregerator=655551,
application_x_pagemaker=655552,
application_x_pcl=655553,
application_x_pgp_keyring=655554,
application_x_pixclscript=655555,
application_x_pkcs7_certreqresp=655556,
application_x_pkcs7_signature=655557,
application_x_project=655558,
application_x_qpro=655559,
application_x_rar=655560 | 0x10000000,
application_x_rpm=655561,
application_x_sdp=655562,
application_x_sea=655563,
application_x_seelogo=655564,
application_x_setupscript=655565,
application_x_shar=655566,
application_x_sharedlib=655567,
application_x_shockwave_flash=655568,
application_x_snappy_framed=655569,
application_x_sprite=655570,
application_x_sqlite3=655571,
application_x_stargallery_thm=655572,
application_x_stuffit=655573,
application_x_sv4cpio=655574,
application_x_sv4crc=655575,
application_x_tar=655576 | 0x10000000,
application_x_tbook=655577,
application_x_terminfo=655578,
application_x_terminfo2=655579,
application_x_tex_tfm=655580,
application_x_texinfo=655581,
application_x_ustar=655582,
application_x_visio=655583,
application_x_vnd_audioexplosion_mzz=655584,
application_x_vnd_ls_xpix=655585,
application_x_vrml=655586,
application_x_wais_source=655587,
application_x_wine_extension_ini=655588,
application_x_wintalk=655589,
application_x_world=655590,
application_x_wri=655591,
application_x_x509_ca_cert=655592,
application_x_xz=655593 | 0x08000000,
application_x_zip=655594,
application_x_zstd=655595 | 0x08000000,
application_x_zstd_dictionary=655596,
application_xml=655597,
application_zip=655598 | 0x10000000,
application_zlib=655599,
audio_basic=458992 | 0x80000000,
audio_it=458993,
audio_make=458994,
audio_mid=458995,
audio_midi=458996,
audio_mp4=458997,
audio_mpeg=458998,
audio_ogg=458999,
audio_s3m=459000,
audio_tsp_audio=459001,
audio_tsplayer=459002,
audio_vnd_qcelp=459003,
audio_voxware=459004,
audio_x_aiff=459005,
audio_x_flac=459006,
audio_x_gsm=459007,
audio_x_hx_aac_adts=459008,
audio_x_jam=459009,
audio_x_liveaudio=459010,
audio_x_m4a=459011,
audio_x_midi=459012,
audio_x_mod=459013,
audio_x_mp4a_latm=459014,
audio_x_mpeg_3=459015,
audio_x_mpequrl=459016,
audio_x_nspaudio=459017,
audio_x_pn_realaudio=459018,
audio_x_psid=459019,
audio_x_realaudio=459020,
audio_x_s3m=459021,
audio_x_twinvq=459022,
audio_x_twinvq_plugin=459023,
audio_x_voc=459024,
audio_x_wav=459025,
audio_x_xbox_executable=459026 | 0x80000000,
audio_x_xbox360_executable=459027 | 0x80000000,
audio_xm=459028,
font_otf=327957 | 0x20000000,
font_sfnt=327958 | 0x20000000,
font_woff=327959 | 0x20000000,
font_woff2=327960 | 0x20000000,
image_bmp=524569,
image_cmu_raster=524570,
image_fif=524571,
image_florian=524572,
image_g3fax=524573,
image_gif=524574,
image_heic=524575,
image_ief=524576,
image_jpeg=524577,
image_jutvision=524578,
image_naplps=524579,
image_pict=524580,
image_png=524581,
image_svg=524582 | 0x80000000,
image_svg_xml=524583 | 0x80000000,
image_tiff=524584,
image_vnd_adobe_photoshop=524585 | 0x80000000,
image_vnd_djvu=524586 | 0x80000000,
image_vnd_fpx=524587,
image_vnd_microsoft_icon=524588,
image_vnd_rn_realflash=524589,
image_vnd_rn_realpix=524590,
image_vnd_wap_wbmp=524591,
image_vnd_xiff=524592,
image_webp=524593,
image_wmf=524594,
image_x_3ds=524595,
image_x_adobe_dng=524596 | 0x00800000,
image_x_award_bioslogo=524597,
image_x_canon_cr2=524598 | 0x00800000,
image_x_canon_crw=524599 | 0x00800000,
image_x_cmu_raster=524600,
image_x_cur=524601,
image_x_dcraw=524602 | 0x00800000,
image_x_dwg=524603,
image_x_eps=524604,
image_x_epson_erf=524605 | 0x00800000,
image_x_exr=524606,
image_x_fuji_raf=524607 | 0x00800000,
image_x_gem=524608,
image_x_icns=524609,
image_x_icon=524610 | 0x80000000,
image_x_jg=524611,
image_x_jps=524612,
image_x_kodak_dcr=524613 | 0x00800000,
image_x_kodak_k25=524614 | 0x00800000,
image_x_kodak_kdc=524615 | 0x00800000,
image_x_minolta_mrw=524616 | 0x00800000,
image_x_ms_bmp=524617,
image_x_niff=524618,
image_x_nikon_nef=524619 | 0x00800000,
image_x_olympus_orf=524620 | 0x00800000,
image_x_panasonic_raw=524621 | 0x00800000,
image_x_pcx=524622,
image_x_pentax_pef=524623 | 0x00800000,
image_x_pict=524624,
image_x_portable_bitmap=524625,
image_x_portable_graymap=524626,
image_x_portable_pixmap=524627,
image_x_quicktime=524628,
image_x_rgb=524629,
image_x_sigma_x3f=524630 | 0x00800000,
image_x_sony_arw=524631 | 0x00800000,
image_x_sony_sr2=524632 | 0x00800000,
image_x_sony_srf=524633 | 0x00800000,
image_x_tga=524634,
image_x_tiff=524635,
image_x_win_bitmap=524636,
image_x_xcf=524637 | 0x80000000,
image_x_xpixmap=524638 | 0x80000000,
image_x_xwindowdump=524639,
message_news=196960,
message_rfc822=196961,
model_vnd_dwf=65890,
model_vnd_gdl=65891,
model_vnd_gs_gdl=65892,
model_vrml=65893,
model_x_pov=65894,
application_x_envoy=655481,
application_x_esrehber=655482,
application_x_excel=655483,
application_x_executable=655484,
application_x_font_gdos=655485,
application_x_font_pf2=655486,
application_x_font_pfm=655487,
application_x_font_sfn=655488,
application_x_font_ttf=655489 | 0x20000000,
application_x_fptapplication_x_dbt=655490,
application_x_freelance=655491,
application_x_gamecube_rom=655492,
application_x_gdbm=655493,
application_x_gettext_translation=655494,
application_x_git=655495,
application_x_gsp=655496,
application_x_gss=655497,
application_x_gtar=655498,
application_x_gzip=655499,
application_x_hdf=655500,
application_x_helpfile=655501,
application_x_httpd_imap=655502,
application_x_ima=655503,
application_x_innosetup=655504,
application_x_internett_signup=655505,
application_x_inventor=655506,
application_x_ip2=655507,
application_x_java_applet=655508,
application_x_java_commerce=655509,
application_x_java_image=655510,
application_x_java_jmod=655511,
application_x_java_keystore=655512,
application_x_kdelnk=655513,
application_x_koan=655514,
application_x_latex=655515,
application_x_livescreen=655516,
application_x_lotus=655517,
application_x_lz4=655518 | 0x08000000,
application_x_lz4_json=655519,
application_x_lzh=655520,
application_x_lzh_compressed=655521,
application_x_lzip=655522 | 0x08000000,
application_x_lzma=655523 | 0x08000000,
application_x_lzop=655524 | 0x08000000,
application_x_lzx=655525,
application_x_mach_binary=655526,
application_x_mach_executable=655527,
application_x_magic_cap_package_1_0=655528,
application_x_mathcad=655529,
application_x_maxis_dbpf=655530,
application_x_meme=655531,
application_x_midi=655532,
application_x_mif=655533,
application_x_mix_transfer=655534,
application_x_mobipocket_ebook=655535 | 0x02000000,
application_x_ms_compress_szdd=655536,
application_x_ms_pdb=655537,
application_x_ms_reader=655538,
application_x_msaccess=655539,
application_x_n64_rom=655540,
application_x_navi_animation=655541,
application_x_navidoc=655542,
application_x_navimap=655543,
application_x_navistyle=655544,
application_x_nes_rom=655545,
application_x_netcdf=655546,
application_x_newton_compatible_pkg=655547,
application_x_nintendo_ds_rom=655548,
application_x_object=655549,
application_x_omc=655550,
application_x_omcdatamaker=655551,
application_x_omcregerator=655552,
application_x_pagemaker=655553,
application_x_pcl=655554,
application_x_pgp_keyring=655555,
application_x_pixclscript=655556,
application_x_pkcs7_certreqresp=655557,
application_x_pkcs7_signature=655558,
application_x_project=655559,
application_x_qpro=655560,
application_x_rar=655561 | 0x10000000,
application_x_rpm=655562,
application_x_sdp=655563,
application_x_sea=655564,
application_x_seelogo=655565,
application_x_setupscript=655566,
application_x_shar=655567,
application_x_sharedlib=655568,
application_x_shockwave_flash=655569,
application_x_snappy_framed=655570,
application_x_sprite=655571,
application_x_sqlite3=655572,
application_x_stargallery_thm=655573,
application_x_stuffit=655574,
application_x_sv4cpio=655575,
application_x_sv4crc=655576,
application_x_tar=655577 | 0x10000000,
application_x_tbook=655578,
application_x_terminfo=655579,
application_x_terminfo2=655580,
application_x_tex_tfm=655581,
application_x_texinfo=655582,
application_x_ustar=655583,
application_x_visio=655584,
application_x_vnd_audioexplosion_mzz=655585,
application_x_vnd_ls_xpix=655586,
application_x_vrml=655587,
application_x_wais_source=655588,
application_x_wine_extension_ini=655589,
application_x_wintalk=655590,
application_x_world=655591,
application_x_wri=655592,
application_x_x509_ca_cert=655593,
application_x_xz=655594 | 0x08000000,
application_x_zip=655595,
application_x_zstd=655596 | 0x08000000,
application_x_zstd_dictionary=655597,
application_xml=655598,
application_zip=655599 | 0x10000000,
application_zlib=655600,
audio_basic=458993 | 0x80000000,
audio_it=458994,
audio_make=458995,
audio_mid=458996,
audio_midi=458997,
audio_mp4=458998,
audio_mpeg=458999,
audio_ogg=459000,
audio_s3m=459001,
audio_tsp_audio=459002,
audio_tsplayer=459003,
audio_vnd_qcelp=459004,
audio_voxware=459005,
audio_x_aiff=459006,
audio_x_flac=459007,
audio_x_gsm=459008,
audio_x_hx_aac_adts=459009,
audio_x_jam=459010,
audio_x_liveaudio=459011,
audio_x_m4a=459012,
audio_x_midi=459013,
audio_x_mod=459014,
audio_x_mp4a_latm=459015,
audio_x_mpeg_3=459016,
audio_x_mpequrl=459017,
audio_x_nspaudio=459018,
audio_x_pn_realaudio=459019,
audio_x_psid=459020,
audio_x_realaudio=459021,
audio_x_s3m=459022,
audio_x_twinvq=459023,
audio_x_twinvq_plugin=459024,
audio_x_voc=459025,
audio_x_wav=459026,
audio_x_xbox_executable=459027 | 0x80000000,
audio_x_xbox360_executable=459028 | 0x80000000,
audio_xm=459029,
font_otf=327958 | 0x20000000,
font_sfnt=327959 | 0x20000000,
font_woff=327960 | 0x20000000,
font_woff2=327961 | 0x20000000,
image_bmp=524570,
image_cmu_raster=524571,
image_fif=524572,
image_florian=524573,
image_g3fax=524574,
image_gif=524575,
image_heic=524576,
image_ief=524577,
image_jpeg=524578,
image_jutvision=524579,
image_naplps=524580,
image_pict=524581,
image_png=524582,
image_svg=524583 | 0x80000000,
image_svg_xml=524584 | 0x80000000,
image_tiff=524585,
image_vnd_adobe_photoshop=524586 | 0x80000000,
image_vnd_djvu=524587 | 0x80000000,
image_vnd_fpx=524588,
image_vnd_microsoft_icon=524589,
image_vnd_rn_realflash=524590,
image_vnd_rn_realpix=524591,
image_vnd_wap_wbmp=524592,
image_vnd_xiff=524593,
image_webp=524594,
image_wmf=524595,
image_x_3ds=524596,
image_x_adobe_dng=524597 | 0x00800000,
image_x_award_bioslogo=524598,
image_x_canon_cr2=524599 | 0x00800000,
image_x_canon_crw=524600 | 0x00800000,
image_x_cmu_raster=524601,
image_x_cur=524602,
image_x_dcraw=524603 | 0x00800000,
image_x_dwg=524604,
image_x_eps=524605,
image_x_epson_erf=524606 | 0x00800000,
image_x_exr=524607,
image_x_fuji_raf=524608 | 0x00800000,
image_x_gem=524609,
image_x_icns=524610,
image_x_icon=524611 | 0x80000000,
image_x_jg=524612,
image_x_jps=524613,
image_x_kodak_dcr=524614 | 0x00800000,
image_x_kodak_k25=524615 | 0x00800000,
image_x_kodak_kdc=524616 | 0x00800000,
image_x_minolta_mrw=524617 | 0x00800000,
image_x_ms_bmp=524618,
image_x_niff=524619,
image_x_nikon_nef=524620 | 0x00800000,
image_x_olympus_orf=524621 | 0x00800000,
image_x_panasonic_raw=524622 | 0x00800000,
image_x_pcx=524623,
image_x_pentax_pef=524624 | 0x00800000,
image_x_pict=524625,
image_x_portable_bitmap=524626,
image_x_portable_graymap=524627,
image_x_portable_pixmap=524628,
image_x_quicktime=524629,
image_x_rgb=524630,
image_x_sigma_x3f=524631 | 0x00800000,
image_x_sony_arw=524632 | 0x00800000,
image_x_sony_sr2=524633 | 0x00800000,
image_x_sony_srf=524634 | 0x00800000,
image_x_tga=524635,
image_x_tiff=524636,
image_x_win_bitmap=524637,
image_x_xcf=524638 | 0x80000000,
image_x_xpixmap=524639 | 0x80000000,
image_x_xwindowdump=524640,
message_news=196961,
message_rfc822=196962,
model_vnd_dwf=65891,
model_vnd_gdl=65892,
model_vnd_gs_gdl=65893,
model_vrml=65894,
model_x_pov=65895,
sist2_sidecar=2,
text_PGP=590183,
text_asp=590184,
text_css=590185,
text_html=590186 | 0x01000000,
text_javascript=590187,
text_mcf=590188,
text_pascal=590189,
text_plain=590190,
text_richtext=590191,
text_rtf=590192,
text_scriplet=590193,
text_tab_separated_values=590194,
text_troff=590195,
text_uri_list=590196,
text_vnd_abc=590197,
text_vnd_fmi_flexstor=590198,
text_vnd_wap_wml=590199,
text_vnd_wap_wmlscript=590200,
text_webviewhtml=590201,
text_x_Algol68=590202,
text_x_asm=590203,
text_x_audiosoft_intra=590204,
text_x_awk=590205,
text_x_bcpl=590206,
text_x_c=590207,
text_x_c__=590208,
text_x_component=590209,
text_x_diff=590210,
text_x_fortran=590211,
text_x_java=590212,
text_x_la_asf=590213,
text_x_lisp=590214,
text_x_m=590215,
text_x_m4=590216,
text_x_makefile=590217,
text_x_ms_regedit=590218,
text_x_msdos_batch=590219,
text_x_objective_c=590220,
text_x_pascal=590221,
text_x_perl=590222,
text_x_php=590223,
text_x_po=590224,
text_x_python=590225,
text_x_ruby=590226,
text_x_sass=590227,
text_x_scss=590228,
text_x_server_parsed_html=590229,
text_x_setext=590230,
text_x_sgml=590231 | 0x01000000,
text_x_shellscript=590232,
text_x_speech=590233,
text_x_tcl=590234,
text_x_tex=590235,
text_x_uil=590236,
text_x_uuencode=590237,
text_x_vcalendar=590238,
text_x_vcard=590239,
text_xml=590240 | 0x01000000,
video_MP2T=393633,
video_animaflex=393634,
video_avi=393635,
video_avs_video=393636,
video_mp4=393637,
video_mpeg=393638,
video_quicktime=393639,
video_vdo=393640,
video_vivo=393641,
video_vnd_rn_realvideo=393642,
video_vosaic=393643,
video_webm=393644,
video_x_amt_demorun=393645,
video_x_amt_showrun=393646,
video_x_atomic3d_feature=393647,
video_x_dl=393648,
video_x_dv=393649,
video_x_fli=393650,
video_x_flv=393651,
video_x_isvideo=393652,
video_x_jng=393653 | 0x80000000,
video_x_m4v=393654,
video_x_matroska=393655,
video_x_mng=393656,
video_x_motion_jpeg=393657,
video_x_ms_asf=393658,
video_x_msvideo=393659,
video_x_qtc=393660,
video_x_sgi_movie=393661,
x_epoc_x_sisx_app=721342,
text_PGP=590184,
text_asp=590185,
text_css=590186,
text_html=590187 | 0x01000000,
text_javascript=590188,
text_mcf=590189,
text_pascal=590190,
text_plain=590191,
text_richtext=590192,
text_rtf=590193,
text_scriplet=590194,
text_tab_separated_values=590195,
text_troff=590196,
text_uri_list=590197,
text_vnd_abc=590198,
text_vnd_fmi_flexstor=590199,
text_vnd_wap_wml=590200,
text_vnd_wap_wmlscript=590201,
text_webviewhtml=590202,
text_x_Algol68=590203,
text_x_asm=590204,
text_x_audiosoft_intra=590205,
text_x_awk=590206,
text_x_bcpl=590207,
text_x_c=590208,
text_x_c__=590209,
text_x_component=590210,
text_x_diff=590211,
text_x_fortran=590212,
text_x_java=590213,
text_x_la_asf=590214,
text_x_lisp=590215,
text_x_m=590216,
text_x_m4=590217,
text_x_makefile=590218,
text_x_ms_regedit=590219,
text_x_msdos_batch=590220,
text_x_objective_c=590221,
text_x_pascal=590222,
text_x_perl=590223,
text_x_php=590224,
text_x_po=590225,
text_x_python=590226,
text_x_ruby=590227,
text_x_sass=590228,
text_x_scss=590229,
text_x_server_parsed_html=590230,
text_x_setext=590231,
text_x_sgml=590232 | 0x01000000,
text_x_shellscript=590233,
text_x_speech=590234,
text_x_tcl=590235,
text_x_tex=590236,
text_x_uil=590237,
text_x_uuencode=590238,
text_x_vcalendar=590239,
text_x_vcard=590240,
text_xml=590241 | 0x01000000,
video_MP2T=393634,
video_animaflex=393635,
video_avi=393636,
video_avs_video=393637,
video_mp4=393638,
video_mpeg=393639,
video_quicktime=393640,
video_vdo=393641,
video_vivo=393642,
video_vnd_rn_realvideo=393643,
video_vosaic=393644,
video_webm=393645,
video_x_amt_demorun=393646,
video_x_amt_showrun=393647,
video_x_atomic3d_feature=393648,
video_x_dl=393649,
video_x_dv=393650,
video_x_fli=393651,
video_x_flv=393652,
video_x_isvideo=393653,
video_x_jng=393654 | 0x80000000,
video_x_m4v=393655,
video_x_matroska=393656,
video_x_mng=393657,
video_x_motion_jpeg=393658,
video_x_ms_asf=393659,
video_x_msvideo=393660,
video_x_qtc=393661,
video_x_sgi_movie=393662,
x_epoc_x_sisx_app=721343,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj";
@@ -480,6 +481,7 @@ case application_java_archive: return "application/java-archive";
case application_java: return "application/java";
case application_javascript: return "application/javascript";
case application_json: return "application/json";
case application_ndjson: return "application/ndjson";
case application_marc: return "application/marc";
case application_mbedlet: return "application/mbedlet";
case application_mime: return "application/mime";
@@ -930,6 +932,8 @@ g_hash_table_insert(ext_table, "inf", (gpointer)application_inf);
g_hash_table_insert(ext_table, "jar", (gpointer)application_java_archive);
g_hash_table_insert(ext_table, "class", (gpointer)application_java);
g_hash_table_insert(ext_table, "json", (gpointer)application_json);
g_hash_table_insert(ext_table, "jsonl", (gpointer)application_ndjson);
g_hash_table_insert(ext_table, "ndjson", (gpointer)application_ndjson);
g_hash_table_insert(ext_table, "mrc", (gpointer)application_marc);
g_hash_table_insert(ext_table, "mbd", (gpointer)application_mbedlet);
g_hash_table_insert(ext_table, "aps", (gpointer)application_mime);
@@ -1474,6 +1478,7 @@ g_hash_table_insert(mime_table, "application/java-archive", (gpointer)applicatio
g_hash_table_insert(mime_table, "application/java", (gpointer)application_java);
g_hash_table_insert(mime_table, "application/javascript", (gpointer)application_javascript);
g_hash_table_insert(mime_table, "application/json", (gpointer)application_json);
g_hash_table_insert(mime_table, "application/ndjson", (gpointer)application_ndjson);
g_hash_table_insert(mime_table, "application/marc", (gpointer)application_marc);
g_hash_table_insert(mime_table, "application/mbedlet", (gpointer)application_mbedlet);
g_hash_table_insert(mime_table, "application/mime", (gpointer)application_mime);

View File

@@ -10,25 +10,34 @@
#define MIN_VIDEO_SIZE (1024 * 64)
#define MIN_IMAGE_SIZE (1024 * 2)
#define MIN_IMAGE_SIZE (512)
int fs_read(struct vfile *f, void *buf, size_t size) {
if (f->fd == -1) {
SHA1_Init(&f->sha1_ctx);
f->fd = open(f->filepath, O_RDONLY);
if (f->fd == -1) {
LOG_ERRORF(f->filepath, "open(): [%d] %s", errno, strerror(errno))
return -1;
}
}
return read(f->fd, buf, size);
int ret = (int) read(f->fd, buf, size);
if (ret != 0 && f->calculate_checksum) {
f->has_checksum = TRUE;
safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, ret);
}
return ret;
}
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
void fs_close(struct vfile *f) {
if (f->fd != -1) {
SHA1_Final(f->sha1_digest, &f->sha1_ctx);
close(f->fd);
}
}
@@ -66,7 +75,7 @@ void parse(void *arg) {
doc->meta_tail = NULL;
doc->mime = 0;
doc->size = job->vfile.info.st_size;
doc->mtime = job->vfile.info.st_mtim.tv_sec;
doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
@@ -93,18 +102,17 @@ void parse(void *arg) {
doc->mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
}
int bytes_read = 0;
if (doc->mime == 0 && !ScanCtx.fast) {
// Get mime type with libmagic
if (!job->vfile.is_fs_file) {
if (job->vfile.read_rewindable == NULL) {
LOG_WARNING(job->filepath,
"Guessing mime type with libmagic inside archive files is not currently supported");
"File does not support rewindable reads, cannot guess Media type");
goto abort;
}
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE);
int bytes_read = job->vfile.read_rewindable(&job->vfile, buf, MAGIC_BUF_SIZE);
if (bytes_read < 0) {
if (job->vfile.is_fs_file) {
@@ -135,7 +143,9 @@ void parse(void *arg) {
}
}
job->vfile.reset(&job->vfile);
if (job->vfile.reset != NULL) {
job->vfile.reset(&job->vfile);
}
magic_close(magic);
}
@@ -149,7 +159,7 @@ void parse(void *arg) {
} else if ((mmime == MimeVideo && doc->size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc->size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
parse_media(&ScanCtx.media_ctx, &job->vfile, doc);
parse_media(&ScanCtx.media_ctx, &job->vfile, doc, mime_get_mime_text(doc->mime));
} else if (IS_PDF(doc->mime)) {
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc);
@@ -169,7 +179,7 @@ void parse(void *arg) {
IS_ARC(doc->mime) ||
(IS_ARC_FILTER(doc->mime) && should_parse_filtered_file(doc->filepath, doc->ext))
)) {
parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc);
parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc, ScanCtx.exclude, ScanCtx.exclude_extra);
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc->mime)) {
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, doc);
} else if (is_cbr(&ScanCtx.comic_ctx, doc->mime) || is_cbz(&ScanCtx.comic_ctx, doc->mime)) {
@@ -179,11 +189,15 @@ void parse(void *arg) {
} else if (doc->mime == MIME_SIST2_SIDECAR) {
parse_sidecar(&job->vfile, doc);
CLOSE_FILE(job->vfile)
free(doc->filepath);
free(doc);
return;
} else if (is_msdoc(&ScanCtx.msdoc_ctx, doc->mime)) {
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc);
} else if (is_wpd(&ScanCtx.wpd_ctx, doc->mime)) {
parse_wpd(&ScanCtx.wpd_ctx, &job->vfile, doc);
} else if (is_json(&ScanCtx.json_ctx, doc->mime)) {
parse_json(&ScanCtx.json_ctx, &job->vfile, doc);
} else if (is_ndjson(&ScanCtx.json_ctx, doc->mime)) {
parse_ndjson(&ScanCtx.json_ctx, &job->vfile, doc);
}
abort:
@@ -200,9 +214,15 @@ void parse(void *arg) {
doc->has_parent = FALSE;
}
write_document(doc);
CLOSE_FILE(job->vfile)
if (job->vfile.has_checksum) {
char sha1_digest_str[SHA1_STR_LENGTH];
buf2hex((unsigned char *) job->vfile.sha1_digest, SHA1_DIGEST_LENGTH, (char *) sha1_digest_str);
APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str);
}
write_document(doc);
}
void cleanup_parse() {

View File

@@ -3,7 +3,7 @@
#include "../sist.h"
#define MAGIC_BUF_SIZE 4096 * 6
#define MAGIC_BUF_SIZE (4096 * 6)
int fs_read(struct vfile *f, void *buf, size_t size);
void fs_close(struct vfile *f);

View File

@@ -27,7 +27,10 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
path_md5);
store_write(ScanCtx.index.meta_store, (char *) path_md5, sizeof(path_md5), json_str, strlen(json_str) + 1);
char path_md5_str[MD5_STR_LENGTH];
buf2hex(path_md5, MD5_DIGEST_LENGTH, path_md5_str);
store_write(ScanCtx.index.meta_store, path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
cJSON_Delete(json);
free(json_str);

View File

@@ -26,6 +26,8 @@
#define UNUSED(x) __attribute__((__unused__)) x
#define MD5_STR_LENGTH 33
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
#include "util.h"
#include "log.h"
@@ -49,7 +51,7 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "2.11.2"
#define VERSION "2.11.3"
static const char *const Version = VERSION;
#ifndef SIST_PLATFORM

File diff suppressed because one or more lines are too long