Compare commits

...

13 Commits

36 changed files with 675 additions and 553 deletions

View File

@ -50,7 +50,8 @@ sist2 (Simple incremental search tool)
``` ```
1. Download sist2 executable 1. Download sist2 executable
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) * 1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not recommended!)* 1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
recommended!)*
1. *(or)* `docker pull simon987/sist2:2.11.2-x64-linux` 1. *(or)* `docker pull simon987/sist2:2.11.2-x64-linux`
1. See [Usage guide](docs/USAGE.md) 1. See [Usage guide](docs/USAGE.md)
@ -70,19 +71,20 @@ See [Usage guide](docs/USAGE.md) for more details
File type | Library | Content | Thumbnail | Metadata File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:--- :---|:---|:---|:---|:---
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title | pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
cbz,cbr | *(none)* | - | yes | - | cbz,cbr | [libscan](https://github.com/simon987/libscan) | - | yes | - |
`audio/*` | ffmpeg | - | yes | ID3 tags | `audio/*` | ffmpeg | - | yes | ID3 tags |
`video/*` | ffmpeg | - | yes | title, comment, artist | `video/*` | ffmpeg | - | yes | title, comment, artist |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags | `image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags | raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style | ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | *(none)* | yes | no | - | `text/plain` | [libscan](https://github.com/simon987/libscan) | yes | no | - |
html, xml | *(none)* | yes | no | - | html, xml | [libscan](https://github.com/simon987/libscan) | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no | tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | *(none)* | yes | if embedded | creator, modified_by, title | docx, xlsx, pptx | [libscan](https://github.com/simon987/libscan) | yes | if embedded | creator, modified_by, title |
doc (MS Word 97-2003) | antiword | yes | yes | author, title | doc (MS Word 97-2003) | antiword | yes | yes | author, title |
mobi, azw, azw3 | libmobi | yes | no | author, title | mobi, azw, azw3 | libmobi | yes | no | author, title |
wpd (WordPerfect) | libwpd | yes | no | *planned* | wpd (WordPerfect) | libwpd | yes | no | *planned* |
json, jsonl, ndjson | [libscan](https://github.com/simon987/libscan) | yes | - | - |
\* *See [Archive files](#archive-files)* \* *See [Archive files](#archive-files)*

View File

@ -49,6 +49,7 @@ Scan options
--mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000 --mem-buffer=<int> Maximum memory buffer size per thread in MB for files inside archives (see USAGE.md). DEFAULT: 2000
--read-subtitles Read subtitles from media files. --read-subtitles Read subtitles from media files.
--fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata) --fast-epub Faster but less accurate EPUB parsing (no thumbnails, metadata)
--checksums Calculate file checksums when scanning.
Index options Index options
-t, --threads=<int> Number of threads. DEFAULT=1 -t, --threads=<int> Number of threads. DEFAULT=1
@ -129,6 +130,9 @@ Exec-script options
To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -` To check if a media file can be parsed without *seek*, execute `cat file.mp4 | ffprobe -`
* `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files. * `--read-subtitles` When enabled, will attempt to read the subtitles stream from media files.
* `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed. * `--fast-epub` Much faster but less accurate EPUB parsing. When enabled, sist2 will use a simple HTML parser to read epub files instead of the MuPDF library. No thumbnails are generated and author/title metadata are not parsed.
* `--checksums` Calculate file checksums (sha1) when scanning files. This option does not cause any additional read
operations. Checksums are not calculated for every file type, unless the file is inside an archive. When enabled, duplicate
files are hidden in the web UI (this behaviour can be toggled in the Configuration page).
### Scan examples ### Scan examples

View File

@ -4,6 +4,10 @@
"type": "keyword", "type": "keyword",
"doc_values": true "doc_values": true
}, },
"checksum": {
"type": "keyword",
"index": false
},
"_depth": { "_depth": {
"type": "integer" "type": "integer"
}, },

View File

@ -2,7 +2,8 @@
"index": { "index": {
"refresh_interval": "30s", "refresh_interval": "30s",
"codec": "best_compression", "codec": "best_compression",
"number_of_replicas": 0 "number_of_replicas": 0,
"highlight.max_analyzed_offset": 10000000
}, },
"analysis": { "analysis": {
"tokenizer": { "tokenizer": {

View File

@ -22,6 +22,7 @@ application/java-archive, jar
application/java, class application/java, class
application/javascript, application/javascript,
application/json, json application/json, json
application/ndjson, jsonl|ndjson
application/marc, mrc application/marc, mrc
application/mbedlet, mbd application/mbedlet, mbd
application/mime, aps application/mime, aps

1 application/arj arj
22 application/java class
23 application/javascript
24 application/json json
25 application/ndjson jsonl|ndjson
26 application/marc mrc
27 application/mbedlet mbd
28 application/mime aps

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -23,7 +23,6 @@
"vue-color": "^2.8.1", "vue-color": "^2.8.1",
"vue-i18n": "^8.24.4", "vue-i18n": "^8.24.4",
"vue-masonry-wall": "^0.3.2", "vue-masonry-wall": "^0.3.2",
"vue-multiselect": "^2.1.6",
"vue-router": "^3.2.0", "vue-router": "^3.2.0",
"vue-simple-suggest": "^1.11.1", "vue-simple-suggest": "^1.11.1",
"vuex": "^3.4.0" "vuex": "^3.4.0"
@ -13604,15 +13603,6 @@
"node": ">=10" "node": ">=10"
} }
}, },
"node_modules/vue-multiselect": {
"version": "2.1.6",
"resolved": "https://registry.npmjs.org/vue-multiselect/-/vue-multiselect-2.1.6.tgz",
"integrity": "sha512-s7jmZPlm9FeueJg1RwJtnE9KNPtME/7C8uRWSfp9/yEN4M8XcS/d+bddoyVwVnvFyRh9msFo0HWeW0vTL8Qv+w==",
"engines": {
"node": ">= 4.0.0",
"npm": ">= 3.0.0"
}
},
"node_modules/vue-observe-visibility": { "node_modules/vue-observe-visibility": {
"version": "0.4.6", "version": "0.4.6",
"resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz", "resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz",
@ -26376,11 +26366,6 @@
"vue-observe-visibility": "^0.4.6" "vue-observe-visibility": "^0.4.6"
} }
}, },
"vue-multiselect": {
"version": "2.1.6",
"resolved": "https://registry.npmjs.org/vue-multiselect/-/vue-multiselect-2.1.6.tgz",
"integrity": "sha512-s7jmZPlm9FeueJg1RwJtnE9KNPtME/7C8uRWSfp9/yEN4M8XcS/d+bddoyVwVnvFyRh9msFo0HWeW0vTL8Qv+w=="
},
"vue-observe-visibility": { "vue-observe-visibility": {
"version": "0.4.6", "version": "0.4.6",
"resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz", "resolved": "https://registry.npmjs.org/vue-observe-visibility/-/vue-observe-visibility-0.4.6.tgz",

View File

@ -22,7 +22,6 @@
"vue-color": "^2.8.1", "vue-color": "^2.8.1",
"vue-i18n": "^8.24.4", "vue-i18n": "^8.24.4",
"vue-masonry-wall": "^0.3.2", "vue-masonry-wall": "^0.3.2",
"vue-multiselect": "^2.1.6",
"vue-router": "^3.2.0", "vue-router": "^3.2.0",
"vue-simple-suggest": "^1.11.1", "vue-simple-suggest": "^1.11.1",
"vuex": "^3.4.0" "vuex": "^3.4.0"

View File

@ -50,6 +50,7 @@ export interface EsHit {
height: number height: number
duration: number duration: number
tag: string[] tag: string[]
checksum: string
} }
_props: { _props: {
isSubDocument: boolean isSubDocument: boolean

View File

@ -187,7 +187,8 @@ class Sist2Query {
"name.nGram": {}, "name.nGram": {},
"content.nGram": {}, "content.nGram": {},
font_name: {}, font_name: {},
} },
max_analyzed_offset: 9_999_999
}; };
if (getters.optSearchInPath) { if (getters.optSearchInPath) {
q.highlight.fields["path.text"] = {}; q.highlight.fields["path.text"] = {};

View File

@ -4,7 +4,8 @@
<template #modal-title> <template #modal-title>
<h5 class="modal-title" :title="doc._source.name + ext(doc)">{{ doc._source.name + ext(doc) }}</h5> <h5 class="modal-title" :title="doc._source.name + ext(doc)">{{ doc._source.name + ext(doc) }}</h5>
</template> </template>
<img :src="`t/${doc._source.index}/${doc._id}`" alt="" class="fit card-img-top">
<img v-if="doc._props.hasThumbnail" :src="`t/${doc._source.index}/${doc._id}`" alt="" class="fit card-img-top">
<InfoTable :doc="doc"></InfoTable> <InfoTable :doc="doc"></InfoTable>

View File

@ -1,5 +1,5 @@
<template> <template>
<b-list-group-item class="flex-column align-items-start mb-2"> <b-list-group-item class="flex-column align-items-start mb-2" :class="{'sub-document': doc._props.isSubDocument}">
<!-- Info modal--> <!-- Info modal-->
<DocInfoModal :show="showInfo" :doc="doc" @close="showInfo = false"></DocInfoModal> <DocInfoModal :show="showInfo" :doc="doc" @close="showInfo = false"></DocInfoModal>
@ -40,9 +40,11 @@
</div> </div>
<div v-if="doc._source.pages || doc._source.author" class="path-row text-muted"> <div v-if="doc._source.pages || doc._source.author" class="path-row text-muted">
<span v-if="doc._source.pages">{{ doc._source.pages }} {{ doc._source.pages > 1 ? $t("pages") : $t("page") }}</span> <span v-if="doc._source.pages">{{ doc._source.pages }} {{
doc._source.pages > 1 ? $t("pages") : $t("page")
}}</span>
<span v-if="doc._source.author && doc._source.pages" class="mx-1">-</span> <span v-if="doc._source.author && doc._source.pages" class="mx-1">-</span>
<span v-if="doc._source.author">{{doc._source.author}}</span> <span v-if="doc._source.author">{{ doc._source.author }}</span>
</div> </div>
</div> </div>
</div> </div>
@ -89,6 +91,14 @@ export default {
</script> </script>
<style scoped> <style scoped>
.sub-document {
background: #AB47BC1F !important;
}
.theme-black .sub-document {
background: #37474F !important;
}
.list-group { .list-group {
margin-top: 1em; margin-top: 1em;
} }

View File

@ -1,93 +1,95 @@
<template> <template>
<VueMultiselect <div v-if="isMobile">
multiple <b-form-select
label="name" :value="selectedIndicesIds"
:value="selectedIndices" @change="onSelect($event)"
:options="indices" :options="indices" multiple :select-size="6" text-field="name"
:close-on-select="indices.length <= 1" value-field="id"></b-form-select>
:placeholder="$t('indexPickerPlaceholder')" </div>
@select="addItem" <div v-else>
@remove="removeItem"> <b-list-group id="index-picker-desktop">
<b-list-group-item
<template slot="option" slot-scope="idx"> v-for="idx in indices"
<b-row> @click="toggleIndex(idx)"
<b-col> class="d-flex justify-content-between align-items-center list-group-item-action pointer">
<span class="mr-1">{{ idx.option.name }}</span> <div class="d-flex">
<SmallBadge pill :text="idx.option.version"></SmallBadge> <b-checkbox @change="toggleIndex(idx)" :checked="isSelected(idx)"></b-checkbox>
</b-col> {{ idx.name }}
</b-row> <span class="text-muted timestamp-text ml-2">{{ formatIdxDate(idx.timestamp) }}</span>
<b-row class="mt-1"> </div>
<b-col> <b-badge class="version-badge">v{{ idx.version }}</b-badge>
<span>{{ formatIdxDate(idx.option.timestamp) }}</span> </b-list-group-item>
</b-col> </b-list-group>
</b-row> </div>
</template>
</VueMultiselect>
</template> </template>
<script lang="ts"> <script lang="ts">
import VueMultiselect from "vue-multiselect"
import SmallBadge from "./SmallBadge.vue" import SmallBadge from "./SmallBadge.vue"
import {mapActions, mapGetters} from "vuex"; import {mapActions, mapGetters} from "vuex";
import {Index} from "@/Sist2Api";
import Vue from "vue"; import Vue from "vue";
import {format} from "date-fns"; import {format} from "date-fns";
export default Vue.extend({ export default Vue.extend({
components: { components: {
VueMultiselect,
SmallBadge SmallBadge
}, },
data() { data() {
return { return {
loading: true loading: true,
} }
}, },
computed: { computed: {
...mapGetters([ ...mapGetters([
"indices", "selectedIndices" "indices", "selectedIndices"
]), ]),
selectedIndicesIds() {
return this.selectedIndices.map(idx => idx.id)
},
isMobile() {
return window.innerWidth <= 650;
}
}, },
methods: { methods: {
...mapActions({ ...mapActions({
setSelectedIndices: "setSelectedIndices" setSelectedIndices: "setSelectedIndices"
}), }),
removeItem(val: Index): void { onSelect(value) {
this.setSelectedIndices(this.selectedIndices.filter((item: Index) => item !== val)) this.setSelectedIndices(this.indices.filter(idx => value.includes(idx.id)));
},
addItem(val: Index): void {
this.setSelectedIndices([...this.selectedIndices, val])
}, },
formatIdxDate(timestamp: number): string { formatIdxDate(timestamp: number): string {
return format(new Date(timestamp * 1000), "yyyy-MM-dd"); return format(new Date(timestamp * 1000), "yyyy-MM-dd");
},
toggleIndex(index) {
if (this.isSelected(index)) {
this.setSelectedIndices(this.selectedIndices.filter(idx => idx.id != index.id));
} else {
this.setSelectedIndices([index, ...this.selectedIndices]);
}
},
isSelected(index) {
return this.selectedIndices.find(idx => idx.id == index.id) != null;
} }
}, },
}) })
</script> </script>
<style src="vue-multiselect/dist/vue-multiselect.min.css"></style> <style scoped>
.timestamp-text {
<style> line-height: 24px;
.multiselect__option { font-size: 80%;
padding: 5px 10px;
} }
.multiselect__content-wrapper { .version-badge {
overflow: hidden; color: #222 !important;
background: none;
} }
.theme-black .multiselect__tags { .list-group-item {
background: #37474F; padding: 0.2em 0.4em;
border: 1px solid #616161 !important
} }
.theme-black .multiselect__input { #index-picker-desktop {
color: #dbdbdb; overflow-y: auto;
background: #37474F; max-height: 132px;
}
.theme-black .multiselect__content-wrapper {
border: none
} }
</style> </style>

View File

@ -3,7 +3,7 @@
<template #cell(value)="data"> <template #cell(value)="data">
<span v-if="'html' in data.item" v-html="data.item.html"></span> <span v-if="'html' in data.item" v-html="data.item.html"></span>
<span v-else>{{data.value}}</span> <span v-else>{{ data.value }}</span>
</template> </template>
</b-table> </b-table>
</template> </template>
@ -58,6 +58,7 @@ export default {
"modified_by", "pages", "tag", "modified_by", "pages", "tag",
"exif_make", "exif_software", "exif_exposure_time", "exif_fnumber", "exif_focal_length", "exif_make", "exif_software", "exif_exposure_time", "exif_fnumber", "exif_focal_length",
"exif_user_comment", "exif_iso_speed_ratings", "exif_model", "exif_datetime", "exif_user_comment", "exif_iso_speed_ratings", "exif_model", "exif_datetime",
"checksum"
]; ];
fields.forEach(field => { fields.forEach(field => {

View File

@ -62,7 +62,8 @@ export default {
lightboxLoadOnlyCurrent: "Do not preload full-size images for adjacent slides in image viewer.", lightboxLoadOnlyCurrent: "Do not preload full-size images for adjacent slides in image viewer.",
slideDuration: "Slide duration", slideDuration: "Slide duration",
resultSize: "Number of results per page", resultSize: "Number of results per page",
tagOrOperator: "Use OR operator when specifying multiple tags." tagOrOperator: "Use OR operator when specifying multiple tags.",
hideDuplicates: "Hide duplicate results based on checksum"
}, },
queryMode: { queryMode: {
simple: "Simple", simple: "Simple",
@ -209,7 +210,8 @@ export default {
lightboxLoadOnlyCurrent: "Désactiver le chargement des diapositives adjacentes pour le visualiseur d'images", lightboxLoadOnlyCurrent: "Désactiver le chargement des diapositives adjacentes pour le visualiseur d'images",
slideDuration: "Durée des diapositives", slideDuration: "Durée des diapositives",
resultSize: "Nombre de résultats par page", resultSize: "Nombre de résultats par page",
tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags" tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags",
hideDuplicates: "Masquer les résultats en double"
}, },
queryMode: { queryMode: {
simple: "Simple", simple: "Simple",

View File

@ -27,6 +27,7 @@ export default new Vuex.Store({
size: 60, size: 60,
optLang: "en", optLang: "en",
optHideDuplicates: true,
optTheme: "light", optTheme: "light",
optDisplay: "grid", optDisplay: "grid",
@ -79,6 +80,7 @@ export default new Vuex.Store({
setSizeMax: (state, val) => state.sizeMax = val, setSizeMax: (state, val) => state.sizeMax = val,
setSist2Info: (state, val) => state.sist2Info = val, setSist2Info: (state, val) => state.sist2Info = val,
setSeed: (state, val) => state.seed = val, setSeed: (state, val) => state.seed = val,
setOptHideDuplicates: (state, val) => state.optHideDuplicates = val,
setOptLang: (state, val) => state.optLang = val, setOptLang: (state, val) => state.optLang = val,
setSortMode: (state, val) => state.sortMode = val, setSortMode: (state, val) => state.sortMode = val,
setIndices: (state, val) => { setIndices: (state, val) => {
@ -317,6 +319,7 @@ export default new Vuex.Store({
uiLightboxKey: state => state.uiLightboxKey, uiLightboxKey: state => state.uiLightboxKey,
uiLightboxSlide: state => state.uiLightboxSlide, uiLightboxSlide: state => state.uiLightboxSlide,
optHideDuplicates: state => state.optHideDuplicates,
optLang: state => state.optLang, optLang: state => state.optLang,
optTheme: state => state.optTheme, optTheme: state => state.optTheme,
optDisplay: state => state.optDisplay, optDisplay: state => state.optDisplay,

View File

@ -35,6 +35,11 @@
<br/> <br/>
<h4>{{ $t("searchOptions") }}</h4> <h4>{{ $t("searchOptions") }}</h4>
<b-card> <b-card>
<b-form-checkbox :checked="optHideDuplicates" @input="setOptHideDuplicates">{{
$t("opt.hideDuplicates")
}}
</b-form-checkbox>
<b-form-checkbox :checked="optHighlight" @input="setOptHighlight">{{ $t("opt.highlight") }}</b-form-checkbox> <b-form-checkbox :checked="optHighlight" @input="setOptHighlight">{{ $t("opt.highlight") }}</b-form-checkbox>
<b-form-checkbox :checked="optTagOrOperator" @input="setOptTagOrOperator">{{ <b-form-checkbox :checked="optTagOrOperator" @input="setOptTagOrOperator">{{
$t("opt.tagOrOperator") $t("opt.tagOrOperator")
@ -206,10 +211,10 @@ export default {
"optTreemapSize", "optTreemapSize",
"optLightboxLoadOnlyCurrent", "optLightboxLoadOnlyCurrent",
"optLightboxSlideDuration", "optLightboxSlideDuration",
"optContainerWidth",
"optResultSize", "optResultSize",
"optTagOrOperator", "optTagOrOperator",
"optLang" "optLang",
"optHideDuplicates",
]), ]),
clientWidth() { clientWidth() {
return window.innerWidth; return window.innerWidth;
@ -248,7 +253,8 @@ export default {
"setOptContainerWidth", "setOptContainerWidth",
"setOptResultSize", "setOptResultSize",
"setOptTagOrOperator", "setOptTagOrOperator",
"setOptLang" "setOptLang",
"setOptHideDuplicates"
]), ]),
onResetClick() { onResetClick() {
localStorage.removeItem("sist2_configuration"); localStorage.removeItem("sist2_configuration");

View File

@ -91,6 +91,7 @@ export default Vue.extend({
search: undefined as any, search: undefined as any,
docs: [] as EsHit[], docs: [] as EsHit[],
docIds: new Set(), docIds: new Set(),
docChecksums: new Set(),
searchBusy: false, searchBusy: false,
Sist2Query: Sist2Query, Sist2Query: Sist2Query,
showHelp: false showHelp: false
@ -193,6 +194,7 @@ export default Vue.extend({
async clearResults() { async clearResults() {
this.docs = []; this.docs = [];
this.docIds.clear(); this.docIds.clear();
this.docChecksums.clear();
await this.$store.dispatch("clearResults"); await this.$store.dispatch("clearResults");
this.$store.commit("setUiReachedScrollEnd", false); this.$store.commit("setUiReachedScrollEnd", false);
}, },
@ -202,7 +204,19 @@ export default Vue.extend({
} }
resp.hits.hits = resp.hits.hits.filter(hit => !this.docIds.has(hit._id)); resp.hits.hits = resp.hits.hits.filter(hit => !this.docIds.has(hit._id));
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
if (this.$store.state.optHideDuplicates) {
resp.hits.hits = resp.hits.hits.filter(hit => {
if (!("checksum" in hit._source)) {
return true;
}
const isDupe = !this.docChecksums.has(hit._source.checksum);
this.docChecksums.add(hit._source.checksum);
return isDupe;
});
}
for (const hit of resp.hits.hits) { for (const hit of resp.hits.hits) {
if (hit._props.isPlayableImage || hit._props.isPlayableVideo) { if (hit._props.isPlayableImage || hit._props.isPlayableVideo) {

View File

@ -28,6 +28,7 @@ typedef struct scan_args {
int max_memory_buffer; int max_memory_buffer;
int read_subtitles; int read_subtitles;
int fast_epub; int fast_epub;
int calculate_checksums;
} scan_args_t; } scan_args_t;
scan_args_t *scan_args_create(); scan_args_t *scan_args_create();

View File

@ -15,6 +15,7 @@
#include "libscan/raw/raw.h" #include "libscan/raw/raw.h"
#include "libscan/msdoc/msdoc.h" #include "libscan/msdoc/msdoc.h"
#include "libscan/wpd/wpd.h" #include "libscan/wpd/wpd.h"
#include "libscan/json/json.h"
#include "src/io/store.h" #include "src/io/store.h"
#include <glib.h> #include <glib.h>
@ -32,6 +33,7 @@ typedef struct {
int threads; int threads;
int depth; int depth;
int calculate_checksums;
size_t stat_tn_size; size_t stat_tn_size;
size_t stat_index_size; size_t stat_index_size;
@ -62,6 +64,7 @@ typedef struct {
scan_raw_ctx_t raw_ctx; scan_raw_ctx_t raw_ctx;
scan_msdoc_ctx_t msdoc_ctx; scan_msdoc_ctx_t msdoc_ctx;
scan_wpd_ctx_t wpd_ctx; scan_wpd_ctx_t wpd_ctx;
scan_json_ctx_t json_ctx;
} ScanCtx_t; } ScanCtx_t;
typedef struct { typedef struct {

File diff suppressed because one or more lines are too long

View File

@ -74,6 +74,8 @@ char *get_meta_key_text(enum metakey meta_key) {
return "exif_gps_latitude_dms"; return "exif_gps_latitude_dms";
case MetaExifGpsLatitudeDec: case MetaExifGpsLatitudeDec:
return "exif_gps_latitude_dec"; return "exif_gps_latitude_dec";
case MetaChecksum:
return "checksum";
default: default:
LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key) LOG_FATALF("serialize.c", "FIXME: Unknown meta key: %d", meta_key)
} }
@ -165,6 +167,7 @@ char *build_json_string(document_t *doc) {
case MetaExifGpsLatitudeDMS: case MetaExifGpsLatitudeDMS:
case MetaExifGpsLatitudeDec: case MetaExifGpsLatitudeDec:
case MetaExifGpsLatitudeRef: case MetaExifGpsLatitudeRef:
case MetaChecksum:
case MetaTitle: { case MetaTitle: {
cJSON_AddStringToObject(json, get_meta_key_text(meta->key), meta->str_val); cJSON_AddStringToObject(json, get_meta_key_text(meta->key), meta->str_val);
buffer_size_guess += (int) strlen(meta->str_val); buffer_size_guess += (int) strlen(meta->str_val);

View File

@ -4,6 +4,7 @@
store_t *store_create(const char *path, size_t chunk_size) { store_t *store_create(const char *path, size_t chunk_size) {
store_t *store = malloc(sizeof(struct store_t)); store_t *store = malloc(sizeof(struct store_t));
mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR); mkdir(path, S_IWUSR | S_IRUSR | S_IXUSR);
strcpy(store->path, path);
#if (SIST_FAKE_STORE != 1) #if (SIST_FAKE_STORE != 1)
store->chunk_size = chunk_size; store->chunk_size = chunk_size;
@ -78,27 +79,57 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
int put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0); int put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
ScanCtx.stat_tn_size += buf_len; ScanCtx.stat_tn_size += buf_len;
int db_full = FALSE;
int should_abort_transaction = FALSE;
if (put_ret == MDB_MAP_FULL) { if (put_ret == MDB_MAP_FULL) {
db_full = TRUE;
should_abort_transaction = TRUE;
} else {
int commit_ret = mdb_txn_commit(txn);
if (commit_ret == MDB_MAP_FULL) {
db_full = TRUE;
}
}
if (db_full) {
LOG_INFOF("store.c", "Updating mdb mapsize to %lu bytes", store->size)
if (should_abort_transaction) {
mdb_txn_abort(txn); mdb_txn_abort(txn);
}
pthread_rwlock_unlock(&store->lock); pthread_rwlock_unlock(&store->lock);
// Cannot resize while there is an open transaction.
// The resize takes effect on the next commit.
pthread_rwlock_wrlock(&store->lock); pthread_rwlock_wrlock(&store->lock);
store->size += store->chunk_size; store->size += store->chunk_size;
mdb_env_set_mapsize(store->env, store->size); int resize_ret = mdb_env_set_mapsize(store->env, store->size);
mdb_txn_begin(store->env, NULL, 0, &txn); if (resize_ret != 0) {
put_ret = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
LOG_INFOF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
}
mdb_txn_commit(txn);
pthread_rwlock_unlock(&store->lock);
if (put_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret)) LOG_ERROR("store.c", mdb_strerror(put_ret))
} }
mdb_txn_begin(store->env, NULL, 0, &txn);
int put_ret_retry = mdb_put(txn, store->dbi, &mdb_key, &mdb_value, 0);
if (put_ret_retry != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
int ret = mdb_txn_commit(txn);
if (ret != 0) {
LOG_FATALF("store.c", "FIXME: Could not commit to store %s: %s (%d), %d, %d %d",
store->path, mdb_strerror(ret), ret,
put_ret, put_ret_retry);
}
LOG_INFOF("store.c", "Updated mdb mapsize to %lu bytes", store->size)
} else if (put_ret != 0) {
LOG_ERROR("store.c", mdb_strerror(put_ret))
}
pthread_rwlock_unlock(&store->lock);
#endif #endif
} }

View File

@ -6,12 +6,12 @@
#include <glib.h> #include <glib.h>
#define STORE_SIZE_TN 1024 * 1024 * 5 #define STORE_SIZE_TN (1024 * 1024 * 5)
#define STORE_SIZE_TAG 1024 * 16 #define STORE_SIZE_TAG (1024 * 1024)
#define STORE_SIZE_META STORE_SIZE_TAG #define STORE_SIZE_META STORE_SIZE_TAG
typedef struct store_t { typedef struct store_t {
char *path; char path[PATH_MAX];
char *tmp_path; char *tmp_path;
MDB_dbi dbi; MDB_dbi dbi;
MDB_env *env; MDB_env *env;

View File

@ -24,16 +24,22 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->vfile.filepath = job->filepath; job->vfile.filepath = job->filepath;
job->vfile.read = fs_read; job->vfile.read = fs_read;
// Filesystem reads are always rewindable
job->vfile.read_rewindable = fs_read;
job->vfile.reset = fs_reset; job->vfile.reset = fs_reset;
job->vfile.close = fs_close; job->vfile.close = fs_close;
job->vfile.fd = -1; job->vfile.fd = -1;
job->vfile.is_fs_file = TRUE; job->vfile.is_fs_file = TRUE;
job->vfile.has_checksum = FALSE;
job->vfile.rewind_buffer_size = 0;
job->vfile.rewind_buffer = NULL;
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
return job; return job;
} }
int sub_strings[30]; int sub_strings[30];
#define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, filepath, strlen(filepath), 0, 0, sub_strings, sizeof(sub_strings)) >= 0) #define EXCLUDED(str) (pcre_exec(ScanCtx.exclude, ScanCtx.exclude_extra, str, strlen(str), 0, 0, sub_strings, sizeof(sub_strings)) >= 0)
int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) { int handle_entry(const char *filepath, const struct stat *info, int typeflag, struct FTW *ftw) {

View File

@ -170,6 +170,8 @@ void initialize_scan_context(scan_args_t *args) {
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL); pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL); pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
ScanCtx.calculate_checksums = args->calculate_checksums;
// Archive // Archive
ScanCtx.arc_ctx.mode = args->archive_mode; ScanCtx.arc_ctx.mode = args->archive_mode;
ScanCtx.arc_ctx.log = _log; ScanCtx.arc_ctx.log = _log;
@ -259,10 +261,18 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.raw_ctx.logf = _logf; ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.store = _store; ScanCtx.raw_ctx.store = _store;
// Wpd
ScanCtx.wpd_ctx.content_size = args->content_size; ScanCtx.wpd_ctx.content_size = args->content_size;
ScanCtx.wpd_ctx.log = _log; ScanCtx.wpd_ctx.log = _log;
ScanCtx.wpd_ctx.logf = _logf; ScanCtx.wpd_ctx.logf = _logf;
ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/wordperfect"); ScanCtx.wpd_ctx.wpd_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/wordperfect");
// Json
ScanCtx.json_ctx.content_size = args->content_size;
ScanCtx.json_ctx.log = _log;
ScanCtx.json_ctx.logf = _logf;
ScanCtx.json_ctx.json_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/json");
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson");
} }
@ -508,8 +518,8 @@ void sist2_web(web_args_t *args) {
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
sigsegv_handler = signal(SIGSEGV, sig_handler); // sigsegv_handler = signal(SIGSEGV, sig_handler);
sigabrt_handler = signal(SIGABRT, sig_handler); // sigabrt_handler = signal(SIGABRT, sig_handler);
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
@ -566,6 +576,7 @@ int main(int argc, const char *argv[]) {
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."), OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub, OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
"Faster but less accurate EPUB parsing (no thumbnails, metadata)"), "Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."),
OPT_GROUP("Index options"), OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"), OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),

View File

@ -35,425 +35,426 @@ enum mime {
application_mime=655387, application_mime=655387,
application_mspowerpoint=655388, application_mspowerpoint=655388,
application_msword=655389, application_msword=655389,
application_netmc=655390, application_ndjson=655390,
application_octet_stream=655391, application_netmc=655391,
application_oda=655392, application_octet_stream=655392,
application_ogg=655393, application_oda=655393,
application_pdf=655394 | 0x40000000, application_ogg=655394,
application_pgp_keys=655395, application_pdf=655395 | 0x40000000,
application_pgp_signature=655396, application_pgp_keys=655396,
application_pkcs7_signature=655397, application_pgp_signature=655397,
application_pkix_cert=655398, application_pkcs7_signature=655398,
application_postscript=655399, application_pkix_cert=655399,
application_pro_eng=655400, application_postscript=655400,
application_ringing_tones=655401, application_pro_eng=655401,
application_smil=655402, application_ringing_tones=655402,
application_solids=655403, application_smil=655403,
application_sounder=655404, application_solids=655404,
application_step=655405, application_sounder=655405,
application_streamingmedia=655406, application_step=655406,
application_vda=655407, application_streamingmedia=655407,
application_vnd_amazon_mobi8_ebook=655408 | 0x02000000, application_vda=655408,
application_vnd_coffeescript=655409, application_vnd_amazon_mobi8_ebook=655409 | 0x02000000,
application_vnd_fdf=655410, application_vnd_coffeescript=655410,
application_vnd_font_fontforge_sfd=655411, application_vnd_fdf=655411,
application_vnd_hp_hpgl=655412, application_vnd_font_fontforge_sfd=655412,
application_vnd_iccprofile=655413, application_vnd_hp_hpgl=655413,
application_vnd_lotus_1_2_3=655414, application_vnd_iccprofile=655414,
application_vnd_ms_cab_compressed=655415, application_vnd_lotus_1_2_3=655415,
application_vnd_ms_excel=655416, application_vnd_ms_cab_compressed=655416,
application_vnd_ms_fontobject=655417, application_vnd_ms_excel=655417,
application_vnd_ms_opentype=655418 | 0x20000000, application_vnd_ms_fontobject=655418,
application_vnd_ms_outlook=655419, application_vnd_ms_opentype=655419 | 0x20000000,
application_vnd_ms_pki_certstore=655420, application_vnd_ms_outlook=655420,
application_vnd_ms_pki_pko=655421, application_vnd_ms_pki_certstore=655421,
application_vnd_ms_pki_seccat=655422, application_vnd_ms_pki_pko=655422,
application_vnd_ms_powerpoint=655423, application_vnd_ms_pki_seccat=655423,
application_vnd_ms_project=655424, application_vnd_ms_powerpoint=655424,
application_vnd_oasis_opendocument_base=655425, application_vnd_ms_project=655425,
application_vnd_oasis_opendocument_formula=655426, application_vnd_oasis_opendocument_base=655426,
application_vnd_oasis_opendocument_graphics=655427, application_vnd_oasis_opendocument_formula=655427,
application_vnd_oasis_opendocument_presentation=655428, application_vnd_oasis_opendocument_graphics=655428,
application_vnd_oasis_opendocument_spreadsheet=655429, application_vnd_oasis_opendocument_presentation=655429,
application_vnd_oasis_opendocument_text=655430, application_vnd_oasis_opendocument_spreadsheet=655430,
application_vnd_openxmlformats_officedocument_presentationml_presentation=655431 | 0x04000000, application_vnd_oasis_opendocument_text=655431,
application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655432 | 0x04000000, application_vnd_openxmlformats_officedocument_presentationml_presentation=655432 | 0x04000000,
application_vnd_openxmlformats_officedocument_wordprocessingml_document=655433 | 0x04000000, application_vnd_openxmlformats_officedocument_spreadsheetml_sheet=655433 | 0x04000000,
application_vnd_symbian_install=655434, application_vnd_openxmlformats_officedocument_wordprocessingml_document=655434 | 0x04000000,
application_vnd_tcpdump_pcap=655435, application_vnd_symbian_install=655435,
application_vnd_wap_wmlc=655436, application_vnd_tcpdump_pcap=655436,
application_vnd_wap_wmlscriptc=655437, application_vnd_wap_wmlc=655437,
application_vnd_xara=655438, application_vnd_wap_wmlscriptc=655438,
application_vocaltec_media_desc=655439, application_vnd_xara=655439,
application_vocaltec_media_file=655440, application_vocaltec_media_desc=655440,
application_warc=655441, application_vocaltec_media_file=655441,
application_winhelp=655442, application_warc=655442,
application_wordperfect=655443, application_winhelp=655443,
application_x_123=655444, application_wordperfect=655444,
application_x_7z_compressed=655445 | 0x10000000, application_x_123=655445,
application_x_aim=655446, application_x_7z_compressed=655446 | 0x10000000,
application_x_apple_diskimage=655447, application_x_aim=655447,
application_x_arc=655448 | 0x10000000, application_x_apple_diskimage=655448,
application_x_archive=655449, application_x_arc=655449 | 0x10000000,
application_x_atari_7800_rom=655450, application_x_archive=655450,
application_x_authorware_bin=655451, application_x_atari_7800_rom=655451,
application_x_authorware_map=655452, application_x_authorware_bin=655452,
application_x_authorware_seg=655453, application_x_authorware_map=655453,
application_x_avira_qua=655454, application_x_authorware_seg=655454,
application_x_bcpio=655455, application_x_avira_qua=655455,
application_x_bittorrent=655456, application_x_bcpio=655456,
application_x_bsh=655457, application_x_bittorrent=655457,
application_x_bytecode_python=655458, application_x_bsh=655458,
application_x_bzip=655459, application_x_bytecode_python=655459,
application_x_bzip2=655460 | 0x08000000, application_x_bzip=655460,
application_x_cbr=655461, application_x_bzip2=655461 | 0x08000000,
application_x_cbz=655462, application_x_cbr=655462,
application_x_cdlink=655463, application_x_cbz=655463,
application_x_chat=655464, application_x_cdlink=655464,
application_x_chrome_extension=655465, application_x_chat=655465,
application_x_cocoa=655466, application_x_chrome_extension=655466,
application_x_conference=655467, application_x_cocoa=655467,
application_x_coredump=655468, application_x_conference=655468,
application_x_cpio=655469, application_x_coredump=655469,
application_x_dbf=655470, application_x_cpio=655470,
application_x_dbt=655471, application_x_dbf=655471,
application_x_debian_package=655472, application_x_dbt=655472,
application_x_deepv=655473, application_x_debian_package=655473,
application_x_director=655474, application_x_deepv=655474,
application_x_dmp=655475, application_x_director=655475,
application_x_dosdriver=655476, application_x_dmp=655476,
application_x_dosexec=655477, application_x_dosdriver=655477,
application_x_dvi=655478, application_x_dosexec=655478,
application_x_elc=655479, application_x_dvi=655479,
application_x_elc=655480,
application_x_empty=1, application_x_empty=1,
application_x_envoy=655480, application_x_envoy=655481,
application_x_esrehber=655481, application_x_esrehber=655482,
application_x_excel=655482, application_x_excel=655483,
application_x_executable=655483, application_x_executable=655484,
application_x_font_gdos=655484, application_x_font_gdos=655485,
application_x_font_pf2=655485, application_x_font_pf2=655486,
application_x_font_pfm=655486, application_x_font_pfm=655487,
application_x_font_sfn=655487, application_x_font_sfn=655488,
application_x_font_ttf=655488 | 0x20000000, application_x_font_ttf=655489 | 0x20000000,
application_x_fptapplication_x_dbt=655489, application_x_fptapplication_x_dbt=655490,
application_x_freelance=655490, application_x_freelance=655491,
application_x_gamecube_rom=655491, application_x_gamecube_rom=655492,
application_x_gdbm=655492, application_x_gdbm=655493,
application_x_gettext_translation=655493, application_x_gettext_translation=655494,
application_x_git=655494, application_x_git=655495,
application_x_gsp=655495, application_x_gsp=655496,
application_x_gss=655496, application_x_gss=655497,
application_x_gtar=655497, application_x_gtar=655498,
application_x_gzip=655498, application_x_gzip=655499,
application_x_hdf=655499, application_x_hdf=655500,
application_x_helpfile=655500, application_x_helpfile=655501,
application_x_httpd_imap=655501, application_x_httpd_imap=655502,
application_x_ima=655502, application_x_ima=655503,
application_x_innosetup=655503, application_x_innosetup=655504,
application_x_internett_signup=655504, application_x_internett_signup=655505,
application_x_inventor=655505, application_x_inventor=655506,
application_x_ip2=655506, application_x_ip2=655507,
application_x_java_applet=655507, application_x_java_applet=655508,
application_x_java_commerce=655508, application_x_java_commerce=655509,
application_x_java_image=655509, application_x_java_image=655510,
application_x_java_jmod=655510, application_x_java_jmod=655511,
application_x_java_keystore=655511, application_x_java_keystore=655512,
application_x_kdelnk=655512, application_x_kdelnk=655513,
application_x_koan=655513, application_x_koan=655514,
application_x_latex=655514, application_x_latex=655515,
application_x_livescreen=655515, application_x_livescreen=655516,
application_x_lotus=655516, application_x_lotus=655517,
application_x_lz4=655517 | 0x08000000, application_x_lz4=655518 | 0x08000000,
application_x_lz4_json=655518, application_x_lz4_json=655519,
application_x_lzh=655519, application_x_lzh=655520,
application_x_lzh_compressed=655520, application_x_lzh_compressed=655521,
application_x_lzip=655521 | 0x08000000, application_x_lzip=655522 | 0x08000000,
application_x_lzma=655522 | 0x08000000, application_x_lzma=655523 | 0x08000000,
application_x_lzop=655523 | 0x08000000, application_x_lzop=655524 | 0x08000000,
application_x_lzx=655524, application_x_lzx=655525,
application_x_mach_binary=655525, application_x_mach_binary=655526,
application_x_mach_executable=655526, application_x_mach_executable=655527,
application_x_magic_cap_package_1_0=655527, application_x_magic_cap_package_1_0=655528,
application_x_mathcad=655528, application_x_mathcad=655529,
application_x_maxis_dbpf=655529, application_x_maxis_dbpf=655530,
application_x_meme=655530, application_x_meme=655531,
application_x_midi=655531, application_x_midi=655532,
application_x_mif=655532, application_x_mif=655533,
application_x_mix_transfer=655533, application_x_mix_transfer=655534,
application_x_mobipocket_ebook=655534 | 0x02000000, application_x_mobipocket_ebook=655535 | 0x02000000,
application_x_ms_compress_szdd=655535, application_x_ms_compress_szdd=655536,
application_x_ms_pdb=655536, application_x_ms_pdb=655537,
application_x_ms_reader=655537, application_x_ms_reader=655538,
application_x_msaccess=655538, application_x_msaccess=655539,
application_x_n64_rom=655539, application_x_n64_rom=655540,
application_x_navi_animation=655540, application_x_navi_animation=655541,
application_x_navidoc=655541, application_x_navidoc=655542,
application_x_navimap=655542, application_x_navimap=655543,
application_x_navistyle=655543, application_x_navistyle=655544,
application_x_nes_rom=655544, application_x_nes_rom=655545,
application_x_netcdf=655545, application_x_netcdf=655546,
application_x_newton_compatible_pkg=655546, application_x_newton_compatible_pkg=655547,
application_x_nintendo_ds_rom=655547, application_x_nintendo_ds_rom=655548,
application_x_object=655548, application_x_object=655549,
application_x_omc=655549, application_x_omc=655550,
application_x_omcdatamaker=655550, application_x_omcdatamaker=655551,
application_x_omcregerator=655551, application_x_omcregerator=655552,
application_x_pagemaker=655552, application_x_pagemaker=655553,
application_x_pcl=655553, application_x_pcl=655554,
application_x_pgp_keyring=655554, application_x_pgp_keyring=655555,
application_x_pixclscript=655555, application_x_pixclscript=655556,
application_x_pkcs7_certreqresp=655556, application_x_pkcs7_certreqresp=655557,
application_x_pkcs7_signature=655557, application_x_pkcs7_signature=655558,
application_x_project=655558, application_x_project=655559,
application_x_qpro=655559, application_x_qpro=655560,
application_x_rar=655560 | 0x10000000, application_x_rar=655561 | 0x10000000,
application_x_rpm=655561, application_x_rpm=655562,
application_x_sdp=655562, application_x_sdp=655563,
application_x_sea=655563, application_x_sea=655564,
application_x_seelogo=655564, application_x_seelogo=655565,
application_x_setupscript=655565, application_x_setupscript=655566,
application_x_shar=655566, application_x_shar=655567,
application_x_sharedlib=655567, application_x_sharedlib=655568,
application_x_shockwave_flash=655568, application_x_shockwave_flash=655569,
application_x_snappy_framed=655569, application_x_snappy_framed=655570,
application_x_sprite=655570, application_x_sprite=655571,
application_x_sqlite3=655571, application_x_sqlite3=655572,
application_x_stargallery_thm=655572, application_x_stargallery_thm=655573,
application_x_stuffit=655573, application_x_stuffit=655574,
application_x_sv4cpio=655574, application_x_sv4cpio=655575,
application_x_sv4crc=655575, application_x_sv4crc=655576,
application_x_tar=655576 | 0x10000000, application_x_tar=655577 | 0x10000000,
application_x_tbook=655577, application_x_tbook=655578,
application_x_terminfo=655578, application_x_terminfo=655579,
application_x_terminfo2=655579, application_x_terminfo2=655580,
application_x_tex_tfm=655580, application_x_tex_tfm=655581,
application_x_texinfo=655581, application_x_texinfo=655582,
application_x_ustar=655582, application_x_ustar=655583,
application_x_visio=655583, application_x_visio=655584,
application_x_vnd_audioexplosion_mzz=655584, application_x_vnd_audioexplosion_mzz=655585,
application_x_vnd_ls_xpix=655585, application_x_vnd_ls_xpix=655586,
application_x_vrml=655586, application_x_vrml=655587,
application_x_wais_source=655587, application_x_wais_source=655588,
application_x_wine_extension_ini=655588, application_x_wine_extension_ini=655589,
application_x_wintalk=655589, application_x_wintalk=655590,
application_x_world=655590, application_x_world=655591,
application_x_wri=655591, application_x_wri=655592,
application_x_x509_ca_cert=655592, application_x_x509_ca_cert=655593,
application_x_xz=655593 | 0x08000000, application_x_xz=655594 | 0x08000000,
application_x_zip=655594, application_x_zip=655595,
application_x_zstd=655595 | 0x08000000, application_x_zstd=655596 | 0x08000000,
application_x_zstd_dictionary=655596, application_x_zstd_dictionary=655597,
application_xml=655597, application_xml=655598,
application_zip=655598 | 0x10000000, application_zip=655599 | 0x10000000,
application_zlib=655599, application_zlib=655600,
audio_basic=458992 | 0x80000000, audio_basic=458993 | 0x80000000,
audio_it=458993, audio_it=458994,
audio_make=458994, audio_make=458995,
audio_mid=458995, audio_mid=458996,
audio_midi=458996, audio_midi=458997,
audio_mp4=458997, audio_mp4=458998,
audio_mpeg=458998, audio_mpeg=458999,
audio_ogg=458999, audio_ogg=459000,
audio_s3m=459000, audio_s3m=459001,
audio_tsp_audio=459001, audio_tsp_audio=459002,
audio_tsplayer=459002, audio_tsplayer=459003,
audio_vnd_qcelp=459003, audio_vnd_qcelp=459004,
audio_voxware=459004, audio_voxware=459005,
audio_x_aiff=459005, audio_x_aiff=459006,
audio_x_flac=459006, audio_x_flac=459007,
audio_x_gsm=459007, audio_x_gsm=459008,
audio_x_hx_aac_adts=459008, audio_x_hx_aac_adts=459009,
audio_x_jam=459009, audio_x_jam=459010,
audio_x_liveaudio=459010, audio_x_liveaudio=459011,
audio_x_m4a=459011, audio_x_m4a=459012,
audio_x_midi=459012, audio_x_midi=459013,
audio_x_mod=459013, audio_x_mod=459014,
audio_x_mp4a_latm=459014, audio_x_mp4a_latm=459015,
audio_x_mpeg_3=459015, audio_x_mpeg_3=459016,
audio_x_mpequrl=459016, audio_x_mpequrl=459017,
audio_x_nspaudio=459017, audio_x_nspaudio=459018,
audio_x_pn_realaudio=459018, audio_x_pn_realaudio=459019,
audio_x_psid=459019, audio_x_psid=459020,
audio_x_realaudio=459020, audio_x_realaudio=459021,
audio_x_s3m=459021, audio_x_s3m=459022,
audio_x_twinvq=459022, audio_x_twinvq=459023,
audio_x_twinvq_plugin=459023, audio_x_twinvq_plugin=459024,
audio_x_voc=459024, audio_x_voc=459025,
audio_x_wav=459025, audio_x_wav=459026,
audio_x_xbox_executable=459026 | 0x80000000, audio_x_xbox_executable=459027 | 0x80000000,
audio_x_xbox360_executable=459027 | 0x80000000, audio_x_xbox360_executable=459028 | 0x80000000,
audio_xm=459028, audio_xm=459029,
font_otf=327957 | 0x20000000, font_otf=327958 | 0x20000000,
font_sfnt=327958 | 0x20000000, font_sfnt=327959 | 0x20000000,
font_woff=327959 | 0x20000000, font_woff=327960 | 0x20000000,
font_woff2=327960 | 0x20000000, font_woff2=327961 | 0x20000000,
image_bmp=524569, image_bmp=524570,
image_cmu_raster=524570, image_cmu_raster=524571,
image_fif=524571, image_fif=524572,
image_florian=524572, image_florian=524573,
image_g3fax=524573, image_g3fax=524574,
image_gif=524574, image_gif=524575,
image_heic=524575, image_heic=524576,
image_ief=524576, image_ief=524577,
image_jpeg=524577, image_jpeg=524578,
image_jutvision=524578, image_jutvision=524579,
image_naplps=524579, image_naplps=524580,
image_pict=524580, image_pict=524581,
image_png=524581, image_png=524582,
image_svg=524582 | 0x80000000, image_svg=524583 | 0x80000000,
image_svg_xml=524583 | 0x80000000, image_svg_xml=524584 | 0x80000000,
image_tiff=524584, image_tiff=524585,
image_vnd_adobe_photoshop=524585 | 0x80000000, image_vnd_adobe_photoshop=524586 | 0x80000000,
image_vnd_djvu=524586 | 0x80000000, image_vnd_djvu=524587 | 0x80000000,
image_vnd_fpx=524587, image_vnd_fpx=524588,
image_vnd_microsoft_icon=524588, image_vnd_microsoft_icon=524589,
image_vnd_rn_realflash=524589, image_vnd_rn_realflash=524590,
image_vnd_rn_realpix=524590, image_vnd_rn_realpix=524591,
image_vnd_wap_wbmp=524591, image_vnd_wap_wbmp=524592,
image_vnd_xiff=524592, image_vnd_xiff=524593,
image_webp=524593, image_webp=524594,
image_wmf=524594, image_wmf=524595,
image_x_3ds=524595, image_x_3ds=524596,
image_x_adobe_dng=524596 | 0x00800000, image_x_adobe_dng=524597 | 0x00800000,
image_x_award_bioslogo=524597, image_x_award_bioslogo=524598,
image_x_canon_cr2=524598 | 0x00800000, image_x_canon_cr2=524599 | 0x00800000,
image_x_canon_crw=524599 | 0x00800000, image_x_canon_crw=524600 | 0x00800000,
image_x_cmu_raster=524600, image_x_cmu_raster=524601,
image_x_cur=524601, image_x_cur=524602,
image_x_dcraw=524602 | 0x00800000, image_x_dcraw=524603 | 0x00800000,
image_x_dwg=524603, image_x_dwg=524604,
image_x_eps=524604, image_x_eps=524605,
image_x_epson_erf=524605 | 0x00800000, image_x_epson_erf=524606 | 0x00800000,
image_x_exr=524606, image_x_exr=524607,
image_x_fuji_raf=524607 | 0x00800000, image_x_fuji_raf=524608 | 0x00800000,
image_x_gem=524608, image_x_gem=524609,
image_x_icns=524609, image_x_icns=524610,
image_x_icon=524610 | 0x80000000, image_x_icon=524611 | 0x80000000,
image_x_jg=524611, image_x_jg=524612,
image_x_jps=524612, image_x_jps=524613,
image_x_kodak_dcr=524613 | 0x00800000, image_x_kodak_dcr=524614 | 0x00800000,
image_x_kodak_k25=524614 | 0x00800000, image_x_kodak_k25=524615 | 0x00800000,
image_x_kodak_kdc=524615 | 0x00800000, image_x_kodak_kdc=524616 | 0x00800000,
image_x_minolta_mrw=524616 | 0x00800000, image_x_minolta_mrw=524617 | 0x00800000,
image_x_ms_bmp=524617, image_x_ms_bmp=524618,
image_x_niff=524618, image_x_niff=524619,
image_x_nikon_nef=524619 | 0x00800000, image_x_nikon_nef=524620 | 0x00800000,
image_x_olympus_orf=524620 | 0x00800000, image_x_olympus_orf=524621 | 0x00800000,
image_x_panasonic_raw=524621 | 0x00800000, image_x_panasonic_raw=524622 | 0x00800000,
image_x_pcx=524622, image_x_pcx=524623,
image_x_pentax_pef=524623 | 0x00800000, image_x_pentax_pef=524624 | 0x00800000,
image_x_pict=524624, image_x_pict=524625,
image_x_portable_bitmap=524625, image_x_portable_bitmap=524626,
image_x_portable_graymap=524626, image_x_portable_graymap=524627,
image_x_portable_pixmap=524627, image_x_portable_pixmap=524628,
image_x_quicktime=524628, image_x_quicktime=524629,
image_x_rgb=524629, image_x_rgb=524630,
image_x_sigma_x3f=524630 | 0x00800000, image_x_sigma_x3f=524631 | 0x00800000,
image_x_sony_arw=524631 | 0x00800000, image_x_sony_arw=524632 | 0x00800000,
image_x_sony_sr2=524632 | 0x00800000, image_x_sony_sr2=524633 | 0x00800000,
image_x_sony_srf=524633 | 0x00800000, image_x_sony_srf=524634 | 0x00800000,
image_x_tga=524634, image_x_tga=524635,
image_x_tiff=524635, image_x_tiff=524636,
image_x_win_bitmap=524636, image_x_win_bitmap=524637,
image_x_xcf=524637 | 0x80000000, image_x_xcf=524638 | 0x80000000,
image_x_xpixmap=524638 | 0x80000000, image_x_xpixmap=524639 | 0x80000000,
image_x_xwindowdump=524639, image_x_xwindowdump=524640,
message_news=196960, message_news=196961,
message_rfc822=196961, message_rfc822=196962,
model_vnd_dwf=65890, model_vnd_dwf=65891,
model_vnd_gdl=65891, model_vnd_gdl=65892,
model_vnd_gs_gdl=65892, model_vnd_gs_gdl=65893,
model_vrml=65893, model_vrml=65894,
model_x_pov=65894, model_x_pov=65895,
sist2_sidecar=2, sist2_sidecar=2,
text_PGP=590183, text_PGP=590184,
text_asp=590184, text_asp=590185,
text_css=590185, text_css=590186,
text_html=590186 | 0x01000000, text_html=590187 | 0x01000000,
text_javascript=590187, text_javascript=590188,
text_mcf=590188, text_mcf=590189,
text_pascal=590189, text_pascal=590190,
text_plain=590190, text_plain=590191,
text_richtext=590191, text_richtext=590192,
text_rtf=590192, text_rtf=590193,
text_scriplet=590193, text_scriplet=590194,
text_tab_separated_values=590194, text_tab_separated_values=590195,
text_troff=590195, text_troff=590196,
text_uri_list=590196, text_uri_list=590197,
text_vnd_abc=590197, text_vnd_abc=590198,
text_vnd_fmi_flexstor=590198, text_vnd_fmi_flexstor=590199,
text_vnd_wap_wml=590199, text_vnd_wap_wml=590200,
text_vnd_wap_wmlscript=590200, text_vnd_wap_wmlscript=590201,
text_webviewhtml=590201, text_webviewhtml=590202,
text_x_Algol68=590202, text_x_Algol68=590203,
text_x_asm=590203, text_x_asm=590204,
text_x_audiosoft_intra=590204, text_x_audiosoft_intra=590205,
text_x_awk=590205, text_x_awk=590206,
text_x_bcpl=590206, text_x_bcpl=590207,
text_x_c=590207, text_x_c=590208,
text_x_c__=590208, text_x_c__=590209,
text_x_component=590209, text_x_component=590210,
text_x_diff=590210, text_x_diff=590211,
text_x_fortran=590211, text_x_fortran=590212,
text_x_java=590212, text_x_java=590213,
text_x_la_asf=590213, text_x_la_asf=590214,
text_x_lisp=590214, text_x_lisp=590215,
text_x_m=590215, text_x_m=590216,
text_x_m4=590216, text_x_m4=590217,
text_x_makefile=590217, text_x_makefile=590218,
text_x_ms_regedit=590218, text_x_ms_regedit=590219,
text_x_msdos_batch=590219, text_x_msdos_batch=590220,
text_x_objective_c=590220, text_x_objective_c=590221,
text_x_pascal=590221, text_x_pascal=590222,
text_x_perl=590222, text_x_perl=590223,
text_x_php=590223, text_x_php=590224,
text_x_po=590224, text_x_po=590225,
text_x_python=590225, text_x_python=590226,
text_x_ruby=590226, text_x_ruby=590227,
text_x_sass=590227, text_x_sass=590228,
text_x_scss=590228, text_x_scss=590229,
text_x_server_parsed_html=590229, text_x_server_parsed_html=590230,
text_x_setext=590230, text_x_setext=590231,
text_x_sgml=590231 | 0x01000000, text_x_sgml=590232 | 0x01000000,
text_x_shellscript=590232, text_x_shellscript=590233,
text_x_speech=590233, text_x_speech=590234,
text_x_tcl=590234, text_x_tcl=590235,
text_x_tex=590235, text_x_tex=590236,
text_x_uil=590236, text_x_uil=590237,
text_x_uuencode=590237, text_x_uuencode=590238,
text_x_vcalendar=590238, text_x_vcalendar=590239,
text_x_vcard=590239, text_x_vcard=590240,
text_xml=590240 | 0x01000000, text_xml=590241 | 0x01000000,
video_MP2T=393633, video_MP2T=393634,
video_animaflex=393634, video_animaflex=393635,
video_avi=393635, video_avi=393636,
video_avs_video=393636, video_avs_video=393637,
video_mp4=393637, video_mp4=393638,
video_mpeg=393638, video_mpeg=393639,
video_quicktime=393639, video_quicktime=393640,
video_vdo=393640, video_vdo=393641,
video_vivo=393641, video_vivo=393642,
video_vnd_rn_realvideo=393642, video_vnd_rn_realvideo=393643,
video_vosaic=393643, video_vosaic=393644,
video_webm=393644, video_webm=393645,
video_x_amt_demorun=393645, video_x_amt_demorun=393646,
video_x_amt_showrun=393646, video_x_amt_showrun=393647,
video_x_atomic3d_feature=393647, video_x_atomic3d_feature=393648,
video_x_dl=393648, video_x_dl=393649,
video_x_dv=393649, video_x_dv=393650,
video_x_fli=393650, video_x_fli=393651,
video_x_flv=393651, video_x_flv=393652,
video_x_isvideo=393652, video_x_isvideo=393653,
video_x_jng=393653 | 0x80000000, video_x_jng=393654 | 0x80000000,
video_x_m4v=393654, video_x_m4v=393655,
video_x_matroska=393655, video_x_matroska=393656,
video_x_mng=393656, video_x_mng=393657,
video_x_motion_jpeg=393657, video_x_motion_jpeg=393658,
video_x_ms_asf=393658, video_x_ms_asf=393659,
video_x_msvideo=393659, video_x_msvideo=393660,
video_x_qtc=393660, video_x_qtc=393661,
video_x_sgi_movie=393661, video_x_sgi_movie=393662,
x_epoc_x_sisx_app=721342, x_epoc_x_sisx_app=721343,
}; };
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) { char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_arj: return "application/arj"; case application_arj: return "application/arj";
@ -480,6 +481,7 @@ case application_java_archive: return "application/java-archive";
case application_java: return "application/java"; case application_java: return "application/java";
case application_javascript: return "application/javascript"; case application_javascript: return "application/javascript";
case application_json: return "application/json"; case application_json: return "application/json";
case application_ndjson: return "application/ndjson";
case application_marc: return "application/marc"; case application_marc: return "application/marc";
case application_mbedlet: return "application/mbedlet"; case application_mbedlet: return "application/mbedlet";
case application_mime: return "application/mime"; case application_mime: return "application/mime";
@ -930,6 +932,8 @@ g_hash_table_insert(ext_table, "inf", (gpointer)application_inf);
g_hash_table_insert(ext_table, "jar", (gpointer)application_java_archive); g_hash_table_insert(ext_table, "jar", (gpointer)application_java_archive);
g_hash_table_insert(ext_table, "class", (gpointer)application_java); g_hash_table_insert(ext_table, "class", (gpointer)application_java);
g_hash_table_insert(ext_table, "json", (gpointer)application_json); g_hash_table_insert(ext_table, "json", (gpointer)application_json);
g_hash_table_insert(ext_table, "jsonl", (gpointer)application_ndjson);
g_hash_table_insert(ext_table, "ndjson", (gpointer)application_ndjson);
g_hash_table_insert(ext_table, "mrc", (gpointer)application_marc); g_hash_table_insert(ext_table, "mrc", (gpointer)application_marc);
g_hash_table_insert(ext_table, "mbd", (gpointer)application_mbedlet); g_hash_table_insert(ext_table, "mbd", (gpointer)application_mbedlet);
g_hash_table_insert(ext_table, "aps", (gpointer)application_mime); g_hash_table_insert(ext_table, "aps", (gpointer)application_mime);
@ -1474,6 +1478,7 @@ g_hash_table_insert(mime_table, "application/java-archive", (gpointer)applicatio
g_hash_table_insert(mime_table, "application/java", (gpointer)application_java); g_hash_table_insert(mime_table, "application/java", (gpointer)application_java);
g_hash_table_insert(mime_table, "application/javascript", (gpointer)application_javascript); g_hash_table_insert(mime_table, "application/javascript", (gpointer)application_javascript);
g_hash_table_insert(mime_table, "application/json", (gpointer)application_json); g_hash_table_insert(mime_table, "application/json", (gpointer)application_json);
g_hash_table_insert(mime_table, "application/ndjson", (gpointer)application_ndjson);
g_hash_table_insert(mime_table, "application/marc", (gpointer)application_marc); g_hash_table_insert(mime_table, "application/marc", (gpointer)application_marc);
g_hash_table_insert(mime_table, "application/mbedlet", (gpointer)application_mbedlet); g_hash_table_insert(mime_table, "application/mbedlet", (gpointer)application_mbedlet);
g_hash_table_insert(mime_table, "application/mime", (gpointer)application_mime); g_hash_table_insert(mime_table, "application/mime", (gpointer)application_mime);

View File

@ -10,25 +10,34 @@
#define MIN_VIDEO_SIZE (1024 * 64) #define MIN_VIDEO_SIZE (1024 * 64)
#define MIN_IMAGE_SIZE (1024 * 2) #define MIN_IMAGE_SIZE (512)
/**
 * Read up to `size` bytes from a filesystem-backed vfile into `buf`.
 *
 * The file is opened on the first call (while f->fd is still -1); the SHA-1
 * context is initialised at that point as well. When f->calculate_checksum is
 * set, every chunk that is successfully read is fed into the running SHA-1.
 *
 * @param f    vfile whose filepath/fd/sha1 fields are used and updated
 * @param buf  destination buffer
 * @param size maximum number of bytes to read
 * @return the result of read(2) narrowed to int (0 at end of file),
 *         or -1 if the file could not be opened or the read failed
 */
int fs_read(struct vfile *f, void *buf, size_t size) {
    if (f->fd == -1) {
        SHA1_Init(&f->sha1_ctx);

        f->fd = open(f->filepath, O_RDONLY);
        if (f->fd == -1) {
            LOG_ERRORF(f->filepath, "open(): [%d] %s", errno, strerror(errno))
            return -1;
        }
    }

    int ret = (int) read(f->fd, buf, size);

    // read() returns -1 on error: only hash when bytes were actually read, so
    // a negative length never reaches the hash update and a failed read does
    // not mark the file as having a checksum.
    if (ret > 0 && f->calculate_checksum) {
        f->has_checksum = TRUE;
        safe_sha1_update(&f->sha1_ctx, (unsigned char *) buf, ret);
    }

    return ret;
}
#define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));}; #define CLOSE_FILE(f) if ((f).close != NULL) {(f).close(&(f));};
/**
 * Finalise the SHA-1 digest into f->sha1_digest and close the descriptor.
 * Does nothing when f->fd is -1.
 */
void fs_close(struct vfile *f) {
    if (f->fd == -1) {
        return;
    }

    SHA1_Final(f->sha1_digest, &f->sha1_ctx);
    close(f->fd);
}
@ -66,7 +75,7 @@ void parse(void *arg) {
doc->meta_tail = NULL; doc->meta_tail = NULL;
doc->mime = 0; doc->mime = 0;
doc->size = job->vfile.info.st_size; doc->size = job->vfile.info.st_size;
doc->mtime = job->vfile.info.st_mtim.tv_sec; doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5); int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) { if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
@ -93,18 +102,17 @@ void parse(void *arg) {
doc->mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext); doc->mime = mime_get_mime_by_ext(ScanCtx.ext_table, job->filepath + job->ext);
} }
int bytes_read = 0;
if (doc->mime == 0 && !ScanCtx.fast) { if (doc->mime == 0 && !ScanCtx.fast) {
// Get mime type with libmagic // Get mime type with libmagic
if (!job->vfile.is_fs_file) { if (job->vfile.read_rewindable == NULL) {
LOG_WARNING(job->filepath, LOG_WARNING(job->filepath,
"Guessing mime type with libmagic inside archive files is not currently supported"); "File does not support rewindable reads, cannot guess Media type");
goto abort; goto abort;
} }
bytes_read = job->vfile.read(&job->vfile, buf, MAGIC_BUF_SIZE); int bytes_read = job->vfile.read_rewindable(&job->vfile, buf, MAGIC_BUF_SIZE);
if (bytes_read < 0) { if (bytes_read < 0) {
if (job->vfile.is_fs_file) { if (job->vfile.is_fs_file) {
@ -135,7 +143,9 @@ void parse(void *arg) {
} }
} }
if (job->vfile.reset != NULL) {
job->vfile.reset(&job->vfile); job->vfile.reset(&job->vfile);
}
magic_close(magic); magic_close(magic);
} }
@ -149,7 +159,7 @@ void parse(void *arg) {
} else if ((mmime == MimeVideo && doc->size >= MIN_VIDEO_SIZE) || } else if ((mmime == MimeVideo && doc->size >= MIN_VIDEO_SIZE) ||
(mmime == MimeImage && doc->size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) { (mmime == MimeImage && doc->size >= MIN_IMAGE_SIZE) || mmime == MimeAudio) {
parse_media(&ScanCtx.media_ctx, &job->vfile, doc); parse_media(&ScanCtx.media_ctx, &job->vfile, doc, mime_get_mime_text(doc->mime));
} else if (IS_PDF(doc->mime)) { } else if (IS_PDF(doc->mime)) {
parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc); parse_ebook(&ScanCtx.ebook_ctx, &job->vfile, mime_get_mime_text(doc->mime), doc);
@ -169,7 +179,7 @@ void parse(void *arg) {
IS_ARC(doc->mime) || IS_ARC(doc->mime) ||
(IS_ARC_FILTER(doc->mime) && should_parse_filtered_file(doc->filepath, doc->ext)) (IS_ARC_FILTER(doc->mime) && should_parse_filtered_file(doc->filepath, doc->ext))
)) { )) {
parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc); parse_archive(&ScanCtx.arc_ctx, &job->vfile, doc, ScanCtx.exclude, ScanCtx.exclude_extra);
} else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc->mime)) { } else if ((ScanCtx.ooxml_ctx.content_size > 0 || ScanCtx.media_ctx.tn_size > 0) && IS_DOC(doc->mime)) {
parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, doc); parse_ooxml(&ScanCtx.ooxml_ctx, &job->vfile, doc);
} else if (is_cbr(&ScanCtx.comic_ctx, doc->mime) || is_cbz(&ScanCtx.comic_ctx, doc->mime)) { } else if (is_cbr(&ScanCtx.comic_ctx, doc->mime) || is_cbz(&ScanCtx.comic_ctx, doc->mime)) {
@ -179,11 +189,15 @@ void parse(void *arg) {
} else if (doc->mime == MIME_SIST2_SIDECAR) { } else if (doc->mime == MIME_SIST2_SIDECAR) {
parse_sidecar(&job->vfile, doc); parse_sidecar(&job->vfile, doc);
CLOSE_FILE(job->vfile) CLOSE_FILE(job->vfile)
free(doc->filepath);
free(doc);
return; return;
} else if (is_msdoc(&ScanCtx.msdoc_ctx, doc->mime)) { } else if (is_msdoc(&ScanCtx.msdoc_ctx, doc->mime)) {
parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc); parse_msdoc(&ScanCtx.msdoc_ctx, &job->vfile, doc);
} else if (is_wpd(&ScanCtx.wpd_ctx, doc->mime)) { } else if (is_json(&ScanCtx.json_ctx, doc->mime)) {
parse_wpd(&ScanCtx.wpd_ctx, &job->vfile, doc); parse_json(&ScanCtx.json_ctx, &job->vfile, doc);
} else if (is_ndjson(&ScanCtx.json_ctx, doc->mime)) {
parse_ndjson(&ScanCtx.json_ctx, &job->vfile, doc);
} }
abort: abort:
@ -200,9 +214,15 @@ void parse(void *arg) {
doc->has_parent = FALSE; doc->has_parent = FALSE;
} }
write_document(doc);
CLOSE_FILE(job->vfile) CLOSE_FILE(job->vfile)
if (job->vfile.has_checksum) {
char sha1_digest_str[SHA1_STR_LENGTH];
buf2hex((unsigned char *) job->vfile.sha1_digest, SHA1_DIGEST_LENGTH, (char *) sha1_digest_str);
APPEND_STR_META(doc, MetaChecksum, (const char *) sha1_digest_str);
}
write_document(doc);
} }
void cleanup_parse() { void cleanup_parse() {

View File

@ -3,7 +3,7 @@
#include "../sist.h" #include "../sist.h"
#define MAGIC_BUF_SIZE 4096 * 6 #define MAGIC_BUF_SIZE (4096 * 6)
int fs_read(struct vfile *f, void *buf, size_t size); int fs_read(struct vfile *f, void *buf, size_t size);
void fs_close(struct vfile *f); void fs_close(struct vfile *f);

View File

@ -27,7 +27,10 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len, MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
path_md5); path_md5);
store_write(ScanCtx.index.meta_store, (char *) path_md5, sizeof(path_md5), json_str, strlen(json_str) + 1); char path_md5_str[MD5_STR_LENGTH];
buf2hex(path_md5, MD5_DIGEST_LENGTH, path_md5_str);
store_write(ScanCtx.index.meta_store, path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
cJSON_Delete(json); cJSON_Delete(json);
free(json_str); free(json_str);

View File

@ -26,6 +26,8 @@
#define UNUSED(x) __attribute__((__unused__)) x #define UNUSED(x) __attribute__((__unused__)) x
#define MD5_STR_LENGTH 33 #define MD5_STR_LENGTH 33
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
#include "util.h" #include "util.h"
#include "log.h" #include "log.h"
@ -49,7 +51,7 @@
#include <ctype.h> #include <ctype.h>
#include "git_hash.h" #include "git_hash.h"
#define VERSION "2.11.2" #define VERSION "2.11.3"
static const char *const Version = VERSION; static const char *const Version = VERSION;
#ifndef SIST_PLATFORM #ifndef SIST_PLATFORM

File diff suppressed because one or more lines are too long

2
third-party/libscan vendored

@ -1 +1 @@
Subproject commit fe53e1a219246d829439bb26093713a415a58924 Subproject commit 3787475ecba7453a2a97ab470103606c2cecabb2