Merge pull request #257 from simon987/dev

v2.11.7
This commit is contained in:
simon987 2022-02-20 08:34:26 -05:00 committed by GitHub
commit 0d18637e88
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
75 changed files with 4015 additions and 2272 deletions

View File

@ -28,3 +28,4 @@ sist2
**/ext_libwpd
**/core
*.a
tmp_scan/

View File

@ -21,7 +21,6 @@ set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)
add_executable(sist2
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c

View File

@ -3,11 +3,11 @@ MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/
COPY . .
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2
RUN strip sist2 || mv sist2_debug sist2
FROM ubuntu:21.10
FROM --platform="linux/amd64" ubuntu:21.10
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*

7
contrib/systemd/Makefile Normal file
View File

@ -0,0 +1,7 @@
# Install the sist2 auto-update scripts and systemd units system-wide.
# Run as root (e.g. `sudo make install`).
.PHONY: install
install:
	# Scripts must be executable.
	install -m 755 sist2-update-all.sh /usr/bin/sist2-update-all.sh
	install -m 755 sist2-update-files.sh /usr/bin/sist2-update-files.sh
	install -m 755 sist2-update-nextcloud.sh /usr/bin/sist2-update-nextcloud.sh
	# Unit files are plain configuration: install them without the executable
	# bit (install defaults to 755, which systemd warns about).
	install -m 644 sist2-update.service /etc/systemd/system/sist2-update.service
	install -m 644 sist2-update.timer /etc/systemd/system/sist2-update.timer
	# Make systemd pick up the new/changed unit files.
	systemctl daemon-reload

31
contrib/systemd/README.md Normal file
View File

@ -0,0 +1,31 @@
# Systemd integration example
This example contains my (yatli) personal configuration for sist2 auto-updating.
The following indices are involved in this configuration:
| Index | Path | Description |
|-----------|------------------|--------------------------------------------|
| files | /zpool/files | Main file repository |
| nextcloud | /zpool/nextcloud | Externally synchronized to a cloud account |
The systemd integration runs sist2 scanning and indexing automatically every day at 3:00 AM.
### Tailoring the configuration for yourself
`sist2-update-all.sh` calls the update script for each sist2 index. Add or remove
update scripts to suit your needs. Each update script (e.g.
`sist2-update-files.sh`) defines its important parameters at the top, so
make sure to edit them to point to your own files and index locations.
### Installation
```bash
# install the services and scripts
sudo make install
# enable & start the timer
sudo systemctl enable sist2-update.timer
sudo systemctl start sist2-update.timer
# verify that the timer has been enabled
systemctl list-timers --all
```

View File

@ -0,0 +1,9 @@
#!/bin/bash
# Runs every per-index update script in turn, then restarts the sist2
# container so that it serves the refreshed indices.
# Abort on the first failing command (this also applies inside the sourced scripts).
set -e
# Directory containing this script; the per-index scripts are expected next to it.
__dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
echo "Update index: Files"
# Quoted so that an install path containing spaces or glob characters still works.
source "${__dir}/sist2-update-files.sh"
echo "Update index: Nextcloud"
source "${__dir}/sist2-update-nextcloud.sh"
echo "Done. Restarting sist2."
docker restart sist2-sist2-1

View File

@ -0,0 +1,34 @@
#!/bin/bash
# Incrementally re-scans the "Files" collection into a fresh, dated index,
# swaps it in place of the previous index, then pushes it to the search backend.
set -e

# --- Parameters: edit these to match your own setup -------------------------
DATE=$(date +%Y_%m_%d)
CONTENT=/zpool/files
ORIG=/mnt/ssd/sist-index/files.idx
NEW=/mnt/ssd/sist-index/files_$DATE.idx
EXCLUDE='ZArchives|TorrentStore|TorrentDownload|624f0c59-1fef-44f6-95e9-7483296f2833|ubuntu-full-2021-12-07'
NAME=Files
#REWRITE_URL="http://localhost:33333/activate?collection=$NAME&path="
REWRITE_URL=""

# Every expansion is quoted so that values containing spaces or glob
# characters are passed to sist2 as a single, unmodified argument.
sist2 scan \
    --threads 14 \
    --mem-throttle 32768 \
    --quality 1.0 \
    --name "$NAME" \
    --ocr-lang=eng+chi_sim \
    --ocr-ebooks \
    --ocr-images \
    --exclude="$EXCLUDE" \
    --rewrite-url="$REWRITE_URL" \
    --incremental="$ORIG" \
    --output="$NEW" \
    "$CONTENT"
echo ">>> Scan complete"

# Replace the previous index with the new one. `${ORIG:?}` aborts instead of
# running `rm -rf` with an empty argument if ORIG is ever unset or blank.
rm -rf -- "${ORIG:?}"
mv -- "$NEW" "$ORIG"

# Proxy variables are cleared before `sist2 index` runs.
# NOTE(review): presumably so the request to the search backend bypasses any
# configured proxy -- confirm. When this file is sourced, the variables are
# cleared in the calling shell as well.
unset http_proxy
unset https_proxy
unset HTTP_PROXY
unset HTTPS_PROXY
sist2 index "$ORIG" --incremental-index
echo ">>> Index complete"

View File

@ -0,0 +1,33 @@
#!/bin/bash
# Incrementally re-scans the "NextCloud" collection into a fresh, dated index,
# swaps it in place of the previous index, then pushes it to the search backend.
set -e

# --- Parameters: edit these to match your own setup -------------------------
DATE=$(date +%Y_%m_%d)
CONTENT=/zpool/nextcloud/v-yadli
ORIG=/mnt/ssd/sist-index/nextcloud.idx
NEW=/mnt/ssd/sist-index/nextcloud_$DATE.idx
EXCLUDE='Yatao|.*263418493\\/Image\\/.*'
NAME=NextCloud
# REWRITE_URL="http://localhost:33333/activate?collection=$NAME&path="
REWRITE_URL=""

# Every expansion is quoted: EXCLUDE in particular contains `*`, which an
# unquoted expansion would expose to shell pathname expansion.
sist2 scan \
    --threads 14 \
    --mem-throttle 32768 \
    --quality 1.0 \
    --name "$NAME" \
    --ocr-lang=eng+chi_sim \
    --ocr-ebooks \
    --ocr-images \
    --exclude="$EXCLUDE" \
    --rewrite-url="$REWRITE_URL" \
    --incremental="$ORIG" \
    --output="$NEW" \
    "$CONTENT"
echo ">>> Scan complete"

# Replace the previous index with the new one. `${ORIG:?}` aborts instead of
# running `rm -rf` with an empty argument if ORIG is ever unset or blank.
rm -rf -- "${ORIG:?}"
mv -- "$NEW" "$ORIG"

# Proxy variables are cleared before `sist2 index` runs.
# NOTE(review): presumably so the request to the search backend bypasses any
# configured proxy -- confirm. When this file is sourced, the variables are
# cleared in the calling shell as well.
unset http_proxy
unset https_proxy
unset HTTP_PROXY
unset HTTPS_PROXY
sist2 index "$ORIG" --incremental-index
# Same completion message as the sibling sist2-update-files.sh script.
echo ">>> Index complete"

View File

@ -0,0 +1,6 @@
# Service that runs the sist2 update scripts once.
# It has no [Install] section; the accompanying timer names it via Unit=.
[Unit]
Description=sist2-update
[Service]
# Account the update scripts run as -- change this to a user on your system.
User=yatli
# Entry point script that is executed with bash.
ExecStart=/bin/bash /usr/bin/sist2-update-all.sh

View File

@ -0,0 +1,10 @@
# Timer that triggers sist2-update.service on a daily schedule.
[Unit]
Description=sist2-update
[Timer]
# Fire every day at 03:00:00.
OnCalendar=*-*-* 3:00:00
# See systemd.timer(5): with Persistent=true a run that was missed while the
# timer was inactive is triggered when the timer is next activated.
Persistent=true
# Service unit activated by this timer.
Unit=sist2-update.service
[Install]
# Pulled in by timers.target once the timer is enabled.
WantedBy=timers.target

View File

@ -39,7 +39,7 @@
"index": false
},
"thumbnail": {
"type": "keyword",
"type": "integer",
"index": false
},
"videoc": {

2
scripts/start_dev_es.sh Executable file
View File

@ -0,0 +1,2 @@
#!/bin/bash
# Starts a throwaway single-node Elasticsearch 7.14.0 container for development.
# The container is removed on exit (--rm), listens on port 9200 and runs with a
# fixed 8 GiB JVM heap.
docker run --rm -it -p 9200:9200 -e "discovery.type=single-node" \
    -e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.14.0

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,3 +0,0 @@
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1,user-scalable=no"><title>sist2</title><link href="css/chunk-vendors.css" rel="preload" as="style"><link href="css/index.css" rel="preload" as="style"><link href="js/chunk-vendors.js" rel="preload" as="script"><link href="js/index.js" rel="preload" as="script"><link href="css/chunk-vendors.css" rel="stylesheet"><link href="css/index.css" rel="stylesheet"></head><body><noscript><style>body {
height: initial;
}</style><div style="text-align: center; margin-top: 100px"><strong>We're sorry but sist2 doesn't work properly without JavaScript enabled. Please enable it to continue.</strong><br><strong>Nous sommes désolés mais sist2 ne fonctionne pas correctement si JavaScript est activé. Veuillez l'activer pour continuer.</strong></div></noscript><div id="app"></div><script src="js/chunk-vendors.js"></script><script src="js/index.js"></script></body></html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@
},
"dependencies": {
"@egjs/vue-infinitegrid": "3.3.0",
"axios": "^0.21.1",
"axios": "^0.25.0",
"bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5",
"crypto-es": "^1.2.7",

View File

@ -146,6 +146,7 @@ html, body {
.theme-black .nav-tabs .nav-link {
color: #e0e0e0;
border-radius: 0;
}
.theme-black .nav-tabs .nav-item.show .nav-link, .theme-black .nav-tabs .nav-link.active {
@ -309,4 +310,8 @@ mark {
display: inline-block;
width: 40%;
}
.pointer {
cursor: pointer;
}
</style>

View File

@ -62,8 +62,9 @@ export interface EsHit {
isPlayableImage: boolean
isAudio: boolean
hasThumbnail: boolean
tnW: number
tnH: number
hasVidPreview: boolean
/** Number of thumbnails available */
tnNum: number
}
highlight: {
name: string[] | undefined,
@ -134,8 +135,15 @@ class Sist2Api {
if ("thumbnail" in hit._source) {
hit._props.hasThumbnail = true;
hit._props.tnW = Number(hit._source.thumbnail.split(",")[0]);
hit._props.tnH = Number(hit._source.thumbnail.split(",")[1]);
if (Number.isNaN(Number(hit._source.thumbnail))) {
// Backwards compatibility
hit._props.tnNum = 1;
hit._props.hasVidPreview = false;
} else {
hit._props.tnNum = Number(hit._source.thumbnail);
hit._props.hasVidPreview = hit._props.tnNum > 1;
}
}
switch (mimeCategory) {

View File

@ -1,5 +1,31 @@
<template>
<div v-if="$store.state.optUseDatePicker">
<b-row>
<b-col sm="6">
<b-form-datepicker
value-as-date
:date-format-options="{ year: 'numeric', month: '2-digit', day: '2-digit' }"
:locale="$store.state.optLang"
class="mb-2"
:value="dateMin" @input="setDateMin"></b-form-datepicker>
</b-col>
<b-col sm="6">
<b-form-datepicker
value-as-date
:date-format-options="{ year: 'numeric', month: '2-digit', day: '2-digit' }"
:locale="$store.state.optLang"
class="mb-2"
:value="dateMax" @input="setDateMax"></b-form-datepicker>
</b-col>
</b-row>
</div>
<div v-else>
<b-row>
<b-col style="height: 70px;">
<div id="dateSlider"></div>
</b-col>
</b-row>
</div>
</template>
<script>
@ -10,11 +36,36 @@ import {mergeTooltips} from "@/util-js";
export default {
name: "DateSlider",
methods: {
setDateMin(val) {
const epochDate = Math.ceil(+val / 1000);
this.$store.commit("setDateMin", epochDate);
},
setDateMax(val) {
const epochDate = Math.ceil(+val / 1000);
this.$store.commit("setDateMax", epochDate);
},
},
computed: {
dateMin() {
const dateMin = this.$store.state.dateMin ? this.$store.state.dateMin : this.$store.state.dateBoundsMin;
return new Date(dateMin * 1000)
},
dateMax() {
const dateMax = this.$store.state.dateMax ? this.$store.state.dateMax : this.$store.state.dateBoundsMax;
return new Date(dateMax * 1000)
}
},
mounted() {
this.$store.subscribe((mutation) => {
if (mutation.type === "setDateBoundsMax") {
const elem = document.getElementById("dateSlider");
if (elem === null) {
// Using b-form-datepicker, skip initialisation of slider
return
}
if (elem.children.length > 0) {
return;
}

View File

@ -30,6 +30,7 @@ export default {
{key: "esIndex", value: this.$store.state.sist2Info.esIndex},
{key: "tagline", value: this.$store.state.sist2Info.tagline},
{key: "dev", value: this.$store.state.sist2Info.dev},
{key: "mongooseVersion", value: this.$store.state.sist2Info.mongooseVersion},
{key: "esVersion", value: this.$store.state.sist2Info.esVersion},
{key: "esVersionSupported", value: this.$store.state.sist2Info.esVersionSupported},
{key: "esVersionLegacy", value: this.$store.state.sist2Info.esVersionLegacy},

View File

@ -1,5 +1,6 @@
<template>
<div class="doc-card" :class="{'sub-document': doc._props.isSubDocument}" :style="`width: ${width}px`">
<div class="doc-card" :class="{'sub-document': doc._props.isSubDocument}" :style="`width: ${width}px`"
@click="$store.commit('busTnTouchStart', null)">
<b-card
no-body
img-top
@ -10,39 +11,7 @@
<ContentDiv :doc="doc"></ContentDiv>
<!-- Thumbnail-->
<div v-if="doc._props.hasThumbnail" class="img-wrapper" @mouseenter="onTnEnter()" @mouseleave="onTnLeave()">
<div v-if="doc._props.isAudio" class="card-img-overlay" :class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
</div>
<div
v-if="doc._props.isImage && !hover && doc._props.tnW / doc._props.tnH < 5"
class="card-img-overlay"
:class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span>
</div>
<div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover"
class="card-img-overlay"
:class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
</div>
<div v-if="doc._props.isPlayableVideo" class="play">
<svg viewBox="0 0 494.942 494.942" xmlns="http://www.w3.org/2000/svg">
<path d="m35.353 0 424.236 247.471-424.236 247.471z"/>
</svg>
</div>
<img ref="tn"
v-if="doc._props.isPlayableImage || doc._props.isPlayableVideo"
:src="(doc._props.isGif && hover) ? `f/${doc._id}` : `t/${doc._source.index}/${doc._id}`"
alt=""
:style="{height: (doc._props.isGif && hover) ? `${tnHeight()}px` : undefined}"
class="pointer fit card-img-top" @click="onThumbnailClick()">
<img v-else :src="`t/${doc._source.index}/${doc._id}`" alt=""
class="fit card-img-top">
</div>
<FullThumbnail :doc="doc" :small-badge="smallBadge" @onThumbnailClick="onThumbnailClick()"></FullThumbnail>
<!-- Audio player-->
<audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls
@ -73,31 +42,19 @@ import TagContainer from "@/components/TagContainer.vue";
import DocFileTitle from "@/components/DocFileTitle.vue";
import DocInfoModal from "@/components/DocInfoModal.vue";
import ContentDiv from "@/components/ContentDiv.vue";
import FullThumbnail from "@/components/FullThumbnail";
export default {
components: {ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
components: {FullThumbnail, ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
props: ["doc", "width"],
data() {
return {
ext: ext,
showInfo: false,
hover: false
}
},
computed: {
placeHolderStyle() {
const tokens = this.doc._source.thumbnail.split(",");
const w = Number(tokens[0]);
const h = Number(tokens[1]);
const MAX_HEIGHT = 400;
return {
height: `${Math.min((h / w) * this.width, MAX_HEIGHT)}px`,
}
},
smallBadge() {
return this.width < 150;
}
@ -119,31 +76,10 @@ export default {
}
});
},
onTnEnter() {
this.hover = true;
},
onTnLeave() {
this.hover = false;
},
tnHeight() {
return this.$refs.tn.height;
}
},
}
</script>
<style>
.img-wrapper {
position: relative;
}
.img-wrapper:hover svg {
fill: rgba(0, 0, 0, 1);
}
.pointer {
cursor: pointer;
}
.fit {
display: block;
min-width: 64px;
@ -153,15 +89,17 @@ export default {
width: auto;
height: auto;
}
.audio-fit {
height: 39px;
vertical-align: bottom;
display: inline;
width: 100%;
}
</style>
<style scoped>
.card-img-top {
border-top-left-radius: 0;
border-top-right-radius: 0;
}
.padding-03 {
padding: 0.3rem;
}
@ -179,55 +117,11 @@ export default {
padding: 0.3rem;
}
.thumbnail-placeholder {
}
.card-img-overlay {
pointer-events: none;
padding: 0.75rem;
bottom: unset;
top: 0;
left: unset;
right: unset;
}
.badge-resolution {
color: #212529;
background-color: #FFC107;
}
.play {
position: absolute;
width: 25px;
height: 25px;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
pointer-events: none;
}
.play svg {
fill: rgba(0, 0, 0, 0.7);
}
.doc-card {
padding-left: 3px;
padding-right: 3px;
}
.small-badge {
padding: 1px 3px;
font-size: 70%;
}
.audio-fit {
height: 39px;
vertical-align: bottom;
display: inline;
width: 100%;
}
.sub-document .card {
background: #AB47BC1F !important;
}

View File

@ -2,7 +2,10 @@
<b-modal :visible="show" size="lg" :hide-footer="true" static lazy @close="$emit('close')" @hide="$emit('close')"
>
<template #modal-title>
<h5 class="modal-title" :title="doc._source.name + ext(doc)">{{ doc._source.name + ext(doc) }}</h5>
<h5 class="modal-title" :title="doc._source.name + ext(doc)">
{{ doc._source.name + ext(doc) }}
<router-link :to="`/file?byId=${doc._id}`">#</router-link>
</h5>
</template>
<img v-if="doc._props.hasThumbnail" :src="`t/${doc._source.index}/${doc._id}`" alt="" class="fit card-img-top">

View File

@ -1,11 +1,13 @@
<template>
<b-list-group-item class="flex-column align-items-start mb-2" :class="{'sub-document': doc._props.isSubDocument}"
@mouseenter="onTnEnter()" @mouseleave="onTnLeave()" >
@mouseenter="onTnEnter()" @mouseleave="onTnLeave()">
<!-- Info modal-->
<DocInfoModal :show="showInfo" :doc="doc" @close="showInfo = false"></DocInfoModal>
<div class="media ml-2">
<!-- Thumbnail-->
<div v-if="doc._props.hasThumbnail" class="align-self-start mr-2 wrapper-sm">
<div class="img-wrapper">
<div v-if="doc._props.isPlayableVideo" class="play">
@ -26,6 +28,7 @@
<FileIcon></FileIcon>
</div>
<!-- Doc line-->
<div class="doc-line ml-3">
<div style="display: flex">
<span class="info-icon" @click="showInfo = true"></span>
@ -154,6 +157,7 @@ export default {
.list-group-item .img-wrapper {
width: 88px;
height: 88px;
position: relative;
}
.fit-sm {

View File

@ -0,0 +1,173 @@
<template>
<div v-if="doc._props.hasThumbnail" class="img-wrapper" @mouseenter="onTnEnter()" @mouseleave="onTnLeave()"
@touchstart="onTouchStart()">
<div v-if="doc._props.isAudio" class="card-img-overlay" :class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
</div>
<!-- Resolution badge for images; hidden while hovering and for very wide
     images (aspect ratio of 5 or more). The ratio is taken from the document's
     own dimensions: doc._props.tnW / tnH are no longer populated, which made
     the previous check (NaN < 5) always false and hid the badge permanently.
     A missing width or height also yields NaN and keeps the badge hidden. -->
<div
    v-if="doc._props.isImage && !hover && doc._source.width / doc._source.height < 5"
    class="card-img-overlay"
    :class="{'small-badge': smallBadge}">
    <span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span>
</div>
<div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover"
class="card-img-overlay"
:class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
</div>
<div v-if="doc._props.isPlayableVideo" class="play">
<svg viewBox="0 0 494.942 494.942" xmlns="http://www.w3.org/2000/svg">
<path d="m35.353 0 424.236 247.471-424.236 247.471z"/>
</svg>
</div>
<img ref="tn"
v-if="doc._props.isPlayableImage || doc._props.isPlayableVideo"
:src="tnSrc"
alt=""
:style="{height: (doc._props.isGif && hover) ? `${tnHeight()}px` : undefined}"
class="pointer fit card-img-top" @click="onThumbnailClick()">
<img v-else :src="tnSrc" alt=""
class="fit card-img-top">
<ThumbnailProgressBar v-if="hover && doc._props.hasVidPreview"
:progress="(currentThumbnailNum + 1) / (doc._props.tnNum)"
></ThumbnailProgressBar>
</div>
</template>
<script>
import {humanTime} from "@/util";
import ThumbnailProgressBar from "@/components/ThumbnailProgressBar";
export default {
name: "FullThumbnail",
props: ["doc", "smallBadge"],
components: {ThumbnailProgressBar},
data() {
return {
hover: false,
currentThumbnailNum: 0,
timeoutId: null
}
},
created() {
this.$store.subscribe((mutation) => {
if (mutation.type === "busTnTouchStart" && mutation.payload !== this.doc._id) {
this.onTnLeave();
}
});
},
computed: {
// URL used as the `src` of the thumbnail <img>.
tnSrc() {
const doc = this.doc;
const props = doc._props;
// While a GIF is hovered, the `f/<id>` URL is used instead of the thumbnail
// (presumably the file itself, so that the animation plays -- confirm).
if (props.isGif && this.hover) {
return `f/${doc._id}`;
}
// Thumbnail 0 is addressed without a suffix; any other thumbnail number is
// appended to the id as a 4-digit, zero-padded string (e.g. "0003").
return (this.currentThumbnailNum === 0)
? `t/${doc._source.index}/${doc._id}`
: `t/${doc._source.index}/${doc._id}${String(this.currentThumbnailNum).padStart(4, "0")}`;
},
},
methods: {
humanTime: humanTime,
onThumbnailClick() {
this.$emit("onThumbnailClick");
},
tnHeight() {
return this.$refs.tn.height;
},
tnWidth() {
return this.$refs.tn.width;
},
onTnEnter() {
this.hover = true;
if (this.doc._props.hasVidPreview) {
this.currentThumbnailNum += 1;
this.scheduleNextTnNum();
}
},
onTnLeave() {
this.currentThumbnailNum = 0;
this.hover = false;
if (this.timeoutId !== null) {
window.clearTimeout(this.timeoutId);
this.timeoutId = null;
}
},
scheduleNextTnNum() {
const INTERVAL = this.$store.state.optVidPreviewInterval ?? 700;
this.timeoutId = window.setTimeout(() => {
if (!this.hover) {
return;
}
this.scheduleNextTnNum();
if (this.currentThumbnailNum === this.doc._props.tnNum - 1) {
this.currentThumbnailNum = 0;
} else {
this.currentThumbnailNum += 1;
}
}, INTERVAL);
},
onTouchStart() {
this.$store.commit("busTnTouchStart", this.doc._id);
if (!this.hover) {
this.onTnEnter()
}
},
}
}
</script>
<style scoped>
.img-wrapper {
position: relative;
}
.img-wrapper:hover svg {
fill: rgba(0, 0, 0, 1);
}
.card-img-top {
border-top-left-radius: 0;
border-top-right-radius: 0;
}
.play {
position: absolute;
width: 25px;
height: 25px;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
pointer-events: none;
}
.play svg {
fill: rgba(0, 0, 0, 0.7);
}
.badge-resolution {
color: #212529;
background-color: #FFC107;
}
.card-img-overlay {
pointer-events: none;
padding: 0.75rem;
bottom: unset;
top: 0;
left: unset;
right: unset;
}
.small-badge {
padding: 1px 3px;
font-size: 70%;
}
</style>

View File

@ -29,7 +29,7 @@
:class="{active: lastClickIndex === idx}"
>
<div class="d-flex">
<b-checkbox @change="toggleIndex(idx)" :checked="isSelected(idx)"></b-checkbox>
<b-checkbox style="pointer-events: none" :checked="isSelected(idx)"></b-checkbox>
{{ idx.name }}
<span class="text-muted timestamp-text ml-2">{{ formatIdxDate(idx.timestamp) }}</span>
</div>
@ -168,4 +168,24 @@ export default Vue.extend({
background-color: inherit;
color: inherit;
}
.theme-black .list-group-item {
border: 1px solid rgba(255,255,255, 0.1);
}
.theme-black .list-group-item:first-child {
border: 1px solid rgba(255,255,255, 0.05);
}
.theme-black .list-group-item.active {
z-index: 2;
background-color: inherit;
color: inherit;
border: 1px solid rgba(255,255,255, 0.3);
border-radius: 0;
}
.theme-black .list-group {
border-radius: 0;
}
</style>

View File

@ -1,6 +1,5 @@
<template>
<b-table :items="tableItems" small borderless responsive="md" thead-class="hidden" class="mb-0 mt-4">
<template #cell(value)="data">
<span v-if="'html' in data.item" v-html="data.item.html"></span>
<span v-else>{{ data.value }}</span>
@ -33,12 +32,18 @@ function dmsToDecimal(dms, ref) {
export default {
name: "InfoTable",
props: ["doc"],
data() {
return {
indexName: "loading..."
}
},
computed: {
tableItems() {
this.indexName;
const src = this.doc._source;
const items = [
{key: "index", value: `[${this.$store.getters.indexMap[src.index].name}]`},
{key: "index", value: `[${this.indexName}]`},
{key: "mtime", value: humanDate(src.mtime)},
{key: "mime", value: src.mime},
{key: "size", value: humanFileSize(src.size)},
@ -85,7 +90,16 @@ export default {
return items;
}
},
mounted() {
if (this.$store.getters.indexMap[this.doc.index]) {
this.indexName = this.$store.getters.indexMap[this.doc._source.index].name
}
window.setTimeout(() => {
this.indexName = this.$store.getters.indexMap[this.doc._source.index].name
}, 500)
},
}
</script>

View File

@ -3,7 +3,7 @@
<p>
<b>{{
`[${$store.getters.indices.find(i => i.id === hit._source.index).name}]`
}}</b>{{ `/${hit._source.path}/${hit._source.name}${ext(hit)}` }}
}}</b>{{ `${hit._source.path === '' ? '' : '/'}${hit._source.path}/${hit._source.name}${ext(hit)}` }}
</p>
<p style="margin-top: -1em">
<span v-if="hit._source.width">{{ `${hit._source.width}x${hit._source.height}`}}</span>

View File

@ -16,7 +16,8 @@ export default {
data() {
return {
mimeTree: null,
stashedMimeTreeAttributes: null
stashedMimeTreeAttributes: null,
updateBusy: false
}
},
mounted() {
@ -34,6 +35,10 @@ export default {
return;
}
if (this.updateBusy) {
return;
}
this.$store.commit("setSelectedMimeTypes", getSelectedTreeNodes(this.mimeTree));
},
updateTree() {
@ -42,6 +47,11 @@ export default {
return;
}
if (this.updateBusy) {
return
}
this.updateBusy = true;
if (this.stashedMimeTreeAttributes === null) {
this.stashedMimeTreeAttributes = getTreeNodeAttributes(this.mimeTree);
}
@ -78,6 +88,7 @@ export default {
}
});
this.stashedMimeTreeAttributes = null;
this.updateBusy = false;
});
},

View File

@ -0,0 +1,40 @@
<template>
<div class="thumbnail-progress-bar" :style="{width: `${percentProgress}%`}"></div>
</template>
<script>
// Thin bar whose width reflects how far the thumbnail preview has advanced.
export default {
name: "ThumbnailProgressBar",
// `progress` is a fraction (the parent passes (current + 1) / total).
// NOTE(review): `doc` is declared but not read anywhere in this component.
props: ["doc", "progress"],
computed: {
// `progress` scaled to a percentage and clamped to the range [0, 100].
percentProgress() {
return Math.min(Math.max(this.progress * 100, 0), 100);
}
}
}
</script>
<style scoped>
.thumbnail-progress-bar {
position: absolute;
left: 0;
bottom: 0;
height: 4px;
background: #2196f3AA;
z-index: 9;
}
.theme-black .thumbnail-progress-bar {
background: rgba(0, 188, 212, 0.95);
}
.sub-document .thumbnail-progress-bar {
max-width: calc(100% - 8px);
left: 4px;
}
</style>

View File

@ -1,5 +1,8 @@
export default {
en: {
filePage: {
notFound: "Not found"
},
searchBar: {
simple: "Search",
advanced: "Advanced search",
@ -67,7 +70,9 @@ export default {
tagOrOperator: "Use OR operator when specifying multiple tags.",
hideDuplicates: "Hide duplicate results based on checksum",
hideLegacy: "Hide the 'legacyES' Elasticsearch notice",
updateMimeMap: "Update the Media Types tree in real time"
updateMimeMap: "Update the Media Types tree in real time",
useDatePicker: "Use a Date Picker component rather than a slider",
vidPreviewInterval: "Video preview frame duration in ms"
},
queryMode: {
simple: "Simple",
@ -161,6 +166,9 @@ export default {
},
},
fr: {
filePage: {
notFound: "Ficher introuvable"
},
searchBar: {
simple: "Recherche",
advanced: "Recherche avancée",
@ -229,7 +237,9 @@ export default {
tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags",
hideDuplicates: "Masquer les résultats en double",
hideLegacy: "Masquer la notice 'legacyES' Elasticsearch",
updateMimeMap: "Mettre à jour l'arbre de Types de médias en temps réel"
updateMimeMap: "Mettre à jour l'arbre de Types de médias en temps réel",
useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider",
vidPreviewInterval: "Durée des images d'aperçu vidéo en millisecondes"
},
queryMode: {
simple: "Simple",
@ -324,6 +334,9 @@ export default {
},
},
"zh-CN": {
filePage: {
notFound: "未找到"
},
searchBar: {
simple: "搜索",
advanced: "高级搜索",
@ -391,7 +404,9 @@ export default {
tagOrOperator: "使用或操作OR匹配多个标签。",
hideDuplicates: "使用校验码隐藏重复结果",
hideLegacy: "隐藏'legacyES' Elasticsearch 通知",
updateMimeMap: "媒体类型树的实时更新"
updateMimeMap: "媒体类型树的实时更新",
useDatePicker: "使用日期选择器组件而不是滑块",
vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位"
},
queryMode: {
simple: "简单",

View File

@ -3,6 +3,7 @@ import VueRouter, {RouteConfig} from "vue-router"
import StatsPage from "../views/StatsPage.vue"
import Configuration from "../views/Configuration.vue"
import SearchPage from "@/views/SearchPage.vue";
import FilePage from "@/views/FilePage.vue";
Vue.use(VueRouter)
@ -21,6 +22,11 @@ const routes: Array<RouteConfig> = [
path: "/config",
name: "Configuration",
component: Configuration
},
{
path: "/file",
name: "File",
component: FilePage
}
]

View File

@ -48,7 +48,9 @@ export default new Vuex.Store({
optLightboxLoadOnlyCurrent: false,
optLightboxSlideDuration: 15,
optHideLegacy: false,
optUpdateMimeMap: true,
optUpdateMimeMap: false,
optUseDatePicker: false,
optVidPreviewInterval: 700,
_onLoadSelectedIndices: [] as string[],
_onLoadSelectedMimeTypes: [] as string[],
@ -157,6 +159,8 @@ export default new Vuex.Store({
setOptTreemapColor: (state, val) => state.optTreemapColor = val,
setOptHideLegacy: (state, val) => state.optHideLegacy = val,
setOptUpdateMimeMap: (state, val) => state.optUpdateMimeMap = val,
setOptUseDatePicker: (state, val) => state.optUseDatePicker = val,
setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
@ -172,6 +176,12 @@ export default new Vuex.Store({
busSearch: () => {
// noop
},
busTouchEnd: () => {
// noop
},
busTnTouchStart: (doc_id) => {
// noop
},
},
actions: {
setSist2Info: (store, val) => {
@ -366,5 +376,7 @@ export default new Vuex.Store({
optResultSize: state => state.size,
optHideLegacy: state => state.optHideLegacy,
optUpdateMimeMap: state => state.optUpdateMimeMap,
optUseDatePicker: state => state.optUseDatePicker,
optVidPreviewInterval: state => state.optVidPreviewInterval,
}
})

View File

@ -1,8 +1,12 @@
import {EsHit} from "@/Sist2Api";
export function ext(hit: EsHit) {
return Object.prototype.hasOwnProperty.call(hit._source, "extension")
&& hit["_source"]["extension"] !== "" ? "." + hit["_source"]["extension"] : "";
return srcExt(hit._source)
}
export function srcExt(src) {
return Object.prototype.hasOwnProperty.call(src, "extension")
&& src["extension"] !== "" ? "." + src["extension"] : "";
}
export function strUnescape(str: string): string {

View File

@ -41,6 +41,10 @@
<b-form-checkbox :checked="optUpdateMimeMap" @input="setOptUpdateMimeMap">
{{ $t("opt.updateMimeMap") }}
</b-form-checkbox>
<b-form-checkbox :checked="optUseDatePicker" @input="setOptUseDatePicker">
{{ $t("opt.useDatePicker") }}
</b-form-checkbox>
</b-card>
<br/>
@ -81,6 +85,10 @@
<label>{{ $t("opt.slideDuration") }}</label>
<b-form-input :value="optLightboxSlideDuration" type="number" min="1"
@input="setOptLightboxSlideDuration"></b-form-input>
<label>{{ $t("opt.vidPreviewInterval") }}</label>
<b-form-input :value="optVidPreviewInterval" type="number" min="50"
@input="setOptVidPreviewInterval"></b-form-input>
</b-card>
<h4 class="mt-3">{{ $t("treemapOptions") }}</h4>
@ -229,6 +237,8 @@ export default {
"optHideDuplicates",
"optHideLegacy",
"optUpdateMimeMap",
"optUseDatePicker",
"optVidPreviewInterval",
]),
clientWidth() {
return window.innerWidth;
@ -272,7 +282,9 @@ export default {
"setOptLang",
"setOptHideDuplicates",
"setOptHideLegacy",
"setOptUpdateMimeMap"
"setOptUpdateMimeMap",
"setOptUseDatePicker",
"setOptVidPreviewInterval",
]),
onResetClick() {
localStorage.removeItem("sist2_configuration");

View File

@ -0,0 +1,131 @@
<template>
<div style="margin-left: auto; margin-right: auto;" class="container">
<Preloader v-if="loading"></Preloader>
<b-card v-else-if="!loading && found">
<b-card-title :title="doc._source.name + ext(doc)">
{{ doc._source.name + ext(doc) }}
</b-card-title>
<!-- Thumbnail-->
<div style="position: relative; margin-left: auto; margin-right: auto; text-align: center">
<FullThumbnail :doc="doc" :small-badge="false" @onThumbnailClick="onThumbnailClick()"></FullThumbnail>
</div>
<!-- Audio player-->
<audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls
:type="doc._source.mime"
:src="`f/${doc._id}`"></audio>
<InfoTable :doc="doc" v-if="doc"></InfoTable>
<div v-if="doc._source.content" class="content-div">{{ doc._source.content }}</div>
</b-card>
<div v-else>
<b-card>
<b-card-title>{{ $t("filePage.notFound") }}</b-card-title>
</b-card>
</div>
</div>
</template>
<script>
import Preloader from "@/components/Preloader.vue";
import InfoTable from "@/components/InfoTable.vue";
import Sist2Api from "@/Sist2Api";
import {ext} from "@/util";
import Vue from "vue";
import sist2 from "@/Sist2Api";
import FullThumbnail from "@/components/FullThumbnail";
export default Vue.extend({
name: "FilePage",
components: {
FullThumbnail,
Preloader,
InfoTable
},
data() {
return {
loading: true,
found: false,
doc: null
}
},
methods: {
ext: ext,
onThumbnailClick() {
window.open(`/f/${this.doc._id}`, "_blank");
},
findById(id) {
return {
query: {
bool: {
must: [
{
match: {
"_id": id
}
}
]
}
},
size: 1
}
},
findByName(name) {
return {
query: {
bool: {
must: [
{
match: {
"name": name
}
}
]
}
},
size: 1
}
}
},
mounted() {
if (this.$store.state.sist2Info === null) {
sist2.getSist2Info().then(data => {
this.$store.dispatch("setSist2Info", data);
this.$store.commit("setIndices", data.indices);
});
}
let query = null;
if (this.$route.query.byId) {
query = this.findById(this.$route.query.byId);
} else if (this.$route.query.byName) {
query = this.findByName(this.$route.query.byName);
}
if (query) {
Sist2Api.esQuery(query).then(result => {
if (result.hits.hits.length === 0) {
this.found = false;
} else {
this.doc = result.hits.hits[0];
this.found = true;
}
this.loading = false;
});
} else {
this.loading = false;
this.found = false;
}
}
});
</script>
<style scoped>
.img-wrapper {
display: inline-block;
}
</style>

View File

@ -19,11 +19,7 @@
</b-row>
<b-row>
<b-col sm="6">
<b-row>
<b-col style="height: 70px;">
<DateSlider></DateSlider>
</b-col>
</b-row>
<b-row>
<b-col>
<IndexPicker></IndexPicker>
@ -104,6 +100,10 @@ export default Vue.extend({
...mapGetters(["indices", "optDisplay"]),
},
mounted() {
// Handle touch events: broadcast the end of a touch through the store bus.
// Both properties must be assigned a function. Assigning the result of
// commit(...) fired the mutation once at mount time and registered no
// handler for touchcancel.
window.ontouchend = () => this.$store.commit("busTouchEnd");
window.ontouchcancel = () => this.$store.commit("busTouchEnd");
this.search = _debounce(async (clear: boolean) => {
if (clear) {
await this.clearResults();

110
src/cli.c
View File

@ -5,7 +5,8 @@
#define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_CONTENT_SIZE 32768
#define DEFAULT_QUALITY 1
#define DEFAULT_SIZE 300
#define DEFAULT_THUMBNAIL_SIZE 500
#define DEFAULT_THUMBNAIL_COUNT 1
#define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200"
@ -19,6 +20,8 @@
#define DEFAULT_MAX_MEM_BUFFER 2000
#define DEFAULT_THROTTLE_MEMORY_THRESHOLD 0
const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
@ -65,6 +68,10 @@ void index_args_destroy(index_args_t *args) {
if (args->es_settings_path) {
free(args->es_settings);
}
if (args->index_path != NULL) {
free(args->index_path);
}
free(args);
}
@ -85,13 +92,12 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
char *abs_path = abspath(argv[1]);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s\n", argv[1]);
return 1;
LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1])
} else {
args->path = abs_path;
}
if (args->incremental != NULL) {
if (args->incremental != OPTION_VALUE_UNSPECIFIED) {
args->incremental = abspath(args->incremental);
if (abs_path == NULL) {
sist_log("main.c", LOG_SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
@ -99,32 +105,39 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
}
}
if (args->quality == 0) {
args->quality = DEFAULT_QUALITY;
} else if (args->quality < 1 || args->quality > 31) {
fprintf(stderr, "Invalid quality: %f\n", args->quality);
if (args->tn_quality == OPTION_VALUE_UNSPECIFIED) {
args->tn_quality = DEFAULT_QUALITY;
} else if (args->tn_quality < 1.0f || args->tn_quality > 31.0f) {
fprintf(stderr, "Invalid value for --thumbnail-quality argument: %f. Must be within [1.0, 31.0].\n",
args->tn_quality);
return 1;
}
if (args->size == 0) {
args->size = DEFAULT_SIZE;
} else if (args->size > 0 && args->size < 32) {
printf("Invalid size: %d\n", args->content_size);
if (args->tn_size == OPTION_VALUE_UNSPECIFIED) {
args->tn_size = DEFAULT_THUMBNAIL_SIZE;
} else if (args->tn_size < 32) {
printf("Invalid value --thumbnail-size argument: %d. Must be greater than 32 pixels.\n", args->tn_size);
return 1;
}
if (args->content_size == 0) {
if (args->tn_count == OPTION_VALUE_UNSPECIFIED) {
args->tn_count = DEFAULT_THUMBNAIL_COUNT;
} else if (args->tn_count == OPTION_VALUE_DISABLE) {
args->tn_count = 0;
}
if (args->content_size == OPTION_VALUE_UNSPECIFIED) {
args->content_size = DEFAULT_CONTENT_SIZE;
}
if (args->threads == 0) {
args->threads = 1;
} else if (args->threads < 0) {
fprintf(stderr, "Invalid threads: %d\n", args->threads);
fprintf(stderr, "Invalid value for --threads: %d. Must be a positive number\n", args->threads);
return 1;
}
if (args->output == NULL) {
if (args->output == OPTION_VALUE_UNSPECIFIED) {
args->output = malloc(strlen(DEFAULT_OUTPUT) + 1);
strcpy(args->output, DEFAULT_OUTPUT);
} else {
@ -143,7 +156,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->depth += 1;
}
if (args->name == NULL) {
if (args->name == OPTION_VALUE_UNSPECIFIED) {
args->name = g_path_get_basename(args->output);
} else {
char *tmp = malloc(strlen(args->name) + 1);
@ -151,11 +164,11 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->name = tmp;
}
if (args->rewrite_url == NULL) {
if (args->rewrite_url == OPTION_VALUE_UNSPECIFIED) {
args->rewrite_url = DEFAULT_REWRITE_URL;
}
if (args->archive == NULL || strcmp(args->archive, "recurse") == 0) {
if (args->archive == OPTION_VALUE_UNSPECIFIED || strcmp(args->archive, "recurse") == 0) {
args->archive_mode = ARC_MODE_RECURSE;
} else if (strcmp(args->archive, "list") == 0) {
args->archive_mode = ARC_MODE_LIST;
@ -168,17 +181,17 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1;
}
if (args->ocr_images && args->tesseract_lang == NULL) {
if (args->ocr_images && args->tesseract_lang == OPTION_VALUE_UNSPECIFIED) {
fprintf(stderr, "You must specify --ocr-lang <LANG> to use --ocr-images");
return 1;
}
if (args->ocr_ebooks && args->tesseract_lang == NULL) {
if (args->ocr_ebooks && args->tesseract_lang == OPTION_VALUE_UNSPECIFIED) {
fprintf(stderr, "You must specify --ocr-lang <LANG> to use --ocr-ebooks");
return 1;
}
if (args->tesseract_lang != NULL) {
if (args->tesseract_lang != OPTION_VALUE_UNSPECIFIED) {
if (!args->ocr_ebooks && !args->ocr_images) {
fprintf(stderr, "You must specify at least one of --ocr-ebooks, --ocr-images");
@ -222,7 +235,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->tesseract_path = trained_data_path;
}
if (args->exclude_regex != NULL) {
if (args->exclude_regex != OPTION_VALUE_UNSPECIFIED) {
const char *error;
int error_offset;
@ -242,17 +255,21 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
ScanCtx.exclude = NULL;
}
if (args->treemap_threshold_str == 0) {
if (args->treemap_threshold_str == OPTION_VALUE_UNSPECIFIED) {
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
} else {
args->treemap_threshold = atof(args->treemap_threshold_str);
}
if (args->max_memory_buffer == 0) {
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
if (args->max_memory_buffer_mib == OPTION_VALUE_UNSPECIFIED) {
args->max_memory_buffer_mib = DEFAULT_MAX_MEM_BUFFER;
}
if (args->list_path != NULL) {
if (args->scan_mem_limit_mib == OPTION_VALUE_UNSPECIFIED || args->scan_mem_limit_mib == OPTION_VALUE_DISABLE) {
args->scan_mem_limit_mib = DEFAULT_THROTTLE_MEMORY_THRESHOLD;
}
if (args->list_path != OPTION_VALUE_UNSPECIFIED) {
if (strcmp(args->list_path, "-") == 0) {
args->list_file = stdin;
LOG_DEBUG("cli.c", "Using stdin as list file")
@ -265,8 +282,9 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
}
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
LOG_DEBUGF("cli.c", "arg tn_quality=%f", args->tn_quality)
LOG_DEBUGF("cli.c", "arg tn_size=%d", args->tn_size)
LOG_DEBUGF("cli.c", "arg tn_count=%d", args->tn_count)
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
LOG_DEBUGF("cli.c", "arg threads=%d", args->threads)
LOG_DEBUGF("cli.c", "arg incremental=%s", args->incremental)
@ -283,7 +301,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
LOG_DEBUGF("cli.c", "arg max_memory_buffer_mib=%d", args->max_memory_buffer_mib)
LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path)
return 0;
@ -335,11 +353,9 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
char *index_path = abspath(argv[1]);
if (index_path == NULL) {
fprintf(stderr, "File not found: %s\n", argv[1]);
return 1;
LOG_FATALF("cli.c", "Invalid PATH argument. File not found: %s", argv[1])
} else {
args->index_path = argv[1];
free(index_path);
args->index_path = index_path;
}
if (args->es_url == NULL) {
@ -376,8 +392,16 @@ int index_args_validate(index_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg es_index=%s", args->es_index)
LOG_DEBUGF("cli.c", "arg index_path=%s", args->index_path)
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg async_script=%s", args->async_script)
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
LOG_DEBUGF("cli.c", "arg async_script=%d", args->async_script)
if (args->script) {
char log_buf[5000];
strncpy(log_buf, args->script, sizeof(log_buf));
*(log_buf + sizeof(log_buf) - 1) = '\0';
LOG_DEBUGF("cli.c", "arg script=%s", log_buf)
}
LOG_DEBUGF("cli.c", "arg print=%d", args->print)
LOG_DEBUGF("cli.c", "arg es_mappings_path=%s", args->es_mappings_path)
LOG_DEBUGF("cli.c", "arg es_mappings=%s", args->es_mappings)
@ -474,8 +498,7 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
if (abs_path == NULL) {
fprintf(stderr, "File not found: %s\n", args->indices[i]);
return 1;
LOG_FATALF("cli.c", "Index not found: %s", args->indices[i])
}
}
@ -515,11 +538,9 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
char *index_path = abspath(argv[1]);
if (index_path == NULL) {
fprintf(stderr, "File not found: %s\n", argv[1]);
return 1;
LOG_FATALF("cli.c", "Invalid index PATH argument. File not found: %s", argv[1])
} else {
args->index_path = argv[1];
free(index_path);
args->index_path = index_path;
}
if (args->es_url == NULL) {
@ -539,6 +560,11 @@ int exec_args_validate(exec_args_t *args, int argc, const char **argv) {
}
LOG_DEBUGF("cli.c", "arg script_path=%s", args->script_path)
LOG_DEBUGF("cli.c", "arg script=%s", args->script)
char log_buf[5000];
strncpy(log_buf, args->script, sizeof(log_buf));
*(log_buf + sizeof(log_buf) - 1) = '\0';
LOG_DEBUGF("cli.c", "arg script=%s", log_buf)
return 0;
}

View File

@ -5,11 +5,15 @@
#include "libscan/arc/arc.h"
// Sentinel for an option the user explicitly disabled (e.g. scan_args_validate maps tn_count == OPTION_VALUE_DISABLE to 0 thumbnails).
#define OPTION_VALUE_DISABLE (-1)
// Sentinel for an option that was not provided; scan_args_validate replaces it with the option's default.
// Also compared against pointer-typed options, where 0 acts as the null pointer constant.
#define OPTION_VALUE_UNSPECIFIED (0)
typedef struct scan_args {
float quality;
int size;
float tn_quality;
int tn_size;
int content_size;
int threads;
int scan_mem_limit_mib;
char *incremental;
char *output;
char *rewrite_url;
@ -27,8 +31,10 @@ typedef struct scan_args {
int fast;
const char* treemap_threshold_str;
double treemap_threshold;
int max_memory_buffer;
int max_memory_buffer_mib;
int read_subtitles;
/** Number of thumbnails to generate */
int tn_count;
int fast_epub;
int calculate_checksums;
char *list_path;
@ -44,7 +50,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv);
typedef struct index_args {
char *es_url;
char *es_index;
const char *index_path;
char *index_path;
const char *script_path;
char *script;
const char *es_settings_path;
@ -56,6 +62,7 @@ typedef struct index_args {
int async_script;
int force_reset;
int threads;
int incremental;
} index_args_t;
typedef struct web_args {

View File

@ -35,12 +35,14 @@ typedef struct {
int threads;
int depth;
int calculate_checksums;
size_t mem_limit;
size_t stat_tn_size;
size_t stat_index_size;
GHashTable *original_table;
GHashTable *copy_table;
GHashTable *new_table;
pthread_mutex_t copy_table_mu;
pcre *exclude;
@ -85,6 +87,10 @@ typedef struct {
GHashTable *tags;
store_t *meta_store;
GHashTable *meta;
/**
* Set to false when using --print
*/
int needs_es_connection;
} IndexCtx_t;
typedef struct {

View File

@ -15,19 +15,34 @@ typedef struct es_indexer {
} es_indexer_t;
static __thread es_indexer_t *Indexer;
static __thread es_indexer_t *Indexer = NULL;
void delete_queue(int max);
void free_queue(int max);
void elastic_flush();
void elastic_cleanup() {
elastic_flush();
if (Indexer != NULL) {
free(Indexer->es_index);
free(Indexer->es_url);
free(Indexer);
/**
 * Release an indexer and the strings it owns.
 *
 * @param indexer indexer to free; NULL is accepted and ignored.
 */
void destroy_indexer(es_indexer_t *indexer) {
    if (indexer != NULL) {
        LOG_DEBUG("elastic.c", "Destroying indexer")

        // es_url acts as the "strings were allocated" marker:
        // es_index is only released together with it.
        const int owns_strings = (indexer->es_url != NULL);
        if (owns_strings) {
            free(indexer->es_url);
            free(indexer->es_index);
        }

        free(indexer);
    }
}
/**
 * Per-thread cleanup: flush pending bulk lines (only when an Elasticsearch
 * connection is in use) and release this thread's indexer.
 */
void elastic_cleanup() {
    if (IndexCtx.needs_es_connection) {
        elastic_flush();
    }

    destroy_indexer(Indexer);
    // Do not leave the thread-local pointer dangling: a second cleanup (or any
    // later NULL check on Indexer) must not see freed memory.
    Indexer = NULL;
}
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
@ -52,11 +67,22 @@ void index_json_func(void *arg) {
elastic_index_line(line);
}
/**
 * Queue a bulk "delete" action for one document.
 *
 * @param document_id_str NUL-terminated document id (at most MD5_STR_LENGTH - 1
 *                        characters are used).
 * @param _data           unused; present to match the line_processor_t callback signature.
 */
void delete_document(const char* document_id_str, void* UNUSED(_data)) {
    // Delete entries carry no JSON payload, so only the fixed-size header is allocated.
    es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t));
    bulk_line->type = ES_BULK_LINE_DELETE;
    bulk_line->next = NULL;

    // Bounded, always-terminated copy: a fixed-size memcpy would read past the
    // end of a short input line and could leave path_md5_str unterminated even
    // though it is later formatted with "%s".
    strncpy(bulk_line->path_md5_str, document_id_str, MD5_STR_LENGTH - 1);
    bulk_line->path_md5_str[MD5_STR_LENGTH - 1] = '\0';

    tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
char *json = cJSON_PrintUnformatted(document);
size_t json_len = strlen(json);
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
bulk_line->type = ES_BULK_LINE_INDEX;
memcpy(bulk_line->line, json, json_len);
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
*(bulk_line->line + json_len) = '\n';
@ -125,9 +151,19 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
size_t buf_cur = 0;
char *buf = malloc(8192);
size_t buf_capacity = 8192;
#define GROW_BUF(delta) \
while (buf_size + (delta) > buf_capacity) { \
buf_capacity *= 2; \
buf = realloc(buf, buf_capacity); \
} \
buf_size += (delta); \
// see: https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html
// ES_BULK_LINE_INDEX: two lines, 1st action, 2nd content
// ES_BULK_LINE_DELETE: one line
while (line != NULL && *count < max) {
char action_str[256];
if (line->type == ES_BULK_LINE_INDEX) {
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
@ -137,18 +173,25 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
size_t action_str_len = strlen(action_str);
size_t line_len = strlen(line->line);
while (buf_size + line_len + action_str_len > buf_capacity) {
buf_capacity *= 2;
buf = realloc(buf, buf_capacity);
}
buf_size += line_len + action_str_len;
GROW_BUF(action_str_len + line_len);
memcpy(buf + buf_cur, action_str, action_str_len);
buf_cur += action_str_len;
memcpy(buf + buf_cur, line->line, line_len);
buf_cur += line_len;
} else if (line->type == ES_BULK_LINE_DELETE) {
snprintf(
action_str, sizeof(action_str),
"{\"delete\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
line->path_md5_str, Indexer->es_index
);
size_t action_str_len = strlen(action_str);
GROW_BUF(action_str_len);
memcpy(buf + buf_cur, action_str, action_str_len);
buf_cur += action_str_len;
}
line = line->next;
(*count)++;
}
@ -223,7 +266,7 @@ void _elastic_flush(int max) {
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
free_response(r);
free(buf);
delete_queue(1);
free_queue(1);
if (Indexer->queued != 0) {
elastic_flush();
}
@ -248,13 +291,13 @@ void _elastic_flush(int max) {
} else if (r->status_code != 200) {
print_errors(r);
delete_queue(Indexer->queued);
free_queue(Indexer->queued);
} else {
print_errors(r);
LOG_DEBUGF("elastic.c", "Indexed %d documents (%zukB) <%d>", count, buf_len / 1024, r->status_code);
delete_queue(max);
free_queue(max);
if (Indexer->queued != 0) {
elastic_flush();
@ -265,7 +308,7 @@ void _elastic_flush(int max) {
free(buf);
}
void delete_queue(int max) {
void free_queue(int max) {
for (int i = 0; i < max; i++) {
es_bulk_line_t *tmp = Indexer->line_head;
Indexer->line_head = tmp->next;
@ -309,16 +352,22 @@ void elastic_index_line(es_bulk_line_t *line) {
es_indexer_t *create_indexer(const char *url, const char *index) {
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
if (IndexCtx.needs_es_connection) {
char *es_url = malloc(strlen(url) + 1);
strcpy(es_url, url);
char *es_index = malloc(strlen(index) + 1);
strcpy(es_index, index);
es_indexer_t *indexer = malloc(sizeof(es_indexer_t));
indexer->es_url = es_url;
indexer->es_index = es_index;
} else {
indexer->es_url = NULL;
indexer->es_index = NULL;
}
indexer->queued = 0;
indexer->line_head = NULL;
indexer->line_tail = NULL;

View File

@ -3,9 +3,13 @@
#include "src/sist.h"
#define ES_BULK_LINE_INDEX 0
#define ES_BULK_LINE_DELETE 1
/**
 * One queued Elasticsearch bulk action, stored as a node of a singly linked list.
 */
typedef struct es_bulk_line {
// Next queued action, or NULL for the last node
struct es_bulk_line *next;
// Document id string, emitted as the "_id" of the bulk action
char path_md5_str[MD5_STR_LENGTH];
// Action kind: ES_BULK_LINE_INDEX or ES_BULK_LINE_DELETE
int type;
// JSON payload stored inline after the header; index_json() allocates the extra bytes,
// delete_document() allocates none (delete actions have no payload)
char line[0];
} es_bulk_line_t;
@ -40,6 +44,8 @@ void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void delete_document(const char *document_id_str, void* data);
es_indexer_t *create_indexer(const char *url, const char *index);
void elastic_cleanup();

File diff suppressed because one or more lines are too long

View File

@ -133,6 +133,7 @@ char *build_json_string(document_t *doc) {
while (meta != NULL) {
switch (meta->key) {
case MetaThumbnail:
case MetaPages:
case MetaWidth:
case MetaHeight:
@ -163,7 +164,6 @@ char *build_json_string(document_t *doc) {
case MetaExifModel:
case MetaAuthor:
case MetaModifiedBy:
case MetaThumbnail:
case MetaExifGpsLongitudeDMS:
case MetaExifGpsLongitudeDec:
case MetaExifGpsLongitudeRef:
@ -398,7 +398,7 @@ void read_index_bin_handle_line(const char *line, const char *index_id, index_fu
}
}
void read_index_ndjson(const char *path, const char *index_id, index_func func) {
void read_lines(const char *path, const line_processor_t processor) {
dyn_buffer_t buf = dyn_buffer_create();
// Initialize zstd things
@ -427,7 +427,7 @@ void read_index_ndjson(const char *path, const char *index_id, index_func func)
if (c == '\n') {
dyn_buffer_write_char(&buf, '\0');
read_index_bin_handle_line(buf.buf, index_id, func);
processor.func(buf.buf, processor.data);
buf.cur = 0;
} else {
dyn_buffer_write_char(&buf, c);
@ -452,12 +452,22 @@ void read_index_ndjson(const char *path, const char *index_id, index_func func)
dyn_buffer_destroy(&buf);
fclose(file);
}
void read_index_ndjson(const char *line, void* _data) {
void** data = _data;
const char* index_id = data[0];
index_func func = data[1];
read_index_bin_handle_line(line, index_id, func);
}
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
read_index_ndjson(path, index_id, func);
read_lines(path, (line_processor_t) {
.data = (void*[2]){(void*)index_id, func} ,
.func = read_index_ndjson,
});
}
}
@ -476,6 +486,7 @@ void incremental_read(GHashTable *table, const char *filepath, index_descriptor_
}
static __thread GHashTable *IncrementalCopyTable = NULL;
static __thread GHashTable *IncrementalNewTable = NULL;
static __thread store_t *IncrementalCopySourceStore = NULL;
static __thread store_t *IncrementalCopyDestinationStore = NULL;
@ -524,3 +535,33 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
}
/**
 * index_func callback used by incremental_delete().
 *
 * Writes the document id followed by '\n' (MD5_STR_LENGTH bytes in total)
 * through zstd_write_string() when the document has no "parent" field and its
 * id is present in neither IncrementalCopyTable nor IncrementalNewTable.
 *
 * @param document parsed JSON document of the previous index
 */
void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
    const cJSON *id_item = cJSON_GetObjectItem(document, "_id");
    if (id_item == NULL || id_item->valuestring == NULL) {
        // Entry without a string "_id": there is nothing to look up or record,
        // and dereferencing the missing item would crash.
        return;
    }
    const char *path_md5_str = id_item->valuestring;

    // do not delete archive virtual entries
    if (cJSON_GetObjectItem(document, "parent") == NULL
        && !incremental_get_str(IncrementalCopyTable, path_md5_str)
        && !incremental_get_str(IncrementalNewTable, path_md5_str)) {

        // Layout: <MD5_STR_LENGTH - 1 id characters> '\n' '\0'
        char path_md5_n[MD5_STR_LENGTH + 1];
        path_md5_n[MD5_STR_LENGTH] = '\0';
        path_md5_n[MD5_STR_LENGTH - 1] = '\n';
        memcpy(path_md5_n, path_md5_str, MD5_STR_LENGTH - 1);

        zstd_write_string(path_md5_n, MD5_STR_LENGTH);
    }
}
/**
 * Scan a previous index file and record the ids of documents that are in
 * neither copy_table nor new_table.
 *
 * @param del_filepath   output path handed to initialize_writer_ctx() when no writer output is open yet
 * @param index_filepath NDJSON index file of the previous scan
 * @param copy_table     table consulted by incremental_delete_handle_doc()
 * @param new_table      table consulted by incremental_delete_handle_doc()
 */
void incremental_delete(const char *del_filepath, const char* index_filepath,
                        GHashTable *copy_table, GHashTable *new_table) {
    const int writer_is_open = (WriterCtx.out_file != NULL);
    if (!writer_is_open) {
        initialize_writer_ctx(del_filepath);
    }

    // The per-document callback takes no context argument, so the tables are
    // handed over through thread-local variables.
    IncrementalCopyTable = copy_table;
    IncrementalNewTable = new_table;

    read_index(index_filepath, "", INDEX_TYPE_NDJSON, incremental_delete_handle_doc);
}

View File

@ -7,13 +7,23 @@
#include <sys/syscall.h>
#include <glib.h>
/**
 * Callback plus context passed to read_lines().
 */
typedef struct line_processor {
// Opaque context pointer, forwarded unchanged as the second argument of func
void* data;
// Called with (line, data) for each line read
void (*func)(const char*, void*);
} line_processor_t;
typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table);
void incremental_delete(const char *del_filepath, const char* index_filepath,
GHashTable *copy_table, GHashTable *new_table);
void write_document(document_t *doc);
void read_lines(const char *path, const line_processor_t processor);
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);
@ -29,4 +39,18 @@ void write_index_descriptor(char *path, index_descriptor_t *desc);
index_descriptor_t read_index_descriptor(char *path);
// caller ensures char file_path[PATH_MAX]
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
if (0 == access(file_path, R_OK)) { \
action_ok; \
} else { \
action_main_fail; \
} \
snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
if ((cond_original) && (0 == access(file_path, R_OK))) { \
action_ok; \
} \
#endif

View File

@ -55,7 +55,16 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
if (key_len == MD5_DIGEST_LENGTH) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
} else if (key_len == MD5_DIGEST_LENGTH + sizeof(int)) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s/%d} %lu bytes",
path_md5_str, *(int *) (key + MD5_DIGEST_LENGTH), buf_len);
} else {
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
}

View File

@ -48,6 +48,12 @@ void vsist_logf(const char *filepath, int level, char *format, va_list ap) {
size_t maxsize = sizeof(log_str) - log_len;
log_len += vsnprintf(log_str + log_len, maxsize, format, ap);
if (log_len >= maxsize) {
fprintf(stderr, "([%s] FIXME: Log string is too long to display: %dB)\n",
log_levels[level], log_len);
return;
}
if (is_tty) {
log_len += sprintf(log_str + log_len, "\033[0m\n");
} else {

View File

@ -189,37 +189,41 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.comic_ctx.log = _log;
ScanCtx.comic_ctx.logf = _logf;
ScanCtx.comic_ctx.store = _store;
ScanCtx.comic_ctx.tn_size = args->size;
ScanCtx.comic_ctx.tn_qscale = args->quality;
ScanCtx.comic_ctx.enable_tn = args->tn_count > 0;
ScanCtx.comic_ctx.tn_size = args->tn_size;
ScanCtx.comic_ctx.tn_qscale = args->tn_quality;
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
// Ebook
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
ScanCtx.ebook_ctx.content_size = args->content_size;
ScanCtx.ebook_ctx.tn_size = args->size;
ScanCtx.ebook_ctx.enable_tn = args->tn_count > 0;
ScanCtx.ebook_ctx.tn_size = args->tn_size;
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
ScanCtx.ebook_ctx.log = _log;
ScanCtx.ebook_ctx.logf = _logf;
ScanCtx.ebook_ctx.store = _store;
ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
ScanCtx.ebook_ctx.tn_qscale = args->quality;
ScanCtx.ebook_ctx.tn_qscale = args->tn_quality;
// Font
ScanCtx.font_ctx.enable_tn = args->size > 0;
ScanCtx.font_ctx.enable_tn = args->tn_count > 0;
ScanCtx.font_ctx.log = _log;
ScanCtx.font_ctx.logf = _logf;
ScanCtx.font_ctx.store = _store;
// Media
ScanCtx.media_ctx.tn_qscale = args->quality;
ScanCtx.media_ctx.tn_size = args->size;
ScanCtx.media_ctx.tn_qscale = args->tn_quality;
ScanCtx.media_ctx.tn_size = args->tn_size;
ScanCtx.media_ctx.tn_count = args->tn_count;
ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024;
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
ScanCtx.media_ctx.read_subtitles = args->tn_count;
if (args->ocr_images) {
ScanCtx.media_ctx.tesseract_lang = args->tesseract_lang;
@ -228,6 +232,7 @@ void initialize_scan_context(scan_args_t *args) {
init_media();
// OOXML
ScanCtx.ooxml_ctx.enable_tn = args->tn_count > 0;
ScanCtx.ooxml_ctx.content_size = args->content_size;
ScanCtx.ooxml_ctx.log = _log;
ScanCtx.ooxml_ctx.logf = _logf;
@ -244,7 +249,8 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.text_ctx.logf = _logf;
// MSDOC
ScanCtx.msdoc_ctx.tn_size = args->size;
ScanCtx.msdoc_ctx.enable_tn = args->tn_count > 0;
ScanCtx.msdoc_ctx.tn_size = args->tn_size;
ScanCtx.msdoc_ctx.content_size = args->content_size;
ScanCtx.msdoc_ctx.log = _log;
ScanCtx.msdoc_ctx.logf = _logf;
@ -253,6 +259,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
ScanCtx.mem_limit = (size_t) args->scan_mem_limit_mib * 1024 * 1024;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
@ -262,8 +269,9 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.fast = args->fast;
// Raw
ScanCtx.raw_ctx.tn_qscale = args->quality;
ScanCtx.raw_ctx.tn_size = args->size;
ScanCtx.raw_ctx.tn_qscale = args->tn_quality;
ScanCtx.raw_ctx.enable_tn = args->tn_count > 0;
ScanCtx.raw_ctx.tn_size = args->tn_size;
ScanCtx.raw_ctx.log = _log;
ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.store = _store;
@ -282,37 +290,87 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.json_ctx.ndjson_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/ndjson");
}
/**
* Loads an existing index as the baseline for incremental scanning.
* 1. load old index files (original+main) => original_table
* 2. allocate empty table => copy_table
* 3. allocate empty table => new_table
* the original_table/copy_table/new_table will be populated in parsing/parse.c:parse
* and consumed in main.c:save_incremental_index
*
* Note: the existing index may or may not be of incremental index form.
*/
void load_incremental_index(const scan_args_t *args) {
char file_path[PATH_MAX];
ScanCtx.original_table = incremental_get_table();
ScanCtx.copy_table = incremental_get_table();
DIR *dir = opendir(args->incremental);
if (dir == NULL) {
LOG_FATALF("main.c", "Could not open original index for incremental scan: %s", strerror(errno))
}
ScanCtx.new_table = incremental_get_table();
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->incremental);
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
if (strcmp(original_desc.version, Version) != 0) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
}
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index", sizeof("_index") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
incremental_read(ScanCtx.original_table, file_path, &original_desc);
}
}
closedir(dir);
READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
1);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
/**
* Saves an incremental index.
* Before calling this function, the scanner should have finished writing the main index.
* 1. Build original_table - new_table => delete_table
* 2. Incrementally copy from old index files [(original+main) /\ copy_table] => index_original.ndjson.zst & store
*/
void save_incremental_index(scan_args_t *args) {
char dst_path[PATH_MAX];
char store_path[PATH_MAX];
char file_path[PATH_MAX];
char del_path[PATH_MAX];
// Source: thumbnail store of the previous index. Destination: "_index_original" file of the new index.
// Both paths are built by plain concatenation (no separator is inserted).
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path);
store_t *source = store_create(store_path, STORE_SIZE_TN);
LOG_INFOF("main.c", "incremental_delete: original size = %u, copy size = %u, new size = %u",
g_hash_table_size(ScanCtx.original_table),
g_hash_table_size(ScanCtx.copy_table),
g_hash_table_size(ScanCtx.new_table));
// Pass 1: walk the previous index files (main, then original if readable) and
// write the delete list of the new index.
snprintf(del_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path);
READ_INDICES(file_path, args->incremental,
incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table),
perror("incremental_delete"), 1);
// NOTE(review): presumably closes the writer output so the next pass opens a different file — confirm in writer_cleanup()
writer_cleanup();
// Pass 2: walk the same previous index files again and copy the entries selected by copy_table
// (and their thumbnails, from `source` into ScanCtx.index.store) to dst_path.
READ_INDICES(file_path, args->incremental,
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table),
perror("incremental_copy"), 1);
writer_cleanup();
store_destroy(source);
// Finally copy the tag store of the previous index into the new index.
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
store_copy(source_tags, dst_path);
store_destroy(source_tags);
}
/**
* An index can be either incremental or non-incremental (initial index).
* For an initial index, there is only the "main" index.
* For an incremental index, there are, additionally:
* - An "original" index, referencing all files unchanged since the previous index.
* - A "delete" index, referencing all files that existed in the previous index but have been deleted since then.
* Therefore, for an incremental index, "main"+"original" covers all the current files in the live filesystem,
* and is orthogonal to the "delete" index. When building an incremental index upon an old incremental index,
* the old "delete" index can be safely ignored.
*/
void sist2_scan(scan_args_t *args) {
ScanCtx.mime_table = mime_get_mime_table();
@ -335,10 +393,10 @@ void sist2_scan(scan_args_t *args) {
load_incremental_index(args);
}
ScanCtx.pool = tpool_create(args->threads, thread_cleanup, TRUE, TRUE);
ScanCtx.pool = tpool_create(ScanCtx.threads, thread_cleanup, TRUE, TRUE, ScanCtx.mem_limit);
tpool_start(ScanCtx.pool);
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE);
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE, 0);
tpool_start(ScanCtx.writer_pool);
if (args->list_path) {
@ -364,35 +422,11 @@ void sist2_scan(scan_args_t *args) {
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
LOG_DEBUGF("main.c", "Thumbnail store size: %d", ScanCtx.stat_tn_size)
LOG_DEBUGF("main.c", "Index size: %d", ScanCtx.stat_index_size)
if (args->incremental != NULL) {
char dst_path[PATH_MAX];
snprintf(store_path, PATH_MAX, "%sthumbs", args->incremental);
snprintf(dst_path, PATH_MAX, "%s_index_original.ndjson.zst", ScanCtx.index.path);
store_t *source = store_create(store_path, STORE_SIZE_TN);
DIR *dir = opendir(args->incremental);
if (dir == NULL) {
perror("opendir");
return;
}
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s%s", args->incremental, de->d_name);
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table);
}
}
closedir(dir);
store_destroy(source);
writer_cleanup();
snprintf(store_path, PATH_MAX, "%stags", args->incremental);
snprintf(dst_path, PATH_MAX, "%stags", ScanCtx.index.path);
store_t *source_tags = store_create(store_path, STORE_SIZE_TAG);
store_copy(source_tags, dst_path);
store_destroy(source_tags);
save_incremental_index(args);
}
generate_stats(&ScanCtx.index, args->treemap_threshold, ScanCtx.index.path);
@ -402,17 +436,19 @@ void sist2_scan(scan_args_t *args) {
}
void sist2_index(index_args_t *args) {
char file_path[PATH_MAX];
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.batch_size = args->batch_size;
IndexCtx.needs_es_connection = !args->print;
if (!args->print) {
if (IndexCtx.needs_es_connection) {
elastic_init(args->force_reset, args->es_mappings, args->es_settings);
}
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
index_descriptor_t desc = read_index_descriptor(descriptor_path);
@ -428,11 +464,11 @@ void sist2_index(index_args_t *args) {
}
char path_tmp[PATH_MAX];
snprintf(path_tmp, sizeof(path_tmp), "%s/tags", args->index_path);
snprintf(path_tmp, sizeof(path_tmp), "%stags", args->index_path);
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
IndexCtx.tags = store_read_all(IndexCtx.tag_store);
snprintf(path_tmp, sizeof(path_tmp), "%s/meta", args->index_path);
snprintf(path_tmp, sizeof(path_tmp), "%smeta", args->index_path);
IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
IndexCtx.meta = store_read_all(IndexCtx.meta_store);
@ -443,32 +479,33 @@ void sist2_index(index_args_t *args) {
f = index_json;
}
void (*cleanup)();
if (args->print) {
cleanup = NULL;
} else {
cleanup = elastic_cleanup;
}
IndexCtx.pool = tpool_create(args->threads, cleanup, FALSE, args->print == 0);
IndexCtx.pool = tpool_create(args->threads, elastic_cleanup, FALSE, args->print == 0, 0);
tpool_start(IndexCtx.pool);
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s/%s", args->index_path, de->d_name);
READ_INDICES(file_path, args->index_path, {
read_index(file_path, desc.id, desc.type, f);
LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type);
}, {}, !args->incremental);
// Only read the _delete index if we're sending data to ES
if (!args->print) {
snprintf(file_path, PATH_MAX, "%s_index_delete.list.zst", args->index_path);
if (0 == access(file_path, R_OK)) {
read_lines(file_path, (line_processor_t) {
.data = NULL,
.func = delete_document
});
LOG_DEBUGF("main.c", "Read index file %s (%s)", file_path, desc.type)
}
}
closedir(dir);
tpool_wait(IndexCtx.pool);
tpool_destroy(IndexCtx.pool);
if (!args->print) {
if (IndexCtx.needs_es_connection) {
finish_indexer(args->script, args->async_script, desc.id);
}
@ -483,7 +520,7 @@ void sist2_exec_script(exec_args_t *args) {
LogCtx.verbose = TRUE;
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%s/descriptor.json", args->index_path);
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->index_path);
index_descriptor_t desc = read_index_descriptor(descriptor_path);
IndexCtx.es_url = args->es_url;
@ -526,13 +563,34 @@ void sist2_web(web_args_t *args) {
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
strcpy(WebCtx.indices[i].path, abs_path);
printf("Loaded index: %s\n", WebCtx.indices[i].desc.name);
LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name)
free(abs_path);
}
serve(args->listen_address);
}
/**
 * argparse callback for numeric options where an explicit "0" means "disable".
 *
 * Unspecified -> 0: Set to default value
 * Specified "0" -> OPTION_VALUE_DISABLE: Disable the option (ex. don't generate thumbnails)
 * Negative number -> Print an error on stderr and exit(1)
 * Specified a valid number -> Continue as normal
 *
 * @param self   parser instance (unused)
 * @param option option being processed; option->value is read as an int and
 *               option->data must hold the address of the int to overwrite
 * @return 0 on success. The function is declared int, so it must return a
 *         value explicitly; falling off the end would hand the caller an
 *         indeterminate result.
 *
 * NOTE(review): this callback is also registered on an OPT_FLOAT option
 * (thumbnail-quality). The value is reinterpreted as an int here, and the int
 * OPTION_VALUE_DISABLE is written over float storage -- confirm that is intended.
 */
int set_to_negative_if_value_is_zero(struct argparse *self, const struct argparse_option *option) {
    (void) self;

    int specified_value = *(int *) option->value;

    if (specified_value == 0) {
        *((int *) option->data) = OPTION_VALUE_DISABLE;
    }

    if (specified_value < 0) {
        fprintf(stderr, "error: option `--%s` Value must be >= 0\n", option->long_name);
        exit(1);
    }

    return 0;
}
int main(int argc, const char *argv[]) {
sigsegv_handler = signal(SIGSEGV, sig_handler);
@ -562,12 +620,21 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=3"),
OPT_INTEGER(0, "size", &scan_args->size,
"Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500"),
OPT_INTEGER(0, "mem-throttle", &scan_args->scan_mem_limit_mib,
"Total memory threshold in MiB for scan throttling. DEFAULT=0",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->scan_mem_limit_mib),
OPT_FLOAT('q', "thumbnail-quality", &scan_args->tn_quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size,
"Thumbnail size, in pixels. DEFAULT=500",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size),
OPT_INTEGER(0, "thumbnail-count", &scan_args->tn_count,
"Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_count),
OPT_INTEGER(0, "content-size", &scan_args->content_size,
"Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768"),
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size),
OPT_STRING(0, "incremental", &scan_args->incremental,
"Reuse an existing index and only scan modified files."),
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
@ -590,8 +657,8 @@ int main(int argc, const char *argv[]) {
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
"(see USAGE.md). DEFAULT: 0.0005"),
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
"Maximum memory buffer size per thread in MB for files inside archives "
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer_mib,
"Maximum memory buffer size per thread in MiB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"),
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
@ -606,6 +673,8 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url with port. DEFAULT=http://localhost:9200"),
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
"Conduct incremental indexing, assumes that the old index is already digested by Elasticsearch."),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),

View File

@ -80,16 +80,26 @@ void parse(void *arg) {
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
pthread_mutex_lock(&ScanCtx.copy_table_mu);
incremental_mark_file_for_copy(ScanCtx.copy_table, doc->path_md5);
incremental_mark_file(ScanCtx.copy_table, doc->path_md5);
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_skipped_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
CLOSE_FILE(job->vfile)
free(doc->filepath);
free(doc);
return;
}
if (ScanCtx.new_table != NULL) {
pthread_mutex_lock(&ScanCtx.copy_table_mu);
incremental_mark_file(ScanCtx.new_table, doc->path_md5);
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
}
char *buf[MAGIC_BUF_SIZE];
if (LogCtx.very_verbose) {
@ -123,11 +133,14 @@ void parse(void *arg) {
LOG_ERRORF(job->filepath, "(virtual) read(): [%d] %s", bytes_read, archive_error_string(job->vfile.arc))
}
CLOSE_FILE(job->vfile)
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_failed_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
CLOSE_FILE(job->vfile)
free(doc->filepath);
free(doc);
return;
}

View File

@ -53,13 +53,15 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "2.11.6"
#define VERSION "2.11.7"
static const char *const Version = VERSION;
#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown
#endif
#define EXPECTED_MONGOOSE_VERSION "7.3"
#define Q(x) #x
#define QUOTE(x) Q(x)

View File

@ -96,16 +96,8 @@ void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
}
void read_index_into_tables(index_t *index) {
DIR *dir = opendir(index->path);
struct dirent *de;
while ((de = readdir(dir)) != NULL) {
if (strncmp(de->d_name, "_index_", sizeof("_index_") - 1) == 0) {
char file_path[PATH_MAX];
snprintf(file_path, PATH_MAX, "%s%s", index->path, de->d_name);
read_index(file_path, index->desc.id, index->desc.type, fill_tables);
}
}
closedir(dir);
READ_INDICES(file_path, index->path, read_index(file_path, index->desc.id, index->desc.type, fill_tables), {}, 1);
}
static size_t rfind(const char *str, int c) {

View File

@ -28,6 +28,9 @@ typedef struct tpool {
int work_cnt;
int done_cnt;
int busy_cnt;
int throttle_stuck_cnt;
size_t mem_limit;
size_t page_size;
int free_arg;
int stop;
@ -114,13 +117,44 @@ int tpool_add_work(tpool_t *pool, thread_func_t func, void *arg) {
return 1;
}
/**
 * Read the resident page count of the current process from /proc/self/statm
 * and convert it to bytes using pool->page_size.
 *
 * see: https://github.com/htop-dev/htop/blob/f782f821f7f8081cb43bbad1c37f32830a260a81/linux/LinuxProcessList.c
 *
 * @param pool thread pool; only pool->page_size is read
 * @return resident pages * pool->page_size, or 0 when the file cannot be
 *         opened or does not yield exactly seven integer fields
 */
__always_inline
static size_t _get_total_mem(tpool_t* pool) {
    FILE *fp = fopen("/proc/self/statm", "r");
    if (fp == NULL) {
        return 0;
    }

    /*
     * statm[0] m_virt
     * statm[1] m_resident  <- the only field used
     * statm[2] m_share
     * statm[3] m_trs
     * statm[4] unused since Linux 2.6; always 0
     * statm[5] m_drs
     * statm[6] unused since Linux 2.6; always 0
     */
    long int statm[7];
    int parsed = fscanf(fp, "%ld %ld %ld %ld %ld %ld %ld",
                        &statm[0], &statm[1], &statm[2], &statm[3],
                        &statm[4], &statm[5], &statm[6]);
    fclose(fp);

    if (parsed != 7) {
        return 0;
    }
    return statm[1] * pool->page_size;
}
/**
* Thread worker function
*/
static void *tpool_worker(void *arg) {
tpool_t *pool = arg;
int stuck_notified = 0;
int throttle_ms = 0;
while (1) {
while (TRUE) {
pthread_mutex_lock(&pool->work_mutex);
if (pool->stop) {
break;
@ -138,10 +172,35 @@ static void *tpool_worker(void *arg) {
pthread_mutex_unlock(&(pool->work_mutex));
if (work != NULL) {
stuck_notified = 0;
throttle_ms = 0;
while(!pool->stop && pool->mem_limit > 0 && _get_total_mem(pool) >= pool->mem_limit) {
if (!stuck_notified && throttle_ms >= 90000) {
// notify the pool that this thread is stuck.
pthread_mutex_lock(&(pool->work_mutex));
pool->throttle_stuck_cnt += 1;
if (pool->throttle_stuck_cnt == pool->thread_cnt) {
LOG_ERROR("tpool.c", "Throttle memory limit too low, cannot proceed!");
pool->stop = TRUE;
}
pthread_mutex_unlock(&(pool->work_mutex));
stuck_notified = 1;
}
usleep(10000);
throttle_ms += 10;
}
if (pool->stop) {
break;
}
// we are not stuck anymore. cancel our notification.
if (stuck_notified) {
pthread_mutex_lock(&(pool->work_mutex));
pool->throttle_stuck_cnt -= 1;
pthread_mutex_unlock(&(pool->work_mutex));
}
work->func(work->arg);
if (pool->free_arg) {
free(work->arg);
@ -243,18 +302,21 @@ void tpool_destroy(tpool_t *pool) {
* Create a thread pool
* @param thread_cnt Worker threads count
*/
tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress) {
tpool_t *tpool_create(int thread_cnt, void cleanup_func(), int free_arg, int print_progress, size_t mem_limit) {
tpool_t *pool = malloc(sizeof(tpool_t));
pool->thread_cnt = thread_cnt;
pool->work_cnt = 0;
pool->done_cnt = 0;
pool->busy_cnt = 0;
pool->throttle_stuck_cnt = 0;
pool->mem_limit = mem_limit;
pool->stop = FALSE;
pool->free_arg = free_arg;
pool->cleanup_func = cleanup_func;
pool->threads = calloc(sizeof(pthread_t), thread_cnt);
pool->print_progress = print_progress;
pool->page_size = getpagesize();
pthread_mutex_init(&(pool->work_mutex), NULL);

View File

@ -8,7 +8,7 @@ typedef struct tpool tpool_t;
typedef void (*thread_func_t)(void *arg);
tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress);
tpool_t *tpool_create(int num, void (*cleanup_func)(), int free_arg, int print_progress, size_t mem_limit);
void tpool_start(tpool_t *pool);
void tpool_destroy(tpool_t *pool);

View File

@ -134,10 +134,11 @@ static int incremental_get_str(GHashTable *table, const char *path_md5) {
}
/**
* Not thread safe!
* Marks a file by adding it to a table.
* !!Not thread safe.
*/
__always_inline
static int incremental_mark_file_for_copy(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
static int incremental_mark_file(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
char *ptr = malloc(MD5_STR_LENGTH);
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));

View File

@ -8,12 +8,16 @@
#include <src/ctx.h>
// "Server:" response header line carrying the sist2 VERSION string.
#define HTTP_SERVER_HEADER "Server: sist2/" VERSION "\r\n"
// "Content-Type:" response header line for UTF-8 plain-text bodies.
#define HTTP_TEXT_TYPE_HEADER "Content-Type: text/plain;charset=utf-8\r\n"
// Sends a 404 "Not found" reply on the connection named `nc` in the enclosing scope.
// The expansion already ends with ';', so call sites use it as a bare statement.
#define HTTP_REPLY_NOT_FOUND mg_http_reply(nc, 404, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Not found");
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
mg_printf(
nc,
"HTTP/1.1 %d %s\r\n"
"Server: sist2/" VERSION "\r\n"
HTTP_SERVER_HEADER
"Content-Length: %d\r\n"
"%s\r\n\r\n",
status_code, "OK",
@ -60,7 +64,7 @@ void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
mg_http_reply(nc, 404, "", "");
HTTP_REPLY_NOT_FOUND
return;
}
@ -70,7 +74,7 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
index_t *index = get_index_by_id(arg_md5);
if (index == NULL) {
mg_http_reply(nc, 404, "", "");
HTTP_REPLY_NOT_FOUND
return;
}
@ -138,11 +142,17 @@ void style_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
int parse_tn_num = FALSE;
if (hm->uri.len != 68) {
if (hm->uri.len != 68 + 4) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
parse_tn_num = TRUE;
}
char arg_file_md5[MD5_STR_LENGTH];
char arg_index[MD5_STR_LENGTH];
@ -158,12 +168,25 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
store_t *store = get_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
char *data;
size_t data_len = 0;
char *data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
if (parse_tn_num) {
int tn_num = atoi(hm->uri.ptr + 68);
char tn_key[sizeof(md5_buf) + sizeof(int)];
memcpy(tn_key, md5_buf, sizeof(md5_buf));
memcpy(tn_key + sizeof(md5_buf), &tn_num, sizeof(tn_num));
data = store_read(store, (char *) tn_key, sizeof(tn_key), &data_len);
} else {
data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
}
if (data_len != 0) {
send_response_line(
nc, 200, data_len,
@ -173,7 +196,7 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
mg_send(nc, data, data_len);
free(data);
} else {
mg_http_reply(nc, 404, "Content-Type: text/plain;charset=utf-8\r\n", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
}
@ -182,7 +205,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_reply(nc, 500, "", "Invalid request");
mg_http_reply(nc, 500, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
return;
}
@ -226,6 +249,11 @@ void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct mg_http_message *hm) {
if (strcmp(MG_VERSION, EXPECTED_MONGOOSE_VERSION) != 0) {
LOG_WARNING("serve.c", "sist2 was not linked with latest mongoose version, "
"serving file from disk might not work as expected.")
}
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;
@ -246,7 +274,7 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
char disposition[8192];
snprintf(disposition, sizeof(disposition),
"Content-Disposition: inline; filename=\"%s%s%s\"\r\nAccept-Ranges: bytes\r\n",
HTTP_SERVER_HEADER "Content-Disposition: inline; filename=\"%s%s%s\"\r\nAccept-Ranges: bytes\r\n",
name, strlen(ext) == 0 ? "" : ".", ext);
mg_http_serve_file(nc, hm, full_path, mime, disposition);
@ -273,6 +301,7 @@ void index_info(struct mg_connection *nc) {
cJSON *json = cJSON_CreateObject();
cJSON *arr = cJSON_AddArrayToObject(json, "indices");
cJSON_AddStringToObject(json, "mongooseVersion", MG_VERSION);
cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
cJSON_AddStringToObject(json, "version", Version);
cJSON_AddStringToObject(json, "esVersion", format_es_version(WebCtx.es_version));
@ -314,7 +343,7 @@ void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -328,14 +357,14 @@ void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -350,7 +379,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -369,7 +398,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
cJSON *parent = cJSON_GetObjectItem(source, "parent");
@ -383,7 +412,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (idx == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -452,7 +481,7 @@ tag_req_t *parse_tag_request(cJSON *json) {
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -462,14 +491,14 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
LOG_DEBUG("serve.c", "Invalid tag request")
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
store_t *store = get_tag_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -615,7 +644,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
} else if (mg_http_match_uri(hm, "/d/*")) {
document_info(nc, hm);
} else {
mg_http_reply(nc, 404, "", "Page not found");
HTTP_REPLY_NOT_FOUND
}
} else if (ev == MG_EV_POLL) {
@ -645,7 +674,8 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
free(tmp);
}
mg_http_reply(nc, 500, "", "");
mg_http_reply(nc, 500, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER,
"Elasticsearch error, see server logs.");
}
free_response(r);
@ -659,7 +689,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
void serve(const char *listen_address) {
printf("Starting web server @ http://%s\n", listen_address);
LOG_INFOF("serve.c", "Starting web server @ http://%s", listen_address)
struct mg_mgr mgr;
mg_mgr_init(&mgr);

File diff suppressed because one or more lines are too long

View File

@ -39,7 +39,7 @@ def sist2_index(files, *args):
return iter(sist2_index_to_dict("test_i"))
def sist2_incremental_index(files, func=None, *args):
def sist2_incremental_index(files, func=None, incremental_index=False, *args):
path = copy_files(files)
if func:
@ -47,11 +47,13 @@ def sist2_incremental_index(files, func=None, *args):
shutil.rmtree("test_i_inc", ignore_errors=True)
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
return iter(sist2_index_to_dict("test_i_inc"))
return iter(sist2_index_to_dict("test_i_inc", incremental_index))
def sist2_index_to_dict(index):
res = sist2("index", "--print", index)
def sist2_index_to_dict(index, incremental_index=False):
args = ["--incremental-index"] if incremental_index else []
res = sist2("index", "--print", "--very-verbose", *args, index)
for line in res.splitlines():
if line:
@ -75,6 +77,7 @@ class ScanTest(unittest.TestCase):
file_count = sum(1 for _ in sist2_index(TEST_FILES))
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)), 3)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)

View File

@ -6,10 +6,26 @@ set(CMAKE_C_STANDARD 11)
option(BUILD_TESTS "Build tests" on)
add_subdirectory(third-party/antiword)
add_compile_definitions(
if (SIST_DEBUG)
add_compile_definitions(
antiword
DEBUG
)
else()
add_compile_definitions(
antiword
NDEBUG
)
)
target_compile_options(
antiword
PRIVATE
-g
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-fno-inline
)
endif()
add_library(
scan

View File

@ -12,7 +12,7 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
struct archive_entry *entry = NULL;
arc_data_t arc_data;
if (ctx->tn_size <= 0) {
if (!ctx->enable_tn) {
return;
}
@ -44,7 +44,20 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
break;
}
ret = store_image_thumbnail((scan_media_ctx_t *) ctx, buf, entry_size, doc, file_path);
scan_media_ctx_t media_ctx = {
.tn_count = ctx->enable_tn ? 1 : 0,
.tn_size = ctx->tn_size,
.tn_qscale = ctx->tn_qscale,
.tesseract_lang = NULL,
.tesseract_path = NULL,
.read_subtitles = FALSE,
.max_media_buffer = 0,
.log = ctx->log,
.logf = ctx->logf,
.store = ctx->store,
};
ret = store_image_thumbnail(&media_ctx, buf, entry_size, doc, file_path);
free(buf);
if (ret == TRUE) {

View File

@ -9,6 +9,7 @@ typedef struct {
logf_callback_t logf;
store_callback_t store;
int enable_tn;
int tn_size;
float tn_qscale;

View File

@ -155,7 +155,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_TN_META(doc, pixmap->w, pixmap->h)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
free(samples);
@ -283,7 +283,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
APPEND_LONG_META(doc, MetaPages, page_count)
if (ctx->tn_size > 0) {
if (ctx->enable_tn) {
if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) {
fz_drop_stream(fzctx, stream);
fz_drop_document(fzctx, fzdoc);
@ -404,7 +404,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {
text_buffer_t content_buffer = text_buffer_create(ctx->content_size);
if (ctx->tn_size <= 0) {
if (!ctx->enable_tn) {
return;
}

View File

@ -6,6 +6,7 @@
typedef struct {
long content_size;
int tn_size;
int enable_tn;
const char *tesseract_lang;
const char *tesseract_path;
pthread_mutex_t mupdf_mutex;

View File

@ -176,7 +176,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
strcpy(meta_name->str_val, font_name);
APPEND_META(doc, meta_name)
if (ctx->enable_tn == TRUE) {
if (!ctx->enable_tn) {
FT_Done_Face(face);
free(buf);
return;
@ -231,7 +231,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
dyn_buffer_t bmp_data = dyn_buffer_create();
bmp_format(&bmp_data, dimensions, bitmap);
APPEND_TN_META(doc, dimensions.width, dimensions.height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data);

View File

@ -35,11 +35,6 @@
meta_long->long_val = value; \
APPEND_META(doc, meta_long)}
#define APPEND_TN_META(doc, width, height) \
{meta_line_t *meta_str = malloc(sizeof(meta_line_t) + 4 + 1 + 4); \
meta_str->key = MetaThumbnail; \
sprintf(meta_str->str_val, "%04d,%04d", width, height); \
APPEND_META(doc, meta_str)}
#define APPEND_META(doc, meta) \
meta->next = NULL;\

View File

@ -6,7 +6,6 @@
#define AVIO_BUF_SIZE 8192
#define IS_VIDEO(fmt) ((fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0)
#define STREAM_IS_IMAGE (stream->nb_frames <= 1)
#define STORE_AS_IS ((void*)-1)
@ -398,6 +397,110 @@ void ocr_image(scan_media_ctx_t *ctx, document_t *doc, const AVCodecContext *dec
av_frame_free(&rgb_frame);
}
// Result codes returned by decode_frame_and_save_thumbnail().
// OK: a thumbnail was handed to ctx->store.
#define SAVE_THUMBNAIL_OK 0
// SKIPPED: a frame was decoded and encoded but deliberately not stored.
#define SAVE_THUMBNAIL_SKIPPED 1
// FAILED: seeking, reading or scaling failed; nothing was stored.
#define SAVE_THUMBNAIL_FAILED 2
/**
 * Decode one frame of the given video stream and store it as a thumbnail.
 *
 * @param ctx             media scan context (tn_size, tn_qscale, tesseract_lang, store callback)
 * @param pFormatCtx      opened input; its duration is used to compute the seek target
 * @param decoder         opened decoder for the video stream
 * @param stream          the video stream (only its codec id is inspected here)
 * @param video_stream    index of the video stream, forwarded to read_frame()
 * @param doc             document receiving metadata; doc->path_md5 is the store key
 * @param seek_ratio      fraction of pFormatCtx->duration to seek to before decoding
 * @param thumbnail_index 0 for the primary thumbnail, > 0 for additional ones
 * @return SAVE_THUMBNAIL_OK, SAVE_THUMBNAIL_SKIPPED or SAVE_THUMBNAIL_FAILED
 */
int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *decoder,
AVStream *stream, int video_stream, document_t *doc, double seek_ratio,
int thumbnail_index) {
// Only seek for video inputs that are not GIFs; everything else is decoded from the current position.
if (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
int seek_ok = FALSE;
double target_timestamp = (double) pFormatCtx->duration * seek_ratio;
long ts = (long) target_timestamp;
int seek_ret = avformat_seek_file(
// Allow +- 1s
pFormatCtx, -1, ts - AV_TIME_BASE, ts, ts + AV_TIME_BASE,
0
);
if (seek_ret == 0) {
seek_ok = TRUE;
} else {
CTX_LOG_DEBUGF(
doc->filepath,
"(media.c) Could not seek media file: %s", av_err2str(seek_ret)
)
}
// A failed seek is tolerated for the primary thumbnail (index 0) only;
// for additional thumbnails it aborts.
if (seek_ok == FALSE && thumbnail_index != 0) {
CTX_LOG_WARNING(doc->filepath, "(media.c) Could not seek media file. Can't generate additional thumbnails.")
return SAVE_THUMBNAIL_FAILED;
}
}
frame_and_packet_t *frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
if (frame_and_packet == NULL) {
return SAVE_THUMBNAIL_FAILED;
}
// NOTE(review): IS_VIDEO() is true for inputs whose format name is not "image2", so this
// runs OCR on frames of video inputs rather than on still images -- confirm this is intended.
if (ctx->tesseract_lang != NULL && IS_VIDEO(pFormatCtx)) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
}
// NOTE: OCR'd content takes precedence over exif image description
// Metadata is appended once, while handling the primary thumbnail.
if (thumbnail_index == 0) {
append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));
}
// Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);
if (scaled_frame == NULL) {
frame_and_packet_free(frame_and_packet);
return SAVE_THUMBNAIL_FAILED;
}
int return_value;
// STORE_AS_IS is a sentinel pointer, not a real frame: the original packet bytes are
// stored under doc->path_md5 and there is no scaled frame to encode or free.
if (scaled_frame == STORE_AS_IS) {
return_value = SAVE_THUMBNAIL_OK;
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
ctx->tn_qscale);
// NOTE(review): the return values of send_frame/receive_packet are not checked here.
avcodec_send_frame(jpeg_encoder, scaled_frame);
AVPacket jpeg_packet;
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
if (thumbnail_index == 0) {
// Primary thumbnail: keyed by the document md5 alone.
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
return_value = SAVE_THUMBNAIL_OK;
} else if (thumbnail_index > 1) {
return_value = SAVE_THUMBNAIL_OK;
// TO FIX: the 2nd rendered frame is always broken, just skip it until
// I figure out a better fix.
// Because index 1 is skipped, stored indices are shifted down by one.
thumbnail_index -= 1;
// Additional thumbnails are keyed by the md5 bytes followed by the raw bytes of the int index.
char tn_key[sizeof(doc->path_md5) + sizeof(int)];
memcpy(tn_key, doc->path_md5, sizeof(doc->path_md5));
memcpy(tn_key + sizeof(doc->path_md5), &thumbnail_index, sizeof(thumbnail_index));
ctx->store((char *) tn_key, sizeof(tn_key), (char *) jpeg_packet.data, jpeg_packet.size);
} else {
// thumbnail_index == 1: encoded but intentionally not stored (see TO FIX above).
return_value = SAVE_THUMBNAIL_SKIPPED;
}
// Release the encoder, the encoded packet and the scaled frame with its pixel buffer.
avcodec_free_context(&jpeg_encoder);
av_packet_unref(&jpeg_packet);
av_free(*scaled_frame->data);
av_frame_free(&scaled_frame);
}
frame_and_packet_free(frame_and_packet);
return return_value;
}
void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) {
int video_stream = -1;
@ -458,7 +561,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
append_audio_meta(pFormatCtx, doc);
}
if (video_stream != -1 && ctx->tn_size > 0) {
if (video_stream != -1 && ctx->tn_count > 0) {
AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
@ -473,69 +576,38 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
avcodec_parameters_to_context(decoder, stream->codecpar);
avcodec_open2(decoder, video_codec, NULL);
//Seek
if (!STREAM_IS_IMAGE && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
int seek_ret;
for (int i = 20; i >= 0; i--) {
seek_ret = av_seek_frame(pFormatCtx, video_stream,
(long) ((double) stream->duration * 0.10), 0);
if (seek_ret == 0) {
int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE);
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF && video_duration_in_seconds >= 15)
// Limit to ~1 thumbnail every 5s
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 5 + 1), 1) + 1
: 1;
const double seek_increment = thumbnails_to_generate == 1
? 0.10
: 1.0 / (thumbnails_to_generate + 1);
int number_of_thumbnails_generated = 0;
int save_thumbnail_ret;
for (int i = 0; i < thumbnails_to_generate; i++) {
double seek_ratio = seek_increment * i + seek_increment * 0.9;
save_thumbnail_ret = decode_frame_and_save_thumbnail(ctx, pFormatCtx, decoder, stream, video_stream, doc,
seek_ratio, i);
if (save_thumbnail_ret == SAVE_THUMBNAIL_FAILED) {
break;
}
if (save_thumbnail_ret == SAVE_THUMBNAIL_OK) {
number_of_thumbnails_generated += 1;
}
}
frame_and_packet_t *frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
if (frame_and_packet == NULL) {
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
if (number_of_thumbnails_generated > 0) {
APPEND_LONG_META(doc, MetaThumbnail, number_of_thumbnails_generated)
}
if (ctx->tesseract_lang != NULL && STREAM_IS_IMAGE) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
}
// NOTE: OCR'd content takes precedence over exif image description
append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));
// Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);
if (scaled_frame == NULL) {
frame_and_packet_free(frame_and_packet);
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
if (scaled_frame == STORE_AS_IS) {
APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
ctx->tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame);
AVPacket jpeg_packet;
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
avcodec_free_context(&jpeg_encoder);
av_packet_unref(&jpeg_packet);
av_free(*scaled_frame->data);
av_frame_free(&scaled_frame);
}
frame_and_packet_free(frame_and_packet);
avcodec_free_context(&decoder);
}
@ -772,7 +844,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
}
if (scaled_frame == STORE_AS_IS) {
APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
@ -786,7 +858,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);

View File

@ -17,6 +17,9 @@ typedef struct {
int tn_size;
float tn_qscale;
/** Number of thumbnails to generate for videos */
int tn_count;
long max_media_buffer;
int read_subtitles;

View File

@ -76,6 +76,7 @@ void parse_msdoc_pdf(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file, void *b
scan_ebook_ctx_t ebook_ctx = {
.content_size = ctx->content_size,
.tn_size = ctx->tn_size,
.enable_tn = TRUE,
.log = ctx->log,
.logf = ctx->logf,
.store = ctx->store,
@ -137,7 +138,7 @@ void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc) {
return;
}
if (ctx->tn_size > 0) {
if (ctx->enable_tn) {
char *buf_pdf = malloc(buf_len);
memcpy(buf_pdf, buf, buf_len);
parse_msdoc_pdf(ctx, doc, file, buf_pdf, buf_len);

View File

@ -5,6 +5,7 @@
typedef struct {
long content_size;
int enable_tn;
int tn_size;
log_callback_t log;
logf_callback_t logf;

View File

@ -190,7 +190,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
char *buf = malloc(entry_size);
archive_read_data(a, buf, entry_size);
APPEND_TN_META(doc, 1, 1) // Size unknown
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, entry_size);
free(buf);
}
@ -238,7 +238,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
if (read_doc_props(ctx, a, doc) != 0) {
break;
}
} else if (strcmp(path, "docProps/thumbnail.jpeg") == 0) {
} else if (ctx->enable_tn && strcmp(path, "docProps/thumbnail.jpeg") == 0) {
read_thumbnail(ctx, doc, a, entry);
}
}

View File

@ -5,6 +5,7 @@
#include "../scan.h"
typedef struct {
int enable_tn;
long content_size;
log_callback_t log;
logf_callback_t logf;

View File

@ -69,7 +69,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
@ -157,7 +157,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
APPEND_STR_META(doc, MetaMediaVideoCodec, "raw")
if (ctx->tn_size <= 0) {
if (!ctx->enable_tn) {
free(buf);
libraw_close(libraw_lib);
return;

View File

@ -8,6 +8,7 @@ typedef struct {
logf_callback_t logf;
store_callback_t store;
int enable_tn;
int tn_size;
float tn_qscale;
} scan_raw_ctx_t;

View File

@ -350,9 +350,13 @@ TEST(Comic, ComicIssue160) {
load_doc_file("libscan-test-files/test_files/ebook/comic-segfault-issue-160.cbr", &f, &doc);
int tn_size_saved = comic_ctx.tn_size;
comic_ctx.tn_size = 0;
size_t size_before = store_size;
comic_ctx.enable_tn = FALSE;
parse_comic(&comic_ctx, &f, &doc);
comic_ctx.tn_size = tn_size_saved;
comic_ctx.enable_tn = tn_size_saved;
ASSERT_EQ(store_size, size_before);
cleanup(&doc, &f);
}
@ -669,8 +673,6 @@ TEST(Ooxml, Docx2Archive) {
ASSERT_EQ(get_meta(&LastSubDoc, MetaPages)->long_val, 1);
ASSERT_EQ(strlen(get_meta(&LastSubDoc, MetaContent)->str_val), 2780);
fprintf(stderr, "%s\n", get_meta(&LastSubDoc, MetaContent)->str_val);
ooxml_500_ctx.content_size = 500;
cleanup(&doc, &f);
@ -1111,6 +1113,7 @@ int main(int argc, char **argv) {
ebook_ctx.tesseract_lang = "eng";
ebook_ctx.tesseract_path = "./tessdata";
ebook_ctx.tn_size = 500;
ebook_ctx.enable_tn = TRUE;
ebook_ctx.log = noop_log;
ebook_ctx.logf = noop_logf;
ebook_ctx.fast_epub_parse = 0;
@ -1124,12 +1127,14 @@ int main(int argc, char **argv) {
comic_ctx.tn_qscale = 1.0;
comic_ctx.tn_size = 500;
comic_ctx.enable_tn = TRUE;
comic_ctx.log = noop_log;
comic_ctx.logf = noop_logf;
comic_ctx.store = counter_store;
comic_big_ctx.tn_qscale = 1.0;
comic_big_ctx.tn_size = 5000;
comic_big_ctx.enable_tn = TRUE;
comic_big_ctx.log = noop_log;
comic_big_ctx.logf = noop_logf;
comic_big_ctx.store = counter_store;
@ -1138,10 +1143,12 @@ int main(int argc, char **argv) {
media_ctx.logf = noop_logf;
media_ctx.store = counter_store;
media_ctx.tn_size = 500;
media_ctx.tn_count = 1;
media_ctx.tn_qscale = 1.0;
media_ctx.max_media_buffer = (long) 2000 * (long) 1024 * (long) 1024;
ooxml_500_ctx.content_size = 500;
ooxml_500_ctx.enable_tn = TRUE;
ooxml_500_ctx.log = noop_log;
ooxml_500_ctx.logf = noop_logf;
ooxml_500_ctx.store = counter_store;
@ -1154,6 +1161,7 @@ int main(int argc, char **argv) {
raw_ctx.logf = noop_logf;
raw_ctx.store = counter_store;
raw_ctx.tn_size = 500;
raw_ctx.enable_tn = TRUE;
raw_ctx.tn_qscale = 5.0;
msdoc_ctx.log = noop_log;
@ -1161,12 +1169,14 @@ int main(int argc, char **argv) {
msdoc_ctx.store = counter_store;
msdoc_ctx.content_size = 500;
msdoc_ctx.tn_size = 500;
msdoc_ctx.enable_tn = TRUE;
msdoc_text_ctx.log = noop_log;
msdoc_text_ctx.logf = noop_logf;
msdoc_text_ctx.store = counter_store;
msdoc_text_ctx.content_size = 500;
msdoc_text_ctx.tn_size = 0;
msdoc_text_ctx.enable_tn = FALSE;
wpd_ctx.log = noop_log;
wpd_ctx.logf = noop_logf;