Compare commits

...

5 Commits

Author SHA1 Message Date
8ad9fc9e32 Fix caption path 2022-02-19 14:11:40 -05:00
f075b542fe Tweak mem-throttle option 2022-02-19 14:05:50 -05:00
3d4331b27d Add thumbnail-count option 2022-02-19 13:45:31 -05:00
a0db49e7d8 Add file page endpoint 2022-02-19 13:43:44 -05:00
065146ff8a Docker fixes 2022-02-19 13:43:44 -05:00
50 changed files with 840 additions and 23920 deletions

View File

@ -27,4 +27,5 @@ sist2
**/ext_libmobi
**/ext_libwpd
**/core
*.a
*.a
tmp_scan/

View File

@ -21,7 +21,6 @@ set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)
add_executable(sist2
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c

View File

@ -3,11 +3,11 @@ MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/
COPY . .
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=off -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN cmake -DSIST_PLATFORM=x64_linux -DSIST_DEBUG=on -DBUILD_TESTS=off -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake .
RUN make -j$(nproc)
RUN strip sist2
RUN strip sist2 || mv sist2_debug sist2
FROM ubuntu:21.10
FROM --platform="linux/amd64" ubuntu:21.10
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*

View File

@ -39,7 +39,7 @@
"index": false
},
"thumbnail": {
"type": "keyword",
"type": "integer",
"index": false
},
"videoc": {

View File

@ -1,32 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no'/>
<title>sist2</title>
<link href="js/chunk-vendors.js" rel="preload" as="script"><link href="js/index.js" rel="preload" as="script"></head>
<body>
<noscript>
<style>
body {
height: initial;
}
</style>
<div style="text-align: center; margin-top: 100px">
<strong>
We're sorry but sist2 doesn't work properly without JavaScript enabled.
Please enable it to continue.
</strong>
<br/>
<strong>
Nous sommes désolés mais sist2 ne fonctionne pas correctement
si JavaScript est activé.
Veuillez l'activer pour continuer.
</strong>
</div>
</noscript>
<div id="app"></div>
<script type="text/javascript" src="js/chunk-vendors.js"></script><script type="text/javascript" src="js/index.js"></script></body>
</html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -310,4 +310,8 @@ mark {
display: inline-block;
width: 40%;
}
.pointer {
cursor: pointer;
}
</style>

View File

@ -62,8 +62,9 @@ export interface EsHit {
isPlayableImage: boolean
isAudio: boolean
hasThumbnail: boolean
tnW: number
tnH: number
hasVidPreview: boolean
/** Number of thumbnails available */
tnNum: number
}
highlight: {
name: string[] | undefined,
@ -134,8 +135,15 @@ class Sist2Api {
if ("thumbnail" in hit._source) {
hit._props.hasThumbnail = true;
hit._props.tnW = Number(hit._source.thumbnail.split(",")[0]);
hit._props.tnH = Number(hit._source.thumbnail.split(",")[1]);
if (Number.isNaN(Number(hit._source.thumbnail))) {
// Backwards compatibility
hit._props.tnNum = 1;
hit._props.hasVidPreview = false;
} else {
hit._props.tnNum = Number(hit._source.thumbnail);
hit._props.hasVidPreview = hit._props.tnNum > 1;
}
}
switch (mimeCategory) {

View File

@ -30,6 +30,7 @@ export default {
{key: "esIndex", value: this.$store.state.sist2Info.esIndex},
{key: "tagline", value: this.$store.state.sist2Info.tagline},
{key: "dev", value: this.$store.state.sist2Info.dev},
{key: "mongooseVersion", value: this.$store.state.sist2Info.mongooseVersion},
{key: "esVersion", value: this.$store.state.sist2Info.esVersion},
{key: "esVersionSupported", value: this.$store.state.sist2Info.esVersionSupported},
{key: "esVersionLegacy", value: this.$store.state.sist2Info.esVersionLegacy},

View File

@ -1,5 +1,6 @@
<template>
<div class="doc-card" :class="{'sub-document': doc._props.isSubDocument}" :style="`width: ${width}px`">
<div class="doc-card" :class="{'sub-document': doc._props.isSubDocument}" :style="`width: ${width}px`"
@click="$store.commit('busTnTouchStart', null)">
<b-card
no-body
img-top
@ -10,39 +11,7 @@
<ContentDiv :doc="doc"></ContentDiv>
<!-- Thumbnail-->
<div v-if="doc._props.hasThumbnail" class="img-wrapper" @mouseenter="onTnEnter()" @mouseleave="onTnLeave()">
<div v-if="doc._props.isAudio" class="card-img-overlay" :class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
</div>
<div
v-if="doc._props.isImage && !hover && doc._props.tnW / doc._props.tnH < 5"
class="card-img-overlay"
:class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span>
</div>
<div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover"
class="card-img-overlay"
:class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
</div>
<div v-if="doc._props.isPlayableVideo" class="play">
<svg viewBox="0 0 494.942 494.942" xmlns="http://www.w3.org/2000/svg">
<path d="m35.353 0 424.236 247.471-424.236 247.471z"/>
</svg>
</div>
<img ref="tn"
v-if="doc._props.isPlayableImage || doc._props.isPlayableVideo"
:src="(doc._props.isGif && hover) ? `f/${doc._id}` : `t/${doc._source.index}/${doc._id}`"
alt=""
:style="{height: (doc._props.isGif && hover) ? `${tnHeight()}px` : undefined}"
class="pointer fit card-img-top" @click="onThumbnailClick()">
<img v-else :src="`t/${doc._source.index}/${doc._id}`" alt=""
class="fit card-img-top">
</div>
<FullThumbnail :doc="doc" :small-badge="smallBadge" @onThumbnailClick="onThumbnailClick()"></FullThumbnail>
<!-- Audio player-->
<audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls
@ -73,31 +42,19 @@ import TagContainer from "@/components/TagContainer.vue";
import DocFileTitle from "@/components/DocFileTitle.vue";
import DocInfoModal from "@/components/DocInfoModal.vue";
import ContentDiv from "@/components/ContentDiv.vue";
import FullThumbnail from "@/components/FullThumbnail";
export default {
components: {ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
components: {FullThumbnail, ContentDiv, DocInfoModal, DocFileTitle, TagContainer},
props: ["doc", "width"],
data() {
return {
ext: ext,
showInfo: false,
hover: false
}
},
computed: {
placeHolderStyle() {
const tokens = this.doc._source.thumbnail.split(",");
const w = Number(tokens[0]);
const h = Number(tokens[1]);
const MAX_HEIGHT = 400;
return {
height: `${Math.min((h / w) * this.width, MAX_HEIGHT)}px`,
}
},
smallBadge() {
return this.width < 150;
}
@ -119,31 +76,10 @@ export default {
}
});
},
onTnEnter() {
this.hover = true;
},
onTnLeave() {
this.hover = false;
},
tnHeight() {
return this.$refs.tn.height;
}
},
}
</script>
<style>
.img-wrapper {
position: relative;
}
.img-wrapper:hover svg {
fill: rgba(0, 0, 0, 1);
}
.pointer {
cursor: pointer;
}
.fit {
display: block;
min-width: 64px;
@ -153,15 +89,17 @@ export default {
width: auto;
height: auto;
}
.audio-fit {
height: 39px;
vertical-align: bottom;
display: inline;
width: 100%;
}
</style>
<style scoped>
.card-img-top {
border-top-left-radius: 0;
border-top-right-radius: 0;
}
.padding-03 {
padding: 0.3rem;
}
@ -179,55 +117,11 @@ export default {
padding: 0.3rem;
}
.thumbnail-placeholder {
}
.card-img-overlay {
pointer-events: none;
padding: 0.75rem;
bottom: unset;
top: 0;
left: unset;
right: unset;
}
.badge-resolution {
color: #212529;
background-color: #FFC107;
}
.play {
position: absolute;
width: 25px;
height: 25px;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
pointer-events: none;
}
.play svg {
fill: rgba(0, 0, 0, 0.7);
}
.doc-card {
padding-left: 3px;
padding-right: 3px;
}
.small-badge {
padding: 1px 3px;
font-size: 70%;
}
.audio-fit {
height: 39px;
vertical-align: bottom;
display: inline;
width: 100%;
}
.sub-document .card {
background: #AB47BC1F !important;
}

View File

@ -2,7 +2,10 @@
<b-modal :visible="show" size="lg" :hide-footer="true" static lazy @close="$emit('close')" @hide="$emit('close')"
>
<template #modal-title>
<h5 class="modal-title" :title="doc._source.name + ext(doc)">{{ doc._source.name + ext(doc) }}</h5>
<h5 class="modal-title" :title="doc._source.name + ext(doc)">
{{ doc._source.name + ext(doc) }}
<router-link :to="`/file?byId=${doc._id}`">#</router-link>
</h5>
</template>
<img v-if="doc._props.hasThumbnail" :src="`t/${doc._source.index}/${doc._id}`" alt="" class="fit card-img-top">

View File

@ -1,11 +1,13 @@
<template>
<b-list-group-item class="flex-column align-items-start mb-2" :class="{'sub-document': doc._props.isSubDocument}"
@mouseenter="onTnEnter()" @mouseleave="onTnLeave()" >
@mouseenter="onTnEnter()" @mouseleave="onTnLeave()">
<!-- Info modal-->
<DocInfoModal :show="showInfo" :doc="doc" @close="showInfo = false"></DocInfoModal>
<div class="media ml-2">
<!-- Thumbnail-->
<div v-if="doc._props.hasThumbnail" class="align-self-start mr-2 wrapper-sm">
<div class="img-wrapper">
<div v-if="doc._props.isPlayableVideo" class="play">
@ -26,6 +28,7 @@
<FileIcon></FileIcon>
</div>
<!-- Doc line-->
<div class="doc-line ml-3">
<div style="display: flex">
<span class="info-icon" @click="showInfo = true"></span>
@ -154,6 +157,7 @@ export default {
.list-group-item .img-wrapper {
width: 88px;
height: 88px;
position: relative;
}
.fit-sm {

View File

@ -0,0 +1,173 @@
<template>
<div v-if="doc._props.hasThumbnail" class="img-wrapper" @mouseenter="onTnEnter()" @mouseleave="onTnLeave()"
@touchstart="onTouchStart()">
<div v-if="doc._props.isAudio" class="card-img-overlay" :class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
</div>
<div
v-if="doc._props.isImage && !hover && doc._props.tnW / doc._props.tnH < 5"
class="card-img-overlay"
:class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ `${doc._source.width}x${doc._source.height}` }}</span>
</div>
<div v-if="(doc._props.isVideo || doc._props.isGif) && doc._source.duration > 0 && !hover"
class="card-img-overlay"
:class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
</div>
<div v-if="doc._props.isPlayableVideo" class="play">
<svg viewBox="0 0 494.942 494.942" xmlns="http://www.w3.org/2000/svg">
<path d="m35.353 0 424.236 247.471-424.236 247.471z"/>
</svg>
</div>
<img ref="tn"
v-if="doc._props.isPlayableImage || doc._props.isPlayableVideo"
:src="tnSrc"
alt=""
:style="{height: (doc._props.isGif && hover) ? `${tnHeight()}px` : undefined}"
class="pointer fit card-img-top" @click="onThumbnailClick()">
<img v-else :src="tnSrc" alt=""
class="fit card-img-top">
<ThumbnailProgressBar v-if="hover && doc._props.hasVidPreview"
:progress="(currentThumbnailNum + 1) / (doc._props.tnNum)"
></ThumbnailProgressBar>
</div>
</template>
<script>
import {humanTime} from "@/util";
import ThumbnailProgressBar from "@/components/ThumbnailProgressBar";
export default {
name: "FullThumbnail",
props: ["doc", "smallBadge"],
components: {ThumbnailProgressBar},
data() {
return {
hover: false,
currentThumbnailNum: 0,
timeoutId: null
}
},
created() {
this.$store.subscribe((mutation) => {
if (mutation.type === "busTnTouchStart" && mutation.payload !== this.doc._id) {
this.onTnLeave();
}
});
},
computed: {
tnSrc() {
const doc = this.doc;
const props = doc._props;
if (props.isGif && this.hover) {
return `f/${doc._id}`;
}
return (this.currentThumbnailNum === 0)
? `t/${doc._source.index}/${doc._id}`
: `t/${doc._source.index}/${doc._id}${String(this.currentThumbnailNum).padStart(4, "0")}`;
},
},
methods: {
humanTime: humanTime,
onThumbnailClick() {
this.$emit("onThumbnailClick");
},
tnHeight() {
return this.$refs.tn.height;
},
tnWidth() {
return this.$refs.tn.width;
},
onTnEnter() {
this.hover = true;
if (this.doc._props.hasVidPreview) {
this.currentThumbnailNum += 1;
this.scheduleNextTnNum();
}
},
onTnLeave() {
this.currentThumbnailNum = 0;
this.hover = false;
if (this.timeoutId !== null) {
window.clearTimeout(this.timeoutId);
this.timeoutId = null;
}
},
scheduleNextTnNum() {
const INTERVAL = this.$store.state.optVidPreviewInterval ?? 700;
this.timeoutId = window.setTimeout(() => {
if (!this.hover) {
return;
}
this.scheduleNextTnNum();
if (this.currentThumbnailNum === this.doc._props.tnNum - 1) {
this.currentThumbnailNum = 0;
} else {
this.currentThumbnailNum += 1;
}
}, INTERVAL);
},
onTouchStart() {
this.$store.commit("busTnTouchStart", this.doc._id);
if (!this.hover) {
this.onTnEnter()
}
},
}
}
</script>
<style scoped>
.img-wrapper {
position: relative;
}
.img-wrapper:hover svg {
fill: rgba(0, 0, 0, 1);
}
.card-img-top {
border-top-left-radius: 0;
border-top-right-radius: 0;
}
.play {
position: absolute;
width: 25px;
height: 25px;
left: 50%;
top: 50%;
transform: translate(-50%, -50%);
pointer-events: none;
}
.play svg {
fill: rgba(0, 0, 0, 0.7);
}
.badge-resolution {
color: #212529;
background-color: #FFC107;
}
.card-img-overlay {
pointer-events: none;
padding: 0.75rem;
bottom: unset;
top: 0;
left: unset;
right: unset;
}
.small-badge {
padding: 1px 3px;
font-size: 70%;
}
</style>

View File

@ -1,6 +1,5 @@
<template>
<b-table :items="tableItems" small borderless responsive="md" thead-class="hidden" class="mb-0 mt-4">
<template #cell(value)="data">
<span v-if="'html' in data.item" v-html="data.item.html"></span>
<span v-else>{{ data.value }}</span>
@ -33,12 +32,18 @@ function dmsToDecimal(dms, ref) {
export default {
name: "InfoTable",
props: ["doc"],
data() {
return {
indexName: "loading..."
}
},
computed: {
tableItems() {
this.indexName;
const src = this.doc._source;
const items = [
{key: "index", value: `[${this.$store.getters.indexMap[src.index].name}]`},
{key: "index", value: `[${this.indexName}]`},
{key: "mtime", value: humanDate(src.mtime)},
{key: "mime", value: src.mime},
{key: "size", value: humanFileSize(src.size)},
@ -85,7 +90,16 @@ export default {
return items;
}
}
},
mounted() {
if (this.$store.getters.indexMap[this.doc.index]) {
this.indexName = this.$store.getters.indexMap[this.doc._source.index].name
}
window.setTimeout(() => {
this.indexName = this.$store.getters.indexMap[this.doc._source.index].name
}, 500)
},
}
</script>

View File

@ -3,7 +3,7 @@
<p>
<b>{{
`[${$store.getters.indices.find(i => i.id === hit._source.index).name}]`
}}</b>{{ `/${hit._source.path}/${hit._source.name}${ext(hit)}` }}
}}</b>{{ `${hit._source.path === '' ? '' : '/'}${hit._source.path}/${hit._source.name}${ext(hit)}` }}
</p>
<p style="margin-top: -1em">
<span v-if="hit._source.width">{{ `${hit._source.width}x${hit._source.height}`}}</span>

View File

@ -0,0 +1,40 @@
<template>
<div class="thumbnail-progress-bar" :style="{width: `${percentProgress}%`}"></div>
</template>
<script>
export default {
name: "ThumbnailProgressBar",
props: ["doc", "progress"],
computed: {
percentProgress() {
return Math.min(Math.max(this.progress * 100, 0), 100);
}
}
}
</script>
<style scoped>
.thumbnail-progress-bar {
position: absolute;
left: 0;
bottom: 0;
height: 4px;
background: #2196f3AA;
z-index: 9;
}
.theme-black .thumbnail-progress-bar {
background: rgba(0, 188, 212, 0.95);
}
.sub-document .thumbnail-progress-bar {
max-width: calc(100% - 8px);
left: 4px;
}
</style>

View File

@ -1,5 +1,8 @@
export default {
en: {
filePage: {
notFound: "Not found"
},
searchBar: {
simple: "Search",
advanced: "Advanced search",
@ -68,7 +71,8 @@ export default {
hideDuplicates: "Hide duplicate results based on checksum",
hideLegacy: "Hide the 'legacyES' Elasticsearch notice",
updateMimeMap: "Update the Media Types tree in real time",
useDatePicker: "Use a Date Picker component rather than a slider"
useDatePicker: "Use a Date Picker component rather than a slider",
vidPreviewInterval: "Video preview frame duration in ms"
},
queryMode: {
simple: "Simple",
@ -162,6 +166,9 @@ export default {
},
},
fr: {
filePage: {
notFound: "Ficher introuvable"
},
searchBar: {
simple: "Recherche",
advanced: "Recherche avancée",
@ -231,7 +238,8 @@ export default {
hideDuplicates: "Masquer les résultats en double",
hideLegacy: "Masquer la notice 'legacyES' Elasticsearch",
updateMimeMap: "Mettre à jour l'arbre de Types de médias en temps réel",
useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider"
useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider",
vidPreviewInterval: "Durée des images d'aperçu video en millisecondes"
},
queryMode: {
simple: "Simple",
@ -326,6 +334,9 @@ export default {
},
},
"zh-CN": {
filePage: {
notFound: "未找到"
},
searchBar: {
simple: "搜索",
advanced: "高级搜索",
@ -394,7 +405,8 @@ export default {
hideDuplicates: "使用校验码隐藏重复结果",
hideLegacy: "隐藏'legacyES' Elasticsearch 通知",
updateMimeMap: "媒体类型树的实时更新",
useDatePicker: "使用日期选择器组件而不是滑块"
useDatePicker: "使用日期选择器组件而不是滑块",
vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位"
},
queryMode: {
simple: "简单",

View File

@ -3,6 +3,7 @@ import VueRouter, {RouteConfig} from "vue-router"
import StatsPage from "../views/StatsPage.vue"
import Configuration from "../views/Configuration.vue"
import SearchPage from "@/views/SearchPage.vue";
import FilePage from "@/views/FilePage.vue";
Vue.use(VueRouter)
@ -21,6 +22,11 @@ const routes: Array<RouteConfig> = [
path: "/config",
name: "Configuration",
component: Configuration
},
{
path: "/file",
name: "File",
component: FilePage
}
]

View File

@ -50,6 +50,7 @@ export default new Vuex.Store({
optHideLegacy: false,
optUpdateMimeMap: false,
optUseDatePicker: false,
optVidPreviewInterval: 700,
_onLoadSelectedIndices: [] as string[],
_onLoadSelectedMimeTypes: [] as string[],
@ -159,6 +160,7 @@ export default new Vuex.Store({
setOptHideLegacy: (state, val) => state.optHideLegacy = val,
setOptUpdateMimeMap: (state, val) => state.optUpdateMimeMap = val,
setOptUseDatePicker: (state, val) => state.optUseDatePicker = val,
setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
@ -174,6 +176,12 @@ export default new Vuex.Store({
busSearch: () => {
// noop
},
busTouchEnd: () => {
// noop
},
busTnTouchStart: (doc_id) => {
// noop
},
},
actions: {
setSist2Info: (store, val) => {
@ -369,5 +377,6 @@ export default new Vuex.Store({
optHideLegacy: state => state.optHideLegacy,
optUpdateMimeMap: state => state.optUpdateMimeMap,
optUseDatePicker: state => state.optUseDatePicker,
optVidPreviewInterval: state => state.optVidPreviewInterval,
}
})

View File

@ -1,8 +1,12 @@
import {EsHit} from "@/Sist2Api";
export function ext(hit: EsHit) {
return Object.prototype.hasOwnProperty.call(hit._source, "extension")
&& hit["_source"]["extension"] !== "" ? "." + hit["_source"]["extension"] : "";
return srcExt(hit._source)
}
export function srcExt(src) {
return Object.prototype.hasOwnProperty.call(src, "extension")
&& src["extension"] !== "" ? "." + src["extension"] : "";
}
export function strUnescape(str: string): string {

View File

@ -85,6 +85,10 @@
<label>{{ $t("opt.slideDuration") }}</label>
<b-form-input :value="optLightboxSlideDuration" type="number" min="1"
@input="setOptLightboxSlideDuration"></b-form-input>
<label>{{ $t("opt.vidPreviewInterval") }}</label>
<b-form-input :value="optVidPreviewInterval" type="number" min="50"
@input="setOptVidPreviewInterval"></b-form-input>
</b-card>
<h4 class="mt-3">{{ $t("treemapOptions") }}</h4>
@ -234,6 +238,7 @@ export default {
"optHideLegacy",
"optUpdateMimeMap",
"optUseDatePicker",
"optVidPreviewInterval",
]),
clientWidth() {
return window.innerWidth;
@ -279,6 +284,7 @@ export default {
"setOptHideLegacy",
"setOptUpdateMimeMap",
"setOptUseDatePicker",
"setOptVidPreviewInterval",
]),
onResetClick() {
localStorage.removeItem("sist2_configuration");

View File

@ -0,0 +1,131 @@
<template>
<div style="margin-left: auto; margin-right: auto;" class="container">
<Preloader v-if="loading"></Preloader>
<b-card v-else-if="!loading && found">
<b-card-title :title="doc._source.name + ext(doc)">
{{ doc._source.name + ext(doc) }}
</b-card-title>
<!-- Thumbnail-->
<div style="position: relative; margin-left: auto; margin-right: auto; text-align: center">
<FullThumbnail :doc="doc" :small-badge="false" @onThumbnailClick="onThumbnailClick()"></FullThumbnail>
</div>
<!-- Audio player-->
<audio v-if="doc._props.isAudio" ref="audio" preload="none" class="audio-fit fit" controls
:type="doc._source.mime"
:src="`f/${doc._id}`"></audio>
<InfoTable :doc="doc" v-if="doc"></InfoTable>
<div v-if="doc._source.content" class="content-div">{{ doc._source.content }}</div>
</b-card>
<div v-else>
<b-card>
<b-card-title>{{ $t("filePage.notFound") }}</b-card-title>
</b-card>
</div>
</div>
</template>
<script>
import Preloader from "@/components/Preloader.vue";
import InfoTable from "@/components/InfoTable.vue";
import Sist2Api from "@/Sist2Api";
import {ext} from "@/util";
import Vue from "vue";
import sist2 from "@/Sist2Api";
import FullThumbnail from "@/components/FullThumbnail";
export default Vue.extend({
name: "FilePage",
components: {
FullThumbnail,
Preloader,
InfoTable
},
data() {
return {
loading: true,
found: false,
doc: null
}
},
methods: {
ext: ext,
onThumbnailClick() {
window.open(`/f/${this.doc._id}`, "_blank");
},
findById(id) {
return {
query: {
bool: {
must: [
{
match: {
"_id": id
}
}
]
}
},
size: 1
}
},
findByName(name) {
return {
query: {
bool: {
must: [
{
match: {
"name": name
}
}
]
}
},
size: 1
}
}
},
mounted() {
if (this.$store.state.sist2Info === null) {
sist2.getSist2Info().then(data => {
this.$store.dispatch("setSist2Info", data);
this.$store.commit("setIndices", data.indices);
});
}
let query = null;
if (this.$route.query.byId) {
query = this.findById(this.$route.query.byId);
} else if (this.$route.query.byName) {
query = this.findByName(this.$route.query.byName);
}
if (query) {
Sist2Api.esQuery(query).then(result => {
if (result.hits.hits.length === 0) {
this.found = false;
} else {
this.doc = result.hits.hits[0];
this.found = true;
}
this.loading = false;
});
} else {
this.loading = false;
this.found = false;
}
}
});
</script>
<style scoped>
.img-wrapper {
display: inline-block;
}
</style>

View File

@ -100,6 +100,10 @@ export default Vue.extend({
...mapGetters(["indices", "optDisplay"]),
},
mounted() {
// Handle touch events
window.ontouchend = () => this.$store.commit("busTouchEnd");
window.ontouchcancel = this.$store.commit("busTouchEnd");
this.search = _debounce(async (clear: boolean) => {
if (clear) {
await this.clearResults();

View File

@ -5,7 +5,8 @@
#define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_CONTENT_SIZE 32768
#define DEFAULT_QUALITY 1
#define DEFAULT_SIZE 300
#define DEFAULT_THUMBNAIL_SIZE 500
#define DEFAULT_THUMBNAIL_COUNT 1
#define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200"
@ -96,7 +97,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->path = abs_path;
}
if (args->incremental != NULL) {
if (args->incremental != OPTION_VALUE_UNSPECIFIED) {
args->incremental = abspath(args->incremental);
if (abs_path == NULL) {
sist_log("main.c", LOG_SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
@ -104,32 +105,39 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
}
}
if (args->quality == 0) {
args->quality = DEFAULT_QUALITY;
} else if (args->quality < 1 || args->quality > 31) {
fprintf(stderr, "Invalid quality: %f\n", args->quality);
if (args->tn_quality == OPTION_VALUE_UNSPECIFIED) {
args->tn_quality = DEFAULT_QUALITY;
} else if (args->tn_quality < 1.0f || args->tn_quality > 31.0f) {
fprintf(stderr, "Invalid value for --thumbnail-quality argument: %f. Must be within [1.0, 31.0].\n",
args->tn_quality);
return 1;
}
if (args->size == 0) {
args->size = DEFAULT_SIZE;
} else if (args->size > 0 && args->size < 32) {
printf("Invalid size: %d\n", args->content_size);
if (args->tn_size == OPTION_VALUE_UNSPECIFIED) {
args->tn_size = DEFAULT_THUMBNAIL_SIZE;
} else if (args->tn_size < 32) {
printf("Invalid value --thumbnail-size argument: %d. Must be greater than 32 pixels.\n", args->tn_size);
return 1;
}
if (args->content_size == 0) {
if (args->tn_count == OPTION_VALUE_UNSPECIFIED) {
args->tn_count = DEFAULT_THUMBNAIL_COUNT;
} else if (args->tn_count == OPTION_VALUE_DISABLE) {
args->tn_count = 0;
}
if (args->content_size == OPTION_VALUE_UNSPECIFIED) {
args->content_size = DEFAULT_CONTENT_SIZE;
}
if (args->threads == 0) {
args->threads = 1;
} else if (args->threads < 0) {
fprintf(stderr, "Invalid threads: %d\n", args->threads);
fprintf(stderr, "Invalid value for --threads: %d. Must be a positive number\n", args->threads);
return 1;
}
if (args->output == NULL) {
if (args->output == OPTION_VALUE_UNSPECIFIED) {
args->output = malloc(strlen(DEFAULT_OUTPUT) + 1);
strcpy(args->output, DEFAULT_OUTPUT);
} else {
@ -148,7 +156,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->depth += 1;
}
if (args->name == NULL) {
if (args->name == OPTION_VALUE_UNSPECIFIED) {
args->name = g_path_get_basename(args->output);
} else {
char *tmp = malloc(strlen(args->name) + 1);
@ -156,11 +164,11 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->name = tmp;
}
if (args->rewrite_url == NULL) {
if (args->rewrite_url == OPTION_VALUE_UNSPECIFIED) {
args->rewrite_url = DEFAULT_REWRITE_URL;
}
if (args->archive == NULL || strcmp(args->archive, "recurse") == 0) {
if (args->archive == OPTION_VALUE_UNSPECIFIED || strcmp(args->archive, "recurse") == 0) {
args->archive_mode = ARC_MODE_RECURSE;
} else if (strcmp(args->archive, "list") == 0) {
args->archive_mode = ARC_MODE_LIST;
@ -173,17 +181,17 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1;
}
if (args->ocr_images && args->tesseract_lang == NULL) {
if (args->ocr_images && args->tesseract_lang == OPTION_VALUE_UNSPECIFIED) {
fprintf(stderr, "You must specify --ocr-lang <LANG> to use --ocr-images");
return 1;
}
if (args->ocr_ebooks && args->tesseract_lang == NULL) {
if (args->ocr_ebooks && args->tesseract_lang == OPTION_VALUE_UNSPECIFIED) {
fprintf(stderr, "You must specify --ocr-lang <LANG> to use --ocr-ebooks");
return 1;
}
if (args->tesseract_lang != NULL) {
if (args->tesseract_lang != OPTION_VALUE_UNSPECIFIED) {
if (!args->ocr_ebooks && !args->ocr_images) {
fprintf(stderr, "You must specify at least one of --ocr-ebooks, --ocr-images");
@ -227,7 +235,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->tesseract_path = trained_data_path;
}
if (args->exclude_regex != NULL) {
if (args->exclude_regex != OPTION_VALUE_UNSPECIFIED) {
const char *error;
int error_offset;
@ -247,21 +255,21 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
ScanCtx.exclude = NULL;
}
if (args->treemap_threshold_str == 0) {
if (args->treemap_threshold_str == OPTION_VALUE_UNSPECIFIED) {
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
} else {
args->treemap_threshold = atof(args->treemap_threshold_str);
}
if (args->max_memory_buffer == 0) {
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
if (args->max_memory_buffer_mib == OPTION_VALUE_UNSPECIFIED) {
args->max_memory_buffer_mib = DEFAULT_MAX_MEM_BUFFER;
}
if (args->scan_mem_limit <= 0) {
args->scan_mem_limit = DEFAULT_THROTTLE_MEMORY_THRESHOLD;
if (args->scan_mem_limit_mib == OPTION_VALUE_UNSPECIFIED || args->scan_mem_limit_mib == OPTION_VALUE_DISABLE) {
args->scan_mem_limit_mib = DEFAULT_THROTTLE_MEMORY_THRESHOLD;
}
if (args->list_path != NULL) {
if (args->list_path != OPTION_VALUE_UNSPECIFIED) {
if (strcmp(args->list_path, "-") == 0) {
args->list_file = stdin;
LOG_DEBUG("cli.c", "Using stdin as list file")
@ -274,8 +282,9 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
}
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
LOG_DEBUGF("cli.c", "arg tn_quality=%f", args->tn_quality)
LOG_DEBUGF("cli.c", "arg tn_size=%d", args->tn_size)
LOG_DEBUGF("cli.c", "arg tn_count=%d", args->tn_count)
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
LOG_DEBUGF("cli.c", "arg threads=%d", args->threads)
LOG_DEBUGF("cli.c", "arg incremental=%s", args->incremental)
@ -292,7 +301,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg fast=%d", args->fast)
LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
LOG_DEBUGF("cli.c", "arg max_memory_buffer_mib=%d", args->max_memory_buffer_mib)
LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path)
return 0;

View File

@ -5,12 +5,15 @@
#include "libscan/arc/arc.h"
#define OPTION_VALUE_DISABLE (-1)
#define OPTION_VALUE_UNSPECIFIED (0)
typedef struct scan_args {
float quality;
int size;
float tn_quality;
int tn_size;
int content_size;
int threads;
int scan_mem_limit;
int scan_mem_limit_mib;
char *incremental;
char *output;
char *rewrite_url;
@ -28,8 +31,10 @@ typedef struct scan_args {
int fast;
const char* treemap_threshold_str;
double treemap_threshold;
int max_memory_buffer;
int max_memory_buffer_mib;
int read_subtitles;
/** Number of thumbnails to generate */
int tn_count;
int fast_epub;
int calculate_checksums;
char *list_path;

File diff suppressed because one or more lines are too long

View File

@ -133,6 +133,7 @@ char *build_json_string(document_t *doc) {
while (meta != NULL) {
switch (meta->key) {
case MetaThumbnail:
case MetaPages:
case MetaWidth:
case MetaHeight:
@ -163,7 +164,6 @@ char *build_json_string(document_t *doc) {
case MetaExifModel:
case MetaAuthor:
case MetaModifiedBy:
case MetaThumbnail:
case MetaExifGpsLongitudeDMS:
case MetaExifGpsLongitudeDec:
case MetaExifGpsLongitudeRef:

View File

@ -55,7 +55,16 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
if (key_len == MD5_DIGEST_LENGTH) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
} else if (key_len == MD5_DIGEST_LENGTH + sizeof(int)) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s/%d} %lu bytes",
path_md5_str, *(int *) (key + MD5_DIGEST_LENGTH), buf_len);
} else {
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
}

View File

@ -189,37 +189,41 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.comic_ctx.log = _log;
ScanCtx.comic_ctx.logf = _logf;
ScanCtx.comic_ctx.store = _store;
ScanCtx.comic_ctx.tn_size = args->size;
ScanCtx.comic_ctx.tn_qscale = args->quality;
ScanCtx.comic_ctx.enable_tn = args->tn_count > 0;
ScanCtx.comic_ctx.tn_size = args->tn_size;
ScanCtx.comic_ctx.tn_qscale = args->tn_quality;
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
// Ebook
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
ScanCtx.ebook_ctx.content_size = args->content_size;
ScanCtx.ebook_ctx.tn_size = args->size;
ScanCtx.ebook_ctx.enable_tn = args->tn_count > 0;
ScanCtx.ebook_ctx.tn_size = args->tn_size;
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
ScanCtx.ebook_ctx.log = _log;
ScanCtx.ebook_ctx.logf = _logf;
ScanCtx.ebook_ctx.store = _store;
ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
ScanCtx.ebook_ctx.tn_qscale = args->quality;
ScanCtx.ebook_ctx.tn_qscale = args->tn_quality;
// Font
ScanCtx.font_ctx.enable_tn = args->size > 0;
ScanCtx.font_ctx.enable_tn = args->tn_count > 0;
ScanCtx.font_ctx.log = _log;
ScanCtx.font_ctx.logf = _logf;
ScanCtx.font_ctx.store = _store;
// Media
ScanCtx.media_ctx.tn_qscale = args->quality;
ScanCtx.media_ctx.tn_size = args->size;
ScanCtx.media_ctx.tn_qscale = args->tn_quality;
ScanCtx.media_ctx.tn_size = args->tn_size;
ScanCtx.media_ctx.tn_count = args->tn_count;
ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer_mib * 1024 * 1024;
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
ScanCtx.media_ctx.read_subtitles = args->tn_count;
if (args->ocr_images) {
ScanCtx.media_ctx.tesseract_lang = args->tesseract_lang;
@ -228,6 +232,7 @@ void initialize_scan_context(scan_args_t *args) {
init_media();
// OOXML
ScanCtx.ooxml_ctx.enable_tn = args->tn_count > 0;
ScanCtx.ooxml_ctx.content_size = args->content_size;
ScanCtx.ooxml_ctx.log = _log;
ScanCtx.ooxml_ctx.logf = _logf;
@ -244,7 +249,8 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.text_ctx.logf = _logf;
// MSDOC
ScanCtx.msdoc_ctx.tn_size = args->size;
ScanCtx.msdoc_ctx.enable_tn = args->tn_count > 0;
ScanCtx.msdoc_ctx.tn_size = args->tn_size;
ScanCtx.msdoc_ctx.content_size = args->content_size;
ScanCtx.msdoc_ctx.log = _log;
ScanCtx.msdoc_ctx.logf = _logf;
@ -253,7 +259,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.threads = args->threads;
ScanCtx.depth = args->depth;
ScanCtx.mem_limit = args->scan_mem_limit * 1024 * 1024;
ScanCtx.mem_limit = (size_t) args->scan_mem_limit_mib * 1024 * 1024;
strncpy(ScanCtx.index.path, args->output, sizeof(ScanCtx.index.path));
strncpy(ScanCtx.index.desc.name, args->name, sizeof(ScanCtx.index.desc.name));
@ -263,8 +269,9 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.fast = args->fast;
// Raw
ScanCtx.raw_ctx.tn_qscale = args->quality;
ScanCtx.raw_ctx.tn_size = args->size;
ScanCtx.raw_ctx.tn_qscale = args->tn_quality;
ScanCtx.raw_ctx.enable_tn = args->tn_count > 0;
ScanCtx.raw_ctx.tn_size = args->tn_size;
ScanCtx.raw_ctx.log = _log;
ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.store = _store;
@ -309,7 +316,8 @@ void load_incremental_index(const scan_args_t *args) {
}
READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)), 1);
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
1);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
@ -320,7 +328,7 @@ void load_incremental_index(const scan_args_t *args) {
* 1. Build original_table - new_table => delete_table
* 2. Incrementally copy from old index files [(original+main) /\ copy_table] => index_original.ndjson.zst & store
*/
void save_incremental_index(scan_args_t* args) {
void save_incremental_index(scan_args_t *args) {
char dst_path[PATH_MAX];
char store_path[PATH_MAX];
char file_path[PATH_MAX];
@ -330,15 +338,17 @@ void save_incremental_index(scan_args_t* args) {
store_t *source = store_create(store_path, STORE_SIZE_TN);
LOG_INFOF("main.c", "incremental_delete: original size = %u, copy size = %u, new size = %u",
g_hash_table_size(ScanCtx.original_table),
g_hash_table_size(ScanCtx.copy_table),
g_hash_table_size(ScanCtx.new_table));
g_hash_table_size(ScanCtx.original_table),
g_hash_table_size(ScanCtx.copy_table),
g_hash_table_size(ScanCtx.new_table));
snprintf(del_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path);
READ_INDICES(file_path, args->incremental, incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table),
READ_INDICES(file_path, args->incremental,
incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table),
perror("incremental_delete"), 1);
writer_cleanup();
READ_INDICES(file_path, args->incremental, incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table),
READ_INDICES(file_path, args->incremental,
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table),
perror("incremental_copy"), 1);
writer_cleanup();
@ -412,6 +422,8 @@ void sist2_scan(scan_args_t *args) {
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
LOG_DEBUGF("main.c", "Thumbnail store size: %d", ScanCtx.stat_tn_size)
LOG_DEBUGF("main.c", "Index size: %d", ScanCtx.stat_index_size)
if (args->incremental != NULL) {
save_incremental_index(args);
@ -551,13 +563,34 @@ void sist2_web(web_args_t *args) {
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
strcpy(WebCtx.indices[i].path, abs_path);
printf("Loaded index: %s\n", WebCtx.indices[i].desc.name);
LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name)
free(abs_path);
}
serve(args->listen_address);
}
/**
* Callback to handle options such that
*
* Unspecified -> 0: Set to default value
* Specified "0" -> -1: Disable the option (ex. don't generate thumbnails)
* Negative number -> Raise error
* Specified a valid number -> Continue as normal
*/
int set_to_negative_if_value_is_zero(struct argparse *self, const struct argparse_option *option) {
int specified_value = *(int *) option->value;
if (specified_value == 0) {
*((int *) option->data) = OPTION_VALUE_DISABLE;
}
if (specified_value < 0) {
fprintf(stderr, "error: option `--%s` Value must be >= 0\n", option->long_name);
exit(1);
}
}
int main(int argc, const char *argv[]) {
sigsegv_handler = signal(SIGSEGV, sig_handler);
@ -587,13 +620,21 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "mem-throttle", &scan_args->scan_mem_limit, "Total memory threshold in MB for scan throttling. DEFAULT=0"),
OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=3"),
OPT_INTEGER(0, "size", &scan_args->size,
"Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500"),
OPT_INTEGER(0, "mem-throttle", &scan_args->scan_mem_limit_mib,
"Total memory threshold in MiB for scan throttling. DEFAULT=0",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->scan_mem_limit_mib),
OPT_FLOAT('q', "thumbnail-quality", &scan_args->tn_quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_quality),
OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size,
"Thumbnail size, in pixels. DEFAULT=500",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size),
OPT_INTEGER(0, "thumbnail-count", &scan_args->tn_count,
"Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_count),
OPT_INTEGER(0, "content-size", &scan_args->content_size,
"Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768"),
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size),
OPT_STRING(0, "incremental", &scan_args->incremental,
"Reuse an existing index and only scan modified files."),
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
@ -616,8 +657,8 @@ int main(int argc, const char *argv[]) {
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
"(see USAGE.md). DEFAULT: 0.0005"),
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer,
"Maximum memory buffer size per thread in MB for files inside archives "
OPT_INTEGER(0, "mem-buffer", &scan_args->max_memory_buffer_mib,
"Maximum memory buffer size per thread in MiB for files inside archives "
"(see USAGE.md). DEFAULT: 2000"),
OPT_BOOLEAN(0, "read-subtitles", &scan_args->read_subtitles, "Read subtitles from media files."),
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
@ -633,7 +674,7 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
"Conduct incremental indexing, assumes that the old index is already digested by Elasticsearch."),
"Conduct incremental indexing, assumes that the old index is already digested by Elasticsearch."),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),

View File

@ -60,6 +60,8 @@ static const char *const Version = VERSION;
#define SIST_PLATFORM unknown
#endif
#define EXPECTED_MONGOOSE_VERSION "7.3"
#define Q(x) #x
#define QUOTE(x) Q(x)

View File

@ -154,7 +154,7 @@ static void *tpool_worker(void *arg) {
int stuck_notified = 0;
int throttle_ms = 0;
while (1) {
while (TRUE) {
pthread_mutex_lock(&pool->work_mutex);
if (pool->stop) {
break;

View File

@ -8,12 +8,16 @@
#include <src/ctx.h>
#define HTTP_SERVER_HEADER "Server: sist2/" VERSION "\r\n"
#define HTTP_TEXT_TYPE_HEADER "Content-Type: text/plain;charset=utf-8\r\n"
#define HTTP_REPLY_NOT_FOUND mg_http_reply(nc, 404, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Not found");
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
mg_printf(
nc,
"HTTP/1.1 %d %s\r\n"
"Server: sist2/" VERSION "\r\n"
HTTP_SERVER_HEADER
"Content-Length: %d\r\n"
"%s\r\n\r\n",
status_code, "OK",
@ -60,7 +64,7 @@ void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
mg_http_reply(nc, 404, "", "");
HTTP_REPLY_NOT_FOUND
return;
}
@ -70,7 +74,7 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
index_t *index = get_index_by_id(arg_md5);
if (index == NULL) {
mg_http_reply(nc, 404, "", "");
HTTP_REPLY_NOT_FOUND
return;
}
@ -138,10 +142,16 @@ void style_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
int parse_tn_num = FALSE;
if (hm->uri.len != 68) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
return;
if (hm->uri.len != 68 + 4) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
parse_tn_num = TRUE;
}
char arg_file_md5[MD5_STR_LENGTH];
@ -158,12 +168,25 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
store_t *store = get_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
char *data;
size_t data_len = 0;
char *data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
if (parse_tn_num) {
int tn_num = atoi(hm->uri.ptr + 68);
char tn_key[sizeof(md5_buf) + sizeof(int)];
memcpy(tn_key, md5_buf, sizeof(md5_buf));
memcpy(tn_key + sizeof(md5_buf), &tn_num, sizeof(tn_num));
data = store_read(store, (char *) tn_key, sizeof(tn_key), &data_len);
} else {
data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
}
if (data_len != 0) {
send_response_line(
nc, 200, data_len,
@ -173,7 +196,7 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
mg_send(nc, data, data_len);
free(data);
} else {
mg_http_reply(nc, 404, "Content-Type: text/plain;charset=utf-8\r\n", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
}
@ -182,7 +205,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_reply(nc, 500, "", "Invalid request");
mg_http_reply(nc, 500, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
return;
}
@ -226,6 +249,11 @@ void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct mg_http_message *hm) {
if (strcmp(MG_VERSION, EXPECTED_MONGOOSE_VERSION) != 0) {
LOG_WARNING("serve.c", "sist2 was not linked with latest mongoose version, "
"serving file from disk might not work as expected.")
}
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;
@ -246,7 +274,7 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
char disposition[8192];
snprintf(disposition, sizeof(disposition),
"Content-Disposition: inline; filename=\"%s%s%s\"\r\nAccept-Ranges: bytes\r\n",
HTTP_SERVER_HEADER "Content-Disposition: inline; filename=\"%s%s%s\"\r\nAccept-Ranges: bytes\r\n",
name, strlen(ext) == 0 ? "" : ".", ext);
mg_http_serve_file(nc, hm, full_path, mime, disposition);
@ -273,6 +301,7 @@ void index_info(struct mg_connection *nc) {
cJSON *json = cJSON_CreateObject();
cJSON *arr = cJSON_AddArrayToObject(json, "indices");
cJSON_AddStringToObject(json, "mongooseVersion", MG_VERSION);
cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
cJSON_AddStringToObject(json, "version", Version);
cJSON_AddStringToObject(json, "esVersion", format_es_version(WebCtx.es_version));
@ -314,7 +343,7 @@ void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -328,14 +357,14 @@ void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -350,7 +379,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -369,7 +398,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
cJSON *parent = cJSON_GetObjectItem(source, "parent");
@ -383,7 +412,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (idx == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -452,7 +481,7 @@ tag_req_t *parse_tag_request(cJSON *json) {
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -462,14 +491,14 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
LOG_DEBUG("serve.c", "Invalid tag request")
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
store_t *store = get_tag_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -615,7 +644,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
} else if (mg_http_match_uri(hm, "/d/*")) {
document_info(nc, hm);
} else {
mg_http_reply(nc, 404, "", "Page not found");
HTTP_REPLY_NOT_FOUND
}
} else if (ev == MG_EV_POLL) {
@ -645,7 +674,8 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
free(tmp);
}
mg_http_reply(nc, 500, "", "");
mg_http_reply(nc, 500, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER,
"Elasticsearch error, see server logs.");
}
free_response(r);
@ -659,7 +689,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
void serve(const char *listen_address) {
printf("Starting web server @ http://%s\n", listen_address);
LOG_INFOF("serve.c", "Starting web server @ http://%s", listen_address)
struct mg_mgr mgr;
mg_mgr_init(&mgr);

File diff suppressed because one or more lines are too long

View File

@ -6,10 +6,26 @@ set(CMAKE_C_STANDARD 11)
option(BUILD_TESTS "Build tests" on)
add_subdirectory(third-party/antiword)
add_compile_definitions(
antiword
NDEBUG
)
if (SIST_DEBUG)
add_compile_definitions(
antiword
DEBUG
)
else()
add_compile_definitions(
antiword
NDEBUG
)
target_compile_options(
antiword
PRIVATE
-g
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-fno-inline
)
endif()
add_library(
scan

View File

@ -12,7 +12,7 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
struct archive_entry *entry = NULL;
arc_data_t arc_data;
if (ctx->tn_size <= 0) {
if (!ctx->enable_tn) {
return;
}
@ -44,7 +44,20 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
break;
}
ret = store_image_thumbnail((scan_media_ctx_t *) ctx, buf, entry_size, doc, file_path);
scan_media_ctx_t media_ctx = {
.tn_count = ctx->enable_tn ? 1 : 0,
.tn_size = ctx->tn_size,
.tn_qscale = ctx->tn_qscale,
.tesseract_lang = NULL,
.tesseract_path = NULL,
.read_subtitles = FALSE,
.max_media_buffer = 0,
.log = ctx->log,
.logf = ctx->logf,
.store = ctx->store,
};
ret = store_image_thumbnail(&media_ctx, buf, entry_size, doc, file_path);
free(buf);
if (ret == TRUE) {

View File

@ -9,6 +9,7 @@ typedef struct {
logf_callback_t logf;
store_callback_t store;
int enable_tn;
int tn_size;
float tn_qscale;

View File

@ -155,7 +155,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_TN_META(doc, pixmap->w, pixmap->h)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
free(samples);
@ -283,7 +283,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
APPEND_LONG_META(doc, MetaPages, page_count)
if (ctx->tn_size > 0) {
if (ctx->enable_tn) {
if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) {
fz_drop_stream(fzctx, stream);
fz_drop_document(fzctx, fzdoc);
@ -404,7 +404,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {
text_buffer_t content_buffer = text_buffer_create(ctx->content_size);
if (ctx->tn_size <= 0) {
if (!ctx->enable_tn) {
return;
}

View File

@ -6,6 +6,7 @@
typedef struct {
long content_size;
int tn_size;
int enable_tn;
const char *tesseract_lang;
const char *tesseract_path;
pthread_mutex_t mupdf_mutex;

View File

@ -176,7 +176,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
strcpy(meta_name->str_val, font_name);
APPEND_META(doc, meta_name)
if (ctx->enable_tn == TRUE) {
if (!ctx->enable_tn) {
FT_Done_Face(face);
free(buf);
return;
@ -231,7 +231,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
dyn_buffer_t bmp_data = dyn_buffer_create();
bmp_format(&bmp_data, dimensions, bitmap);
APPEND_TN_META(doc, dimensions.width, dimensions.height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data);

View File

@ -35,11 +35,6 @@
meta_long->long_val = value; \
APPEND_META(doc, meta_long)}
#define APPEND_TN_META(doc, width, height) \
{meta_line_t *meta_str = malloc(sizeof(meta_line_t) + 4 + 1 + 4); \
meta_str->key = MetaThumbnail; \
sprintf(meta_str->str_val, "%04d,%04d", width, height); \
APPEND_META(doc, meta_str)}
#define APPEND_META(doc, meta) \
meta->next = NULL;\

View File

@ -6,7 +6,6 @@
#define AVIO_BUF_SIZE 8192
#define IS_VIDEO(fmt) ((fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0)
#define STREAM_IS_IMAGE (stream->nb_frames <= 1)
#define STORE_AS_IS ((void*)-1)
@ -398,6 +397,110 @@ void ocr_image(scan_media_ctx_t *ctx, document_t *doc, const AVCodecContext *dec
av_frame_free(&rgb_frame);
}
#define SAVE_THUMBNAIL_OK 0
#define SAVE_THUMBNAIL_SKIPPED 1
#define SAVE_THUMBNAIL_FAILED 2
int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *decoder,
AVStream *stream, int video_stream, document_t *doc, double seek_ratio,
int thumbnail_index) {
if (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
int seek_ok = FALSE;
double target_timestamp = (double) pFormatCtx->duration * seek_ratio;
long ts = (long) target_timestamp;
int seek_ret = avformat_seek_file(
// Allow +- 1s
pFormatCtx, -1, ts - AV_TIME_BASE, ts, ts + AV_TIME_BASE,
0
);
if (seek_ret == 0) {
seek_ok = TRUE;
} else {
CTX_LOG_DEBUGF(
doc->filepath,
"(media.c) Could not seek media file: %s", av_err2str(seek_ret)
)
}
if (seek_ok == FALSE && thumbnail_index != 0) {
CTX_LOG_WARNING(doc->filepath, "(media.c) Could not seek media file. Can't generate additional thumbnails.")
return SAVE_THUMBNAIL_FAILED;
}
}
frame_and_packet_t *frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
if (frame_and_packet == NULL) {
return SAVE_THUMBNAIL_FAILED;
}
if (ctx->tesseract_lang != NULL && IS_VIDEO(pFormatCtx)) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
}
// NOTE: OCR'd content takes precedence over exif image description
if (thumbnail_index == 0) {
append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));
}
// Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);
if (scaled_frame == NULL) {
frame_and_packet_free(frame_and_packet);
return SAVE_THUMBNAIL_FAILED;
}
int return_value;
if (scaled_frame == STORE_AS_IS) {
return_value = SAVE_THUMBNAIL_OK;
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
ctx->tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame);
AVPacket jpeg_packet;
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
if (thumbnail_index == 0) {
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
return_value = SAVE_THUMBNAIL_OK;
} else if (thumbnail_index > 1) {
return_value = SAVE_THUMBNAIL_OK;
// TO FIX: the 2nd rendered frame is always broken, just skip it until
// I figure out a better fix.
thumbnail_index -= 1;
char tn_key[sizeof(doc->path_md5) + sizeof(int)];
memcpy(tn_key, doc->path_md5, sizeof(doc->path_md5));
memcpy(tn_key + sizeof(doc->path_md5), &thumbnail_index, sizeof(thumbnail_index));
ctx->store((char *) tn_key, sizeof(tn_key), (char *) jpeg_packet.data, jpeg_packet.size);
} else {
return_value = SAVE_THUMBNAIL_SKIPPED;
}
avcodec_free_context(&jpeg_encoder);
av_packet_unref(&jpeg_packet);
av_free(*scaled_frame->data);
av_frame_free(&scaled_frame);
}
frame_and_packet_free(frame_and_packet);
return return_value;
}
void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) {
int video_stream = -1;
@ -458,7 +561,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
append_audio_meta(pFormatCtx, doc);
}
if (video_stream != -1 && ctx->tn_size > 0) {
if (video_stream != -1 && ctx->tn_count > 0) {
AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
@ -473,69 +576,38 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
avcodec_parameters_to_context(decoder, stream->codecpar);
avcodec_open2(decoder, video_codec, NULL);
//Seek
if (!STREAM_IS_IMAGE && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
int seek_ret;
for (int i = 20; i >= 0; i--) {
seek_ret = av_seek_frame(pFormatCtx, video_stream,
(long) ((double) stream->duration * 0.10), 0);
if (seek_ret == 0) {
break;
}
int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE);
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF && video_duration_in_seconds >= 15)
// Limit to ~1 thumbnail every 5s
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 5 + 1), 1) + 1
: 1;
const double seek_increment = thumbnails_to_generate == 1
? 0.10
: 1.0 / (thumbnails_to_generate + 1);
int number_of_thumbnails_generated = 0;
int save_thumbnail_ret;
for (int i = 0; i < thumbnails_to_generate; i++) {
double seek_ratio = seek_increment * i + seek_increment * 0.9;
save_thumbnail_ret = decode_frame_and_save_thumbnail(ctx, pFormatCtx, decoder, stream, video_stream, doc,
seek_ratio, i);
if (save_thumbnail_ret == SAVE_THUMBNAIL_FAILED) {
break;
}
if (save_thumbnail_ret == SAVE_THUMBNAIL_OK) {
number_of_thumbnails_generated += 1;
}
}
frame_and_packet_t *frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
if (frame_and_packet == NULL) {
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
if (number_of_thumbnails_generated > 0) {
APPEND_LONG_META(doc, MetaThumbnail, number_of_thumbnails_generated)
}
if (ctx->tesseract_lang != NULL && STREAM_IS_IMAGE) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
}
// NOTE: OCR'd content takes precedence over exif image description
append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));
// Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);
if (scaled_frame == NULL) {
frame_and_packet_free(frame_and_packet);
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
if (scaled_frame == STORE_AS_IS) {
APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
ctx->tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame);
AVPacket jpeg_packet;
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
avcodec_free_context(&jpeg_encoder);
av_packet_unref(&jpeg_packet);
av_free(*scaled_frame->data);
av_frame_free(&scaled_frame);
}
frame_and_packet_free(frame_and_packet);
avcodec_free_context(&decoder);
}
@ -772,7 +844,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
}
if (scaled_frame == STORE_AS_IS) {
APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
@ -786,7 +858,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);

View File

@ -17,6 +17,9 @@ typedef struct {
int tn_size;
float tn_qscale;
/** Number of thumbnails to generate for videos */
int tn_count;
long max_media_buffer;
int read_subtitles;

View File

@ -76,6 +76,7 @@ void parse_msdoc_pdf(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file, void *b
scan_ebook_ctx_t ebook_ctx = {
.content_size = ctx->content_size,
.tn_size = ctx->tn_size,
.enable_tn = TRUE,
.log = ctx->log,
.logf = ctx->logf,
.store = ctx->store,
@ -137,7 +138,7 @@ void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc) {
return;
}
if (ctx->tn_size > 0) {
if (ctx->enable_tn) {
char *buf_pdf = malloc(buf_len);
memcpy(buf_pdf, buf, buf_len);
parse_msdoc_pdf(ctx, doc, file, buf_pdf, buf_len);

View File

@ -5,6 +5,7 @@
typedef struct {
long content_size;
int enable_tn;
int tn_size;
log_callback_t log;
logf_callback_t logf;

View File

@ -190,7 +190,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
char *buf = malloc(entry_size);
archive_read_data(a, buf, entry_size);
APPEND_TN_META(doc, 1, 1) // Size unknown
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, entry_size);
free(buf);
}
@ -238,7 +238,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
if (read_doc_props(ctx, a, doc) != 0) {
break;
}
} else if (strcmp(path, "docProps/thumbnail.jpeg") == 0) {
} else if (ctx->enable_tn && strcmp(path, "docProps/thumbnail.jpeg") == 0) {
read_thumbnail(ctx, doc, a, entry);
}
}

View File

@ -5,6 +5,7 @@
#include "../scan.h"
typedef struct {
int enable_tn;
long content_size;
log_callback_t log;
logf_callback_t logf;

View File

@ -69,7 +69,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
@ -157,7 +157,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
APPEND_STR_META(doc, MetaMediaVideoCodec, "raw")
if (ctx->tn_size <= 0) {
if (!ctx->enable_tn) {
free(buf);
libraw_close(libraw_lib);
return;

View File

@ -8,6 +8,7 @@ typedef struct {
logf_callback_t logf;
store_callback_t store;
int enable_tn;
int tn_size;
float tn_qscale;
} scan_raw_ctx_t;

View File

@ -350,9 +350,13 @@ TEST(Comic, ComicIssue160) {
load_doc_file("libscan-test-files/test_files/ebook/comic-segfault-issue-160.cbr", &f, &doc);
int tn_size_saved = comic_ctx.tn_size;
comic_ctx.tn_size = 0;
size_t size_before = store_size;
comic_ctx.enable_tn = FALSE;
parse_comic(&comic_ctx, &f, &doc);
comic_ctx.tn_size = tn_size_saved;
comic_ctx.enable_tn = tn_size_saved;
ASSERT_EQ(store_size, size_before);
cleanup(&doc, &f);
}
@ -669,8 +673,6 @@ TEST(Ooxml, Docx2Archive) {
ASSERT_EQ(get_meta(&LastSubDoc, MetaPages)->long_val, 1);
ASSERT_EQ(strlen(get_meta(&LastSubDoc, MetaContent)->str_val), 2780);
fprintf(stderr, "%s\n", get_meta(&LastSubDoc, MetaContent)->str_val);
ooxml_500_ctx.content_size = 500;
cleanup(&doc, &f);
@ -1111,6 +1113,7 @@ int main(int argc, char **argv) {
ebook_ctx.tesseract_lang = "eng";
ebook_ctx.tesseract_path = "./tessdata";
ebook_ctx.tn_size = 500;
ebook_ctx.enable_tn = TRUE;
ebook_ctx.log = noop_log;
ebook_ctx.logf = noop_logf;
ebook_ctx.fast_epub_parse = 0;
@ -1124,12 +1127,14 @@ int main(int argc, char **argv) {
comic_ctx.tn_qscale = 1.0;
comic_ctx.tn_size = 500;
comic_ctx.enable_tn = TRUE;
comic_ctx.log = noop_log;
comic_ctx.logf = noop_logf;
comic_ctx.store = counter_store;
comic_big_ctx.tn_qscale = 1.0;
comic_big_ctx.tn_size = 5000;
comic_big_ctx.enable_tn = TRUE;
comic_big_ctx.log = noop_log;
comic_big_ctx.logf = noop_logf;
comic_big_ctx.store = counter_store;
@ -1138,10 +1143,12 @@ int main(int argc, char **argv) {
media_ctx.logf = noop_logf;
media_ctx.store = counter_store;
media_ctx.tn_size = 500;
media_ctx.tn_count = 1;
media_ctx.tn_qscale = 1.0;
media_ctx.max_media_buffer = (long) 2000 * (long) 1024 * (long) 1024;
ooxml_500_ctx.content_size = 500;
ooxml_500_ctx.enable_tn = TRUE;
ooxml_500_ctx.log = noop_log;
ooxml_500_ctx.logf = noop_logf;
ooxml_500_ctx.store = counter_store;
@ -1154,6 +1161,7 @@ int main(int argc, char **argv) {
raw_ctx.logf = noop_logf;
raw_ctx.store = counter_store;
raw_ctx.tn_size = 500;
raw_ctx.enable_tn = TRUE;
raw_ctx.tn_qscale = 5.0;
msdoc_ctx.log = noop_log;
@ -1161,12 +1169,14 @@ int main(int argc, char **argv) {
msdoc_ctx.store = counter_store;
msdoc_ctx.content_size = 500;
msdoc_ctx.tn_size = 500;
msdoc_ctx.enable_tn = TRUE;
msdoc_text_ctx.log = noop_log;
msdoc_text_ctx.logf = noop_logf;
msdoc_text_ctx.store = counter_store;
msdoc_text_ctx.content_size = 500;
msdoc_text_ctx.tn_size = 0;
msdoc_text_ctx.enable_tn = FALSE;
wpd_ctx.log = noop_log;
wpd_ctx.logf = noop_logf;