Add thumbnail-count option

This commit is contained in:
simon987 2022-02-19 13:43:33 -05:00
parent a0db49e7d8
commit 3d4331b27d
37 changed files with 651 additions and 23780 deletions

View File

@ -39,7 +39,7 @@
"index": false
},
"thumbnail": {
"type": "keyword",
"type": "integer",
"index": false
},
"videoc": {

View File

@ -1,32 +1,3 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no'/>
<title>sist2</title>
<link href="js/chunk-vendors.js" rel="preload" as="script"><link href="js/index.js" rel="preload" as="script"></head>
<body>
<noscript>
<style>
body {
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1,user-scalable=no"><title>sist2</title><link href="css/chunk-vendors.css" rel="preload" as="style"><link href="css/index.css" rel="preload" as="style"><link href="js/chunk-vendors.js" rel="preload" as="script"><link href="js/index.js" rel="preload" as="script"><link href="css/chunk-vendors.css" rel="stylesheet"><link href="css/index.css" rel="stylesheet"></head><body><noscript><style>body {
height: initial;
}
</style>
<div style="text-align: center; margin-top: 100px">
<strong>
We're sorry but sist2 doesn't work properly without JavaScript enabled.
Please enable it to continue.
</strong>
<br/>
<strong>
Nous sommes désolés mais sist2 ne fonctionne pas correctement
si JavaScript est désactivé.
Veuillez l'activer pour continuer.
</strong>
</div>
</noscript>
<div id="app"></div>
<script type="text/javascript" src="js/chunk-vendors.js"></script><script type="text/javascript" src="js/index.js"></script></body>
</html>
}</style><div style="text-align: center; margin-top: 100px"><strong>We're sorry but sist2 doesn't work properly without JavaScript enabled. Please enable it to continue.</strong><br><strong>Nous sommes désolés mais sist2 ne fonctionne pas correctement si JavaScript est désactivé. Veuillez l'activer pour continuer.</strong></div></noscript><div id="app"></div><script src="js/chunk-vendors.js"></script><script src="js/index.js"></script></body></html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -62,8 +62,9 @@ export interface EsHit {
isPlayableImage: boolean
isAudio: boolean
hasThumbnail: boolean
tnW: number
tnH: number
hasVidPreview: boolean
/** Number of thumbnails available */
tnNum: number
}
highlight: {
name: string[] | undefined,
@ -134,8 +135,15 @@ class Sist2Api {
if ("thumbnail" in hit._source) {
hit._props.hasThumbnail = true;
hit._props.tnW = Number(hit._source.thumbnail.split(",")[0]);
hit._props.tnH = Number(hit._source.thumbnail.split(",")[1]);
{
    // The thumbnail field now carries a thumbnail count. A value that does not
    // parse as a number is treated as a legacy entry with exactly one
    // thumbnail and no video preview (backwards compatibility).
    const parsedTnCount = Number(hit._source.thumbnail);
    const isLegacyValue = Number.isNaN(parsedTnCount);

    hit._props.tnNum = isLegacyValue ? 1 : parsedTnCount;
    // A video preview only exists when more than one thumbnail was generated.
    hit._props.hasVidPreview = isLegacyValue ? false : hit._props.tnNum > 1;
}
}
switch (mimeCategory) {

View File

@ -30,6 +30,7 @@ export default {
{key: "esIndex", value: this.$store.state.sist2Info.esIndex},
{key: "tagline", value: this.$store.state.sist2Info.tagline},
{key: "dev", value: this.$store.state.sist2Info.dev},
{key: "mongooseVersion", value: this.$store.state.sist2Info.mongooseVersion},
{key: "esVersion", value: this.$store.state.sist2Info.esVersion},
{key: "esVersionSupported", value: this.$store.state.sist2Info.esVersionSupported},
{key: "esVersionLegacy", value: this.$store.state.sist2Info.esVersionLegacy},

View File

@ -1,5 +1,6 @@
<template>
<div class="doc-card" :class="{'sub-document': doc._props.isSubDocument}" :style="`width: ${width}px`">
<div class="doc-card" :class="{'sub-document': doc._props.isSubDocument}" :style="`width: ${width}px`"
@click="$store.commit('busTnTouchStart', null)">
<b-card
no-body
img-top

View File

@ -1,5 +1,6 @@
<template>
<div v-if="doc._props.hasThumbnail" class="img-wrapper" @mouseenter="onTnEnter()" @mouseleave="onTnLeave()">
<div v-if="doc._props.hasThumbnail" class="img-wrapper" @mouseenter="onTnEnter()" @mouseleave="onTnLeave()"
@touchstart="onTouchStart()">
<div v-if="doc._props.isAudio" class="card-img-overlay" :class="{'small-badge': smallBadge}">
<span class="badge badge-resolution">{{ humanTime(doc._source.duration) }}</span>
</div>
@ -25,26 +26,53 @@
<img ref="tn"
v-if="doc._props.isPlayableImage || doc._props.isPlayableVideo"
:src="(doc._props.isGif && hover) ? `f/${doc._id}` : `t/${doc._source.index}/${doc._id}`"
:src="tnSrc"
alt=""
:style="{height: (doc._props.isGif && hover) ? `${tnHeight()}px` : undefined}"
class="pointer fit card-img-top" @click="onThumbnailClick()">
<img v-else :src="`t/${doc._source.index}/${doc._id}`" alt=""
<img v-else :src="tnSrc" alt=""
class="fit card-img-top">
<ThumbnailProgressBar v-if="hover && doc._props.hasVidPreview"
:progress="(currentThumbnailNum + 1) / (doc._props.tnNum)"
></ThumbnailProgressBar>
</div>
</template>
<script>
import {humanTime} from "@/util";
import ThumbnailProgressBar from "@/components/ThumbnailProgressBar";
export default {
name: "FullThumbnail",
props: ["doc", "smallBadge"],
components: {ThumbnailProgressBar},
data() {
return {
hover: false
hover: false,
currentThumbnailNum: 0,
timeoutId: null
}
},
created() {
this.$store.subscribe((mutation) => {
if (mutation.type === "busTnTouchStart" && mutation.payload !== this.doc._id) {
this.onTnLeave();
}
});
},
computed: {
tnSrc() {
const doc = this.doc;
const props = doc._props;
if (props.isGif && this.hover) {
return `f/${doc._id}`;
}
return (this.currentThumbnailNum === 0)
? `t/${doc._source.index}/${doc._id}`
: `t/${doc._source.index}/${doc._id}${String(this.currentThumbnailNum).padStart(4, "0")}`;
},
},
methods: {
humanTime: humanTime,
onThumbnailClick() {
@ -53,11 +81,43 @@ export default {
// Returns the `height` property of the element registered under the `tn` ref.
// That ref sits on an <img> rendered behind a v-if, so it is only available
// while that image is in the DOM.
tnHeight() {
return this.$refs.tn.height;
},
// Returns the `width` property of the element registered under the `tn` ref
// (same availability caveat as tnHeight: the ref is behind a v-if).
tnWidth() {
return this.$refs.tn.width;
},
onTnEnter() {
this.hover = true;
if (this.doc._props.hasVidPreview) {
this.currentThumbnailNum += 1;
this.scheduleNextTnNum();
}
},
onTnLeave() {
this.currentThumbnailNum = 0;
this.hover = false;
if (this.timeoutId !== null) {
window.clearTimeout(this.timeoutId);
this.timeoutId = null;
}
},
scheduleNextTnNum() {
const INTERVAL = this.$store.state.optVidPreviewInterval ?? 700;
this.timeoutId = window.setTimeout(() => {
if (!this.hover) {
return;
}
this.scheduleNextTnNum();
if (this.currentThumbnailNum === this.doc._props.tnNum - 1) {
this.currentThumbnailNum = 0;
} else {
this.currentThumbnailNum += 1;
}
}, INTERVAL);
},
onTouchStart() {
this.$store.commit("busTnTouchStart", this.doc._id);
if (!this.hover) {
this.onTnEnter()
}
},
}
}

View File

@ -0,0 +1,40 @@
<template>
<div class="thumbnail-progress-bar" :style="{width: `${percentProgress}%`}"></div>
</template>
<script>
/**
 * Thin bar whose width reflects how far the thumbnail preview has advanced.
 * `progress` is a fraction; it is rendered as a percentage width.
 */
export default {
    name: "ThumbnailProgressBar",
    props: ["doc", "progress"],
    computed: {
        // `progress` scaled to a percentage and clamped to [0, 100].
        percentProgress() {
            const rawPercent = this.progress * 100;
            const atLeastZero = Math.max(rawPercent, 0);
            return Math.min(atLeastZero, 100);
        }
    }
}
</script>
<style scoped>
.thumbnail-progress-bar {
position: absolute;
left: 0;
bottom: 0;
height: 4px;
background: #2196f3AA;
z-index: 9;
}
.theme-black .thumbnail-progress-bar {
background: rgba(0, 188, 212, 0.95);
}
.sub-document .thumbnail-progress-bar {
max-width: calc(100% - 8px);
left: 4px;
}
</style>

View File

@ -71,7 +71,8 @@ export default {
hideDuplicates: "Hide duplicate results based on checksum",
hideLegacy: "Hide the 'legacyES' Elasticsearch notice",
updateMimeMap: "Update the Media Types tree in real time",
useDatePicker: "Use a Date Picker component rather than a slider"
useDatePicker: "Use a Date Picker component rather than a slider",
vidPreviewInterval: "Video preview frame duration in ms"
},
queryMode: {
simple: "Simple",
@ -237,7 +238,8 @@ export default {
hideDuplicates: "Masquer les résultats en double",
hideLegacy: "Masquer la notice 'legacyES' Elasticsearch",
updateMimeMap: "Mettre à jour l'arbre de Types de médias en temps réel",
useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider"
useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider",
vidPreviewInterval: "Durée des images d'aperçu video en millisecondes"
},
queryMode: {
simple: "Simple",
@ -403,7 +405,8 @@ export default {
hideDuplicates: "使用校验码隐藏重复结果",
hideLegacy: "隐藏'legacyES' Elasticsearch 通知",
updateMimeMap: "媒体类型树的实时更新",
useDatePicker: "使用日期选择器组件而不是滑块"
useDatePicker: "使用日期选择器组件而不是滑块",
vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位"
},
queryMode: {
simple: "简单",

View File

@ -50,6 +50,7 @@ export default new Vuex.Store({
optHideLegacy: false,
optUpdateMimeMap: false,
optUseDatePicker: false,
optVidPreviewInterval: 700,
_onLoadSelectedIndices: [] as string[],
_onLoadSelectedMimeTypes: [] as string[],
@ -159,6 +160,7 @@ export default new Vuex.Store({
setOptHideLegacy: (state, val) => state.optHideLegacy = val,
setOptUpdateMimeMap: (state, val) => state.optUpdateMimeMap = val,
setOptUseDatePicker: (state, val) => state.optUseDatePicker = val,
setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
@ -174,6 +176,12 @@ export default new Vuex.Store({
busSearch: () => {
// noop
},
busTouchEnd: () => {
// noop
},
// Event-bus style mutation: it changes nothing itself and exists so that
// subscribers can react to it. As with the other mutations in this store,
// the first argument is the state; the touched document id is the payload.
busTnTouchStart: (state, docId) => {
    // noop
},
},
actions: {
setSist2Info: (store, val) => {
@ -369,5 +377,6 @@ export default new Vuex.Store({
optHideLegacy: state => state.optHideLegacy,
optUpdateMimeMap: state => state.optUpdateMimeMap,
optUseDatePicker: state => state.optUseDatePicker,
optVidPreviewInterval: state => state.optVidPreviewInterval,
}
})

View File

@ -85,6 +85,10 @@
<label>{{ $t("opt.slideDuration") }}</label>
<b-form-input :value="optLightboxSlideDuration" type="number" min="1"
@input="setOptLightboxSlideDuration"></b-form-input>
<label>{{ $t("opt.vidPreviewInterval") }}</label>
<b-form-input :value="optVidPreviewInterval" type="number" min="50"
@input="setOptVidPreviewInterval"></b-form-input>
</b-card>
<h4 class="mt-3">{{ $t("treemapOptions") }}</h4>
@ -234,6 +238,7 @@ export default {
"optHideLegacy",
"optUpdateMimeMap",
"optUseDatePicker",
"optVidPreviewInterval",
]),
clientWidth() {
return window.innerWidth;
@ -279,6 +284,7 @@ export default {
"setOptHideLegacy",
"setOptUpdateMimeMap",
"setOptUseDatePicker",
"setOptVidPreviewInterval",
]),
onResetClick() {
localStorage.removeItem("sist2_configuration");

View File

@ -100,6 +100,10 @@ export default Vue.extend({
...mapGetters(["indices", "optDisplay"]),
},
mounted() {
// Handle touch events.
// Both handlers must be functions: assigning the *result* of commit() would
// fire the mutation once at mount time and leave touchcancel unhandled.
window.ontouchend = () => this.$store.commit("busTouchEnd");
window.ontouchcancel = () => this.$store.commit("busTouchEnd");
this.search = _debounce(async (clear: boolean) => {
if (clear) {
await this.clearResults();

View File

@ -5,7 +5,8 @@
#define DEFAULT_OUTPUT "index.sist2/"
#define DEFAULT_CONTENT_SIZE 32768
#define DEFAULT_QUALITY 1
#define DEFAULT_SIZE 300
#define DEFAULT_THUMBNAIL_SIZE 500
#define DEFAULT_THUMBNAIL_COUNT 1
#define DEFAULT_REWRITE_URL ""
#define DEFAULT_ES_URL "http://localhost:9200"
@ -96,7 +97,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->path = abs_path;
}
if (args->incremental != NULL) {
if (args->incremental != OPTION_VALUE_UNSPECIFIED) {
args->incremental = abspath(args->incremental);
if (abs_path == NULL) {
sist_log("main.c", LOG_SIST_WARNING, "Could not open original index! Disabled incremental scan feature.");
@ -104,32 +105,39 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
}
}
if (args->quality == 0) {
args->quality = DEFAULT_QUALITY;
} else if (args->quality < 1 || args->quality > 31) {
fprintf(stderr, "Invalid quality: %f\n", args->quality);
// Thumbnail quality: fall back to the default when unspecified, otherwise
// require a value within [1.0, 31.0].
if (args->tn_quality == OPTION_VALUE_UNSPECIFIED) {
    args->tn_quality = DEFAULT_QUALITY;
} else if (!(args->tn_quality >= 1.0f && args->tn_quality <= 31.0f)) {
    // The range test is negated on purpose: every comparison with NaN is
    // false, so "x < 1 || x > 31" would silently accept NaN.
    fprintf(stderr, "Invalid value for --thumbnail-quality argument: %f. Must be within [1.0, 31.0].\n",
            args->tn_quality);
    return 1;
}
if (args->size == 0) {
args->size = DEFAULT_SIZE;
} else if (args->size > 0 && args->size < 32) {
printf("Invalid size: %d\n", args->content_size);
// Thumbnail size: fall back to the default when unspecified, otherwise
// require at least 32 pixels.
if (args->tn_size == OPTION_VALUE_UNSPECIFIED) {
    args->tn_size = DEFAULT_THUMBNAIL_SIZE;
} else if (args->tn_size < 32) {
    // Report on stderr like the other validation errors; 32 itself is accepted.
    fprintf(stderr, "Invalid value for --thumbnail-size argument: %d. Must be at least 32 pixels.\n",
            args->tn_size);
    return 1;
}
if (args->content_size == 0) {
if (args->tn_count == OPTION_VALUE_UNSPECIFIED) {
args->tn_count = DEFAULT_THUMBNAIL_COUNT;
} else if (args->tn_count == OPTION_VALUE_DISABLE) {
args->tn_count = 0;
}
if (args->content_size == OPTION_VALUE_UNSPECIFIED) {
args->content_size = DEFAULT_CONTENT_SIZE;
}
if (args->threads == 0) {
args->threads = 1;
} else if (args->threads < 0) {
fprintf(stderr, "Invalid threads: %d\n", args->threads);
fprintf(stderr, "Invalid value for --threads: %d. Must be a positive number\n", args->threads);
return 1;
}
if (args->output == NULL) {
if (args->output == OPTION_VALUE_UNSPECIFIED) {
args->output = malloc(strlen(DEFAULT_OUTPUT) + 1);
strcpy(args->output, DEFAULT_OUTPUT);
} else {
@ -148,7 +156,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->depth += 1;
}
if (args->name == NULL) {
if (args->name == OPTION_VALUE_UNSPECIFIED) {
args->name = g_path_get_basename(args->output);
} else {
char *tmp = malloc(strlen(args->name) + 1);
@ -156,11 +164,11 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->name = tmp;
}
if (args->rewrite_url == NULL) {
if (args->rewrite_url == OPTION_VALUE_UNSPECIFIED) {
args->rewrite_url = DEFAULT_REWRITE_URL;
}
if (args->archive == NULL || strcmp(args->archive, "recurse") == 0) {
if (args->archive == OPTION_VALUE_UNSPECIFIED || strcmp(args->archive, "recurse") == 0) {
args->archive_mode = ARC_MODE_RECURSE;
} else if (strcmp(args->archive, "list") == 0) {
args->archive_mode = ARC_MODE_LIST;
@ -173,17 +181,17 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1;
}
if (args->ocr_images && args->tesseract_lang == NULL) {
if (args->ocr_images && args->tesseract_lang == OPTION_VALUE_UNSPECIFIED) {
fprintf(stderr, "You must specify --ocr-lang <LANG> to use --ocr-images");
return 1;
}
if (args->ocr_ebooks && args->tesseract_lang == NULL) {
if (args->ocr_ebooks && args->tesseract_lang == OPTION_VALUE_UNSPECIFIED) {
fprintf(stderr, "You must specify --ocr-lang <LANG> to use --ocr-ebooks");
return 1;
}
if (args->tesseract_lang != NULL) {
if (args->tesseract_lang != OPTION_VALUE_UNSPECIFIED) {
if (!args->ocr_ebooks && !args->ocr_images) {
fprintf(stderr, "You must specify at least one of --ocr-ebooks, --ocr-images");
@ -227,7 +235,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->tesseract_path = trained_data_path;
}
if (args->exclude_regex != NULL) {
if (args->exclude_regex != OPTION_VALUE_UNSPECIFIED) {
const char *error;
int error_offset;
@ -247,13 +255,13 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
ScanCtx.exclude = NULL;
}
if (args->treemap_threshold_str == 0) {
if (args->treemap_threshold_str == OPTION_VALUE_UNSPECIFIED) {
args->treemap_threshold = DEFAULT_TREEMAP_THRESHOLD;
} else {
args->treemap_threshold = atof(args->treemap_threshold_str);
}
if (args->max_memory_buffer == 0) {
if (args->max_memory_buffer == OPTION_VALUE_UNSPECIFIED) {
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
}
@ -261,7 +269,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->scan_mem_limit = DEFAULT_THROTTLE_MEMORY_THRESHOLD;
}
if (args->list_path != NULL) {
if (args->list_path != OPTION_VALUE_UNSPECIFIED) {
if (strcmp(args->list_path, "-") == 0) {
args->list_file = stdin;
LOG_DEBUG("cli.c", "Using stdin as list file")
@ -274,8 +282,9 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
}
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
LOG_DEBUGF("cli.c", "arg tn_quality=%f", args->tn_quality)
LOG_DEBUGF("cli.c", "arg tn_size=%d", args->tn_size)
LOG_DEBUGF("cli.c", "arg tn_count=%d", args->tn_count)
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
LOG_DEBUGF("cli.c", "arg threads=%d", args->threads)
LOG_DEBUGF("cli.c", "arg incremental=%s", args->incremental)

View File

@ -5,9 +5,12 @@
#include "libscan/arc/arc.h"
#define OPTION_VALUE_DISABLE (-1)
#define OPTION_VALUE_UNSPECIFIED (0)
typedef struct scan_args {
float quality;
int size;
float tn_quality;
int tn_size;
int content_size;
int threads;
int scan_mem_limit;
@ -30,6 +33,8 @@ typedef struct scan_args {
double treemap_threshold;
int max_memory_buffer;
int read_subtitles;
/** Number of thumbnails to generate */
int tn_count;
int fast_epub;
int calculate_checksums;
char *list_path;

File diff suppressed because one or more lines are too long

View File

@ -133,6 +133,7 @@ char *build_json_string(document_t *doc) {
while (meta != NULL) {
switch (meta->key) {
case MetaThumbnail:
case MetaPages:
case MetaWidth:
case MetaHeight:
@ -163,7 +164,6 @@ char *build_json_string(document_t *doc) {
case MetaExifModel:
case MetaAuthor:
case MetaModifiedBy:
case MetaThumbnail:
case MetaExifGpsLongitudeDMS:
case MetaExifGpsLongitudeDec:
case MetaExifGpsLongitudeRef:

View File

@ -55,7 +55,16 @@ void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t bu
if (key_len == MD5_DIGEST_LENGTH) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
} else if (key_len == MD5_DIGEST_LENGTH + sizeof(int)) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s/%d} %lu bytes",
path_md5_str, *(int *) (key + MD5_DIGEST_LENGTH), buf_len);
} else {
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
}

View File

@ -189,37 +189,41 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.comic_ctx.log = _log;
ScanCtx.comic_ctx.logf = _logf;
ScanCtx.comic_ctx.store = _store;
ScanCtx.comic_ctx.tn_size = args->size;
ScanCtx.comic_ctx.tn_qscale = args->quality;
ScanCtx.comic_ctx.enable_tn = args->tn_count > 0;
ScanCtx.comic_ctx.tn_size = args->tn_size;
ScanCtx.comic_ctx.tn_qscale = args->tn_quality;
ScanCtx.comic_ctx.cbr_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbr");
ScanCtx.comic_ctx.cbz_mime = mime_get_mime_by_string(ScanCtx.mime_table, "application/x-cbz");
// Ebook
pthread_mutex_init(&ScanCtx.ebook_ctx.mupdf_mutex, NULL);
ScanCtx.ebook_ctx.content_size = args->content_size;
ScanCtx.ebook_ctx.tn_size = args->size;
ScanCtx.ebook_ctx.enable_tn = args->tn_count > 0;
ScanCtx.ebook_ctx.tn_size = args->tn_size;
ScanCtx.ebook_ctx.tesseract_lang = args->tesseract_lang;
ScanCtx.ebook_ctx.tesseract_path = args->tesseract_path;
ScanCtx.ebook_ctx.log = _log;
ScanCtx.ebook_ctx.logf = _logf;
ScanCtx.ebook_ctx.store = _store;
ScanCtx.ebook_ctx.fast_epub_parse = args->fast_epub;
ScanCtx.ebook_ctx.tn_qscale = args->quality;
ScanCtx.ebook_ctx.tn_qscale = args->tn_quality;
// Font
ScanCtx.font_ctx.enable_tn = args->size > 0;
ScanCtx.font_ctx.enable_tn = args->tn_count > 0;
ScanCtx.font_ctx.log = _log;
ScanCtx.font_ctx.logf = _logf;
ScanCtx.font_ctx.store = _store;
// Media
ScanCtx.media_ctx.tn_qscale = args->quality;
ScanCtx.media_ctx.tn_size = args->size;
ScanCtx.media_ctx.tn_qscale = args->tn_quality;
ScanCtx.media_ctx.tn_size = args->tn_size;
ScanCtx.media_ctx.tn_count = args->tn_count;
ScanCtx.media_ctx.log = _log;
ScanCtx.media_ctx.logf = _logf;
ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
// read_subtitles comes from its own option. The thumbnail count is already
// stored in media_ctx.tn_count and must not overwrite this flag.
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
if (args->ocr_images) {
ScanCtx.media_ctx.tesseract_lang = args->tesseract_lang;
@ -228,6 +232,7 @@ void initialize_scan_context(scan_args_t *args) {
init_media();
// OOXML
ScanCtx.ooxml_ctx.enable_tn = args->tn_count > 0;
ScanCtx.ooxml_ctx.content_size = args->content_size;
ScanCtx.ooxml_ctx.log = _log;
ScanCtx.ooxml_ctx.logf = _logf;
@ -244,7 +249,8 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.text_ctx.logf = _logf;
// MSDOC
ScanCtx.msdoc_ctx.tn_size = args->size;
ScanCtx.msdoc_ctx.enable_tn = args->tn_count > 0;
ScanCtx.msdoc_ctx.tn_size = args->tn_size;
ScanCtx.msdoc_ctx.content_size = args->content_size;
ScanCtx.msdoc_ctx.log = _log;
ScanCtx.msdoc_ctx.logf = _logf;
@ -263,8 +269,9 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.fast = args->fast;
// Raw
ScanCtx.raw_ctx.tn_qscale = args->quality;
ScanCtx.raw_ctx.tn_size = args->size;
ScanCtx.raw_ctx.tn_qscale = args->tn_quality;
ScanCtx.raw_ctx.enable_tn = args->tn_count > 0;
ScanCtx.raw_ctx.tn_size = args->tn_size;
ScanCtx.raw_ctx.log = _log;
ScanCtx.raw_ctx.logf = _logf;
ScanCtx.raw_ctx.store = _store;
@ -309,7 +316,8 @@ void load_incremental_index(const scan_args_t *args) {
}
READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)), 1);
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
1);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
@ -320,7 +328,7 @@ void load_incremental_index(const scan_args_t *args) {
* 1. Build original_table - new_table => delete_table
* 2. Incrementally copy from old index files [(original+main) /\ copy_table] => index_original.ndjson.zst & store
*/
void save_incremental_index(scan_args_t* args) {
void save_incremental_index(scan_args_t *args) {
char dst_path[PATH_MAX];
char store_path[PATH_MAX];
char file_path[PATH_MAX];
@ -330,15 +338,17 @@ void save_incremental_index(scan_args_t* args) {
store_t *source = store_create(store_path, STORE_SIZE_TN);
LOG_INFOF("main.c", "incremental_delete: original size = %u, copy size = %u, new size = %u",
g_hash_table_size(ScanCtx.original_table),
g_hash_table_size(ScanCtx.copy_table),
g_hash_table_size(ScanCtx.new_table));
g_hash_table_size(ScanCtx.original_table),
g_hash_table_size(ScanCtx.copy_table),
g_hash_table_size(ScanCtx.new_table));
snprintf(del_path, PATH_MAX, "%s_index_delete.list.zst", ScanCtx.index.path);
READ_INDICES(file_path, args->incremental, incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table),
READ_INDICES(file_path, args->incremental,
incremental_delete(del_path, file_path, ScanCtx.copy_table, ScanCtx.new_table),
perror("incremental_delete"), 1);
writer_cleanup();
READ_INDICES(file_path, args->incremental, incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table),
READ_INDICES(file_path, args->incremental,
incremental_copy(source, ScanCtx.index.store, file_path, dst_path, ScanCtx.copy_table),
perror("incremental_copy"), 1);
writer_cleanup();
@ -412,6 +422,8 @@ void sist2_scan(scan_args_t *args) {
LOG_DEBUGF("main.c", "Skipped files: %d", ScanCtx.dbg_skipped_files_count)
LOG_DEBUGF("main.c", "Excluded files: %d", ScanCtx.dbg_excluded_files_count)
LOG_DEBUGF("main.c", "Failed files: %d", ScanCtx.dbg_failed_files_count)
LOG_DEBUGF("main.c", "Thumbnail store size: %d", ScanCtx.stat_tn_size)
LOG_DEBUGF("main.c", "Index size: %d", ScanCtx.stat_index_size)
if (args->incremental != NULL) {
save_incremental_index(args);
@ -551,13 +563,34 @@ void sist2_web(web_args_t *args) {
WebCtx.indices[i].desc = read_index_descriptor(path_tmp);
strcpy(WebCtx.indices[i].path, abs_path);
printf("Loaded index: %s\n", WebCtx.indices[i].desc.name);
LOG_INFOF("main.c", "Loaded index: [%s]", WebCtx.indices[i].desc.name)
free(abs_path);
}
serve(args->listen_address);
}
/**
 * Option callback that distinguishes "not specified" from "explicitly 0".
 *
 * Unspecified -> 0: Left untouched, later replaced by the default value
 * Specified "0" -> -1: Disable the option (ex. don't generate thumbnails)
 * Negative number -> Print an error and exit(1)
 * Specified a valid number -> Continue as normal
 *
 * The parsed value is read through option->value; the OPTION_VALUE_DISABLE
 * marker is written through option->data, which the option table sets to
 * the address of the same field.
 *
 * NOTE(review): both pointers are treated as int*. The option table also
 * attaches this callback to the float --thumbnail-quality option, where
 * that reinterpretation looks unintended - confirm.
 *
 * @return 0 always (a negative value terminates the process instead).
 */
int set_to_negative_if_value_is_zero(struct argparse *self, const struct argparse_option *option) {
    (void) self;

    int specified_value = *(int *) option->value;

    if (specified_value < 0) {
        fprintf(stderr, "error: option `--%s` Value must be >= 0\n", option->long_name);
        exit(1);
    }

    if (specified_value == 0) {
        *((int *) option->data) = OPTION_VALUE_DISABLE;
    }

    // The function is declared int: falling off the end would hand an
    // indeterminate value back to the argument parser.
    return 0;
}
int main(int argc, const char *argv[]) {
sigsegv_handler = signal(SIGSEGV, sig_handler);
@ -588,12 +621,18 @@ int main(int argc, const char *argv[]) {
OPT_GROUP("Scan options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
OPT_STRING(0, "mem-throttle", &scan_args->scan_mem_limit, "Total memory threshold in MB for scan throttling. DEFAULT=0"),
OPT_FLOAT('q', "quality", &scan_args->quality,
"Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=3"),
OPT_INTEGER(0, "size", &scan_args->size,
"Thumbnail size, in pixels. Use negative value to disable. DEFAULT=500"),
// No zero-to-negative callback here: that callback reads and writes the
// target as an int, which would corrupt this float (an explicit 0 would be
// turned into a NaN bit pattern). A value of 0 is simply treated as
// "unspecified" during validation and replaced by the default.
OPT_FLOAT('q', "thumbnail-quality", &scan_args->tn_quality,
          "Thumbnail quality, on a scale of 1.0 to 31.0, 1.0 being the best. DEFAULT=1"),
OPT_INTEGER(0, "thumbnail-size", &scan_args->tn_size,
"Thumbnail size, in pixels. DEFAULT=500",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_size),
OPT_INTEGER(0, "thumbnail-count", &scan_args->tn_count,
"Number of thumbnails to generate. Set a value > 1 to create video previews, set to 0 to disable thumbnails. DEFAULT=1",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->tn_count),
OPT_INTEGER(0, "content-size", &scan_args->content_size,
"Number of bytes to be extracted from text documents. Use negative value to disable. DEFAULT=32768"),
"Number of bytes to be extracted from text documents. Set to 0 to disable. DEFAULT=32768",
set_to_negative_if_value_is_zero, (intptr_t) &scan_args->content_size),
OPT_STRING(0, "incremental", &scan_args->incremental,
"Reuse an existing index and only scan modified files."),
OPT_STRING('o', "output", &scan_args->output, "Output directory. DEFAULT=index.sist2/"),
@ -633,7 +672,7 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "es-index", &common_es_index, "Elasticsearch index name. DEFAULT=sist2"),
OPT_BOOLEAN('p', "print", &index_args->print, "Just print JSON documents to stdout."),
OPT_BOOLEAN(0, "incremental-index", &index_args->incremental,
"Conduct incremental indexing, assumes that the old index is already digested by Elasticsearch."),
"Conduct incremental indexing, assumes that the old index is already digested by Elasticsearch."),
OPT_STRING(0, "script-file", &common_script_path, "Path to user script."),
OPT_STRING(0, "mappings-file", &index_args->es_mappings_path, "Path to Elasticsearch mappings."),
OPT_STRING(0, "settings-file", &index_args->es_settings_path, "Path to Elasticsearch settings."),

View File

@ -60,6 +60,8 @@ static const char *const Version = VERSION;
#define SIST_PLATFORM unknown
#endif
#define EXPECTED_MONGOOSE_VERSION "7.3"
#define Q(x) #x
#define QUOTE(x) Q(x)

View File

@ -8,12 +8,16 @@
#include <src/ctx.h>
#define HTTP_SERVER_HEADER "Server: sist2/" VERSION "\r\n"
#define HTTP_TEXT_TYPE_HEADER "Content-Type: text/plain;charset=utf-8\r\n"
#define HTTP_REPLY_NOT_FOUND mg_http_reply(nc, 404, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Not found");
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
mg_printf(
nc,
"HTTP/1.1 %d %s\r\n"
"Server: sist2/" VERSION "\r\n"
HTTP_SERVER_HEADER
"Content-Length: %d\r\n"
"%s\r\n\r\n",
status_code, "OK",
@ -60,7 +64,7 @@ void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
mg_http_reply(nc, 404, "", "");
HTTP_REPLY_NOT_FOUND
return;
}
@ -70,7 +74,7 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
index_t *index = get_index_by_id(arg_md5);
if (index == NULL) {
mg_http_reply(nc, 404, "", "");
HTTP_REPLY_NOT_FOUND
return;
}
@ -138,10 +142,16 @@ void style_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
int parse_tn_num = FALSE;
if (hm->uri.len != 68) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
return;
if (hm->uri.len != 68 + 4) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
parse_tn_num = TRUE;
}
char arg_file_md5[MD5_STR_LENGTH];
@ -158,12 +168,25 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
store_t *store = get_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get store for index: %s", arg_index)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
char *data;
size_t data_len = 0;
char *data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
if (parse_tn_num) {
int tn_num = atoi(hm->uri.ptr + 68);
char tn_key[sizeof(md5_buf) + sizeof(int)];
memcpy(tn_key, md5_buf, sizeof(md5_buf));
memcpy(tn_key + sizeof(md5_buf), &tn_num, sizeof(tn_num));
data = store_read(store, (char *) tn_key, sizeof(tn_key), &data_len);
} else {
data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
}
if (data_len != 0) {
send_response_line(
nc, 200, data_len,
@ -173,7 +196,7 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
mg_send(nc, data, data_len);
free(data);
} else {
mg_http_reply(nc, 404, "Content-Type: text/plain;charset=utf-8\r\n", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
}
@ -182,7 +205,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len == 0) {
LOG_DEBUG("serve.c", "Client sent empty body, ignoring request")
mg_http_reply(nc, 500, "", "Invalid request");
mg_http_reply(nc, 500, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Invalid request");
return;
}
@ -226,6 +249,11 @@ void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, struct mg_http_message *hm) {
if (strcmp(MG_VERSION, EXPECTED_MONGOOSE_VERSION) != 0) {
LOG_WARNING("serve.c", "sist2 was not linked with latest mongoose version, "
"serving file from disk might not work as expected.")
}
const char *path = cJSON_GetObjectItem(json, "path")->valuestring;
const char *name = cJSON_GetObjectItem(json, "name")->valuestring;
const char *ext = cJSON_GetObjectItem(json, "extension")->valuestring;
@ -246,7 +274,7 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
char disposition[8192];
snprintf(disposition, sizeof(disposition),
"Content-Disposition: inline; filename=\"%s%s%s\"\r\nAccept-Ranges: bytes\r\n",
HTTP_SERVER_HEADER "Content-Disposition: inline; filename=\"%s%s%s\"\r\nAccept-Ranges: bytes\r\n",
name, strlen(ext) == 0 ? "" : ".", ext);
mg_http_serve_file(nc, hm, full_path, mime, disposition);
@ -273,6 +301,7 @@ void index_info(struct mg_connection *nc) {
cJSON *json = cJSON_CreateObject();
cJSON *arr = cJSON_AddArrayToObject(json, "indices");
cJSON_AddStringToObject(json, "mongooseVersion", MG_VERSION);
cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
cJSON_AddStringToObject(json, "version", Version);
cJSON_AddStringToObject(json, "esVersion", format_es_version(WebCtx.es_version));
@ -314,7 +343,7 @@ void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -328,14 +357,14 @@ void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -350,7 +379,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -369,7 +398,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
cJSON *parent = cJSON_GetObjectItem(source, "parent");
@ -383,7 +412,7 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (idx == NULL) {
cJSON_Delete(doc);
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -452,7 +481,7 @@ tag_req_t *parse_tag_request(cJSON *json) {
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -462,14 +491,14 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
LOG_DEBUG("serve.c", "Invalid tag request")
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
store_t *store = get_tag_store(arg_index);
if (store == NULL) {
LOG_DEBUGF("serve.c", "Could not get tag store for index: %s", arg_index)
mg_http_reply(nc, 404, "", "Not found");
HTTP_REPLY_NOT_FOUND
return;
}
@ -615,7 +644,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
} else if (mg_http_match_uri(hm, "/d/*")) {
document_info(nc, hm);
} else {
mg_http_reply(nc, 404, "", "Page not found");
HTTP_REPLY_NOT_FOUND
}
} else if (ev == MG_EV_POLL) {
@ -645,7 +674,8 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
free(tmp);
}
mg_http_reply(nc, 500, "", "");
mg_http_reply(nc, 500, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER,
"Elasticsearch error, see server logs.");
}
free_response(r);
@ -659,7 +689,7 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
void serve(const char *listen_address) {
printf("Starting web server @ http://%s\n", listen_address);
LOG_INFOF("serve.c", "Starting web server @ http://%s", listen_address)
struct mg_mgr mgr;
mg_mgr_init(&mgr);

File diff suppressed because one or more lines are too long

View File

@ -12,7 +12,7 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
struct archive_entry *entry = NULL;
arc_data_t arc_data;
if (ctx->tn_size <= 0) {
if (!ctx->enable_tn) {
return;
}
@ -44,7 +44,20 @@ void parse_comic(scan_comic_ctx_t *ctx, vfile_t *f, document_t *doc) {
break;
}
ret = store_image_thumbnail((scan_media_ctx_t *) ctx, buf, entry_size, doc, file_path);
scan_media_ctx_t media_ctx = {
.tn_count = ctx->enable_tn ? 1 : 0,
.tn_size = ctx->tn_size,
.tn_qscale = ctx->tn_qscale,
.tesseract_lang = NULL,
.tesseract_path = NULL,
.read_subtitles = FALSE,
.max_media_buffer = 0,
.log = ctx->log,
.logf = ctx->logf,
.store = ctx->store,
};
ret = store_image_thumbnail(&media_ctx, buf, entry_size, doc, file_path);
free(buf);
if (ret == TRUE) {

View File

@ -9,6 +9,7 @@ typedef struct {
logf_callback_t logf;
store_callback_t store;
int enable_tn;
int tn_size;
float tn_qscale;

View File

@ -155,7 +155,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_TN_META(doc, pixmap->w, pixmap->h)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
free(samples);
@ -283,7 +283,7 @@ parse_ebook_mem(scan_ebook_ctx_t *ctx, void *buf, size_t buf_len, const char *mi
APPEND_LONG_META(doc, MetaPages, page_count)
if (ctx->tn_size > 0) {
if (ctx->enable_tn) {
if (render_cover(ctx, fzctx, doc, fzdoc) == FALSE) {
fz_drop_stream(fzctx, stream);
fz_drop_document(fzctx, fzdoc);
@ -404,7 +404,7 @@ void parse_epub_fast(scan_ebook_ctx_t *ctx, vfile_t *f, document_t *doc) {
text_buffer_t content_buffer = text_buffer_create(ctx->content_size);
if (ctx->tn_size <= 0) {
if (!ctx->enable_tn) {
return;
}

View File

@ -6,6 +6,7 @@
typedef struct {
long content_size;
int tn_size;
int enable_tn;
const char *tesseract_lang;
const char *tesseract_path;
pthread_mutex_t mupdf_mutex;

View File

@ -176,7 +176,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
strcpy(meta_name->str_val, font_name);
APPEND_META(doc, meta_name)
if (ctx->enable_tn == TRUE) {
if (!ctx->enable_tn) {
FT_Done_Face(face);
free(buf);
return;
@ -231,7 +231,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
dyn_buffer_t bmp_data = dyn_buffer_create();
bmp_format(&bmp_data, dimensions, bitmap);
APPEND_TN_META(doc, dimensions.width, dimensions.height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data);

View File

@ -35,11 +35,6 @@
meta_long->long_val = value; \
APPEND_META(doc, meta_long)}
#define APPEND_TN_META(doc, width, height) \
{meta_line_t *meta_str = malloc(sizeof(meta_line_t) + 4 + 1 + 4); \
meta_str->key = MetaThumbnail; \
sprintf(meta_str->str_val, "%04d,%04d", width, height); \
APPEND_META(doc, meta_str)}
#define APPEND_META(doc, meta) \
meta->next = NULL;\

View File

@ -6,7 +6,6 @@
#define AVIO_BUF_SIZE 8192
#define IS_VIDEO(fmt) ((fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0)
#define STREAM_IS_IMAGE (stream->nb_frames <= 1)
#define STORE_AS_IS ((void*)-1)
@ -398,6 +397,110 @@ void ocr_image(scan_media_ctx_t *ctx, document_t *doc, const AVCodecContext *dec
av_frame_free(&rgb_frame);
}
/* Return codes of decode_frame_and_save_thumbnail() */
#define SAVE_THUMBNAIL_OK 0       /* a thumbnail was handed to ctx->store */
#define SAVE_THUMBNAIL_SKIPPED 1  /* a frame was decoded but deliberately not stored */
#define SAVE_THUMBNAIL_FAILED 2   /* seeking, decoding, scaling or encoding failed */

/**
 * Seek into the media file, decode one frame and store it as a thumbnail.
 *
 * @param ctx             Scan context (thumbnail size/quality, store and log callbacks).
 * @param pFormatCtx      Opened format context of the media file.
 * @param decoder         Opened decoder for the selected video stream.
 * @param stream          The selected video stream (only its codec id is read here).
 * @param video_stream    Index of the selected video stream, forwarded to read_frame().
 * @param doc             Document being scanned; its path_md5 is the base of the store key.
 * @param seek_ratio      Position to seek to, as a fraction of pFormatCtx->duration.
 * @param thumbnail_index 0 for the primary thumbnail (stored under path_md5, and the only
 *                        call that appends video metadata); 1 is decoded but never stored;
 *                        values > 1 are stored under path_md5 followed by the raw bytes of
 *                        the int (thumbnail_index - 1).
 *
 * @return SAVE_THUMBNAIL_OK, SAVE_THUMBNAIL_SKIPPED or SAVE_THUMBNAIL_FAILED.
 */
int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *decoder,
                                    AVStream *stream, int video_stream, document_t *doc, double seek_ratio,
                                    int thumbnail_index) {

    if (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
        // Timestamps are 64-bit in libavformat; a plain 'long' is only 32 bits wide on
        // LLP64 platforms and would truncate the target position.
        int64_t ts = (int64_t) ((double) pFormatCtx->duration * seek_ratio);

        int seek_ret = avformat_seek_file(
                // Allow +- 1s
                pFormatCtx, -1, ts - AV_TIME_BASE, ts, ts + AV_TIME_BASE,
                0
        );

        if (seek_ret != 0) {
            CTX_LOG_DEBUGF(
                    doc->filepath,
                    "(media.c) Could not seek media file: %s", av_err2str(seek_ret)
            )

            // The primary thumbnail can still be taken from the current read position;
            // additional ones would all come from the same place, so give up on them.
            if (thumbnail_index != 0) {
                CTX_LOG_WARNING(doc->filepath, "(media.c) Could not seek media file. Can't generate additional thumbnails.")
                return SAVE_THUMBNAIL_FAILED;
            }
        }
    }

    frame_and_packet_t *frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
    if (frame_and_packet == NULL) {
        return SAVE_THUMBNAIL_FAILED;
    }

    // NOTE(review): OCR is gated on IS_VIDEO() and runs on every call, i.e. once per
    // generated thumbnail - confirm that this is the intended condition.
    if (ctx->tesseract_lang != NULL && IS_VIDEO(pFormatCtx)) {
        ocr_image(ctx, doc, decoder, frame_and_packet->frame);
    }

    // NOTE: OCR'd content takes precedence over exif image description
    if (thumbnail_index == 0) {
        append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));
    }

    // Scale frame
    AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);

    if (scaled_frame == NULL) {
        frame_and_packet_free(frame_and_packet);
        return SAVE_THUMBNAIL_FAILED;
    }

    int return_value;

    if (scaled_frame == STORE_AS_IS) {
        // The original packet is stored unchanged, always under the primary key
        return_value = SAVE_THUMBNAIL_OK;
        ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
                   frame_and_packet->packet->size);
    } else {
        // Encode frame to jpeg
        AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
                                                          ctx->tn_qscale);

        // av_init_packet() does not touch data/size, initialize them explicitly so that
        // a failed encode never leaves them undefined.
        AVPacket jpeg_packet;
        av_init_packet(&jpeg_packet);
        jpeg_packet.data = NULL;
        jpeg_packet.size = 0;

        int encode_ok = FALSE;
        if (jpeg_encoder == NULL) {
            CTX_LOG_DEBUGF(doc->filepath, "(media.c) Could not encode thumbnail: %s", "encoder allocation failed")
        } else {
            int encode_ret = avcodec_send_frame(jpeg_encoder, scaled_frame);
            if (encode_ret >= 0) {
                encode_ret = avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
            }

            if (encode_ret < 0) {
                CTX_LOG_DEBUGF(doc->filepath, "(media.c) Could not encode thumbnail: %s", av_err2str(encode_ret))
            } else {
                encode_ok = TRUE;
            }
        }

        // Save thumbnail
        const int wants_store = thumbnail_index == 0 || thumbnail_index > 1;

        if (!wants_store) {
            // TO FIX: the 2nd rendered frame is always broken, just skip it until
            //  I figure out a better fix.
            return_value = SAVE_THUMBNAIL_SKIPPED;
        } else if (encode_ok == FALSE) {
            // Never hand an empty packet to the store or report it as a thumbnail
            return_value = SAVE_THUMBNAIL_FAILED;
        } else if (thumbnail_index == 0) {
            ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
            return_value = SAVE_THUMBNAIL_OK;
        } else {
            // Index 1 is never stored, so the stored thumbnails are numbered without a gap
            int stored_index = thumbnail_index - 1;

            char tn_key[sizeof(doc->path_md5) + sizeof(int)];
            memcpy(tn_key, doc->path_md5, sizeof(doc->path_md5));
            memcpy(tn_key + sizeof(doc->path_md5), &stored_index, sizeof(stored_index));

            ctx->store((char *) tn_key, sizeof(tn_key), (char *) jpeg_packet.data, jpeg_packet.size);
            return_value = SAVE_THUMBNAIL_OK;
        }

        avcodec_free_context(&jpeg_encoder);
        av_packet_unref(&jpeg_packet);
        av_free(*scaled_frame->data);
        av_frame_free(&scaled_frame);
    }

    frame_and_packet_free(frame_and_packet);

    return return_value;
}
void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) {
int video_stream = -1;
@ -458,7 +561,7 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
append_audio_meta(pFormatCtx, doc);
}
if (video_stream != -1 && ctx->tn_size > 0) {
if (video_stream != -1 && ctx->tn_count > 0) {
AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
@ -473,69 +576,38 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
avcodec_parameters_to_context(decoder, stream->codecpar);
avcodec_open2(decoder, video_codec, NULL);
//Seek
if (!STREAM_IS_IMAGE && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
int seek_ret;
for (int i = 20; i >= 0; i--) {
seek_ret = av_seek_frame(pFormatCtx, video_stream,
(long) ((double) stream->duration * 0.10), 0);
if (seek_ret == 0) {
break;
}
int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE);
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF && video_duration_in_seconds >= 15)
// Limit to ~1 thumbnail every 5s
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 5 + 1), 1) + 1
: 1;
const double seek_increment = thumbnails_to_generate == 1
? 0.10
: 1.0 / (thumbnails_to_generate + 1);
int number_of_thumbnails_generated = 0;
int save_thumbnail_ret;
for (int i = 0; i < thumbnails_to_generate; i++) {
double seek_ratio = seek_increment * i + seek_increment * 0.9;
save_thumbnail_ret = decode_frame_and_save_thumbnail(ctx, pFormatCtx, decoder, stream, video_stream, doc,
seek_ratio, i);
if (save_thumbnail_ret == SAVE_THUMBNAIL_FAILED) {
break;
}
if (save_thumbnail_ret == SAVE_THUMBNAIL_OK) {
number_of_thumbnails_generated += 1;
}
}
frame_and_packet_t *frame_and_packet = read_frame(ctx, pFormatCtx, decoder, video_stream, doc);
if (frame_and_packet == NULL) {
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
if (number_of_thumbnails_generated > 0) {
APPEND_LONG_META(doc, MetaThumbnail, number_of_thumbnails_generated)
}
if (ctx->tesseract_lang != NULL && STREAM_IS_IMAGE) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
}
// NOTE: OCR'd content takes precedence over exif image description
append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));
// Scale frame
AVFrame *scaled_frame = scale_frame(decoder, frame_and_packet->frame, ctx->tn_size);
if (scaled_frame == NULL) {
frame_and_packet_free(frame_and_packet);
avcodec_free_context(&decoder);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;
}
if (scaled_frame == STORE_AS_IS) {
APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
AVCodecContext *jpeg_encoder = alloc_jpeg_encoder(scaled_frame->width, scaled_frame->height,
ctx->tn_qscale);
avcodec_send_frame(jpeg_encoder, scaled_frame);
AVPacket jpeg_packet;
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
avcodec_free_context(&jpeg_encoder);
av_packet_unref(&jpeg_packet);
av_free(*scaled_frame->data);
av_frame_free(&scaled_frame);
}
frame_and_packet_free(frame_and_packet);
avcodec_free_context(&decoder);
}
@ -772,7 +844,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
}
if (scaled_frame == STORE_AS_IS) {
APPEND_TN_META(doc, frame_and_packet->frame->width, frame_and_packet->frame->height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
@ -786,7 +858,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
// Save thumbnail
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);

View File

@ -17,6 +17,9 @@ typedef struct {
int tn_size;
float tn_qscale;
/** Number of thumbnails to generate for videos */
int tn_count;
long max_media_buffer;
int read_subtitles;

View File

@ -76,6 +76,7 @@ void parse_msdoc_pdf(scan_msdoc_ctx_t *ctx, document_t *doc, FILE *file, void *b
scan_ebook_ctx_t ebook_ctx = {
.content_size = ctx->content_size,
.tn_size = ctx->tn_size,
.enable_tn = TRUE,
.log = ctx->log,
.logf = ctx->logf,
.store = ctx->store,
@ -137,7 +138,7 @@ void parse_msdoc(scan_msdoc_ctx_t *ctx, vfile_t *f, document_t *doc) {
return;
}
if (ctx->tn_size > 0) {
if (ctx->enable_tn) {
char *buf_pdf = malloc(buf_len);
memcpy(buf_pdf, buf, buf_len);
parse_msdoc_pdf(ctx, doc, file, buf_pdf, buf_len);

View File

@ -5,6 +5,7 @@
typedef struct {
long content_size;
int enable_tn;
int tn_size;
log_callback_t log;
logf_callback_t logf;

View File

@ -190,7 +190,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
char *buf = malloc(entry_size);
archive_read_data(a, buf, entry_size);
APPEND_TN_META(doc, 1, 1) // Size unknown
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, entry_size);
free(buf);
}
@ -238,7 +238,7 @@ void parse_ooxml(scan_ooxml_ctx_t *ctx, vfile_t *f, document_t *doc) {
if (read_doc_props(ctx, a, doc) != 0) {
break;
}
} else if (strcmp(path, "docProps/thumbnail.jpeg") == 0) {
} else if (ctx->enable_tn && strcmp(path, "docProps/thumbnail.jpeg") == 0) {
read_thumbnail(ctx, doc, a, entry);
}
}

View File

@ -5,6 +5,7 @@
#include "../scan.h"
typedef struct {
int enable_tn;
long content_size;
log_callback_t log;
logf_callback_t logf;

View File

@ -69,7 +69,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
av_init_packet(&jpeg_packet);
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_TN_META(doc, scaled_frame->width, scaled_frame->height)
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
@ -157,7 +157,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
APPEND_STR_META(doc, MetaMediaVideoCodec, "raw")
if (ctx->tn_size <= 0) {
if (!ctx->enable_tn) {
free(buf);
libraw_close(libraw_lib);
return;

View File

@ -8,6 +8,7 @@ typedef struct {
logf_callback_t logf;
store_callback_t store;
int enable_tn;
int tn_size;
float tn_qscale;
} scan_raw_ctx_t;

View File

@ -350,9 +350,13 @@ TEST(Comic, ComicIssue160) {
load_doc_file("libscan-test-files/test_files/ebook/comic-segfault-issue-160.cbr", &f, &doc);
int tn_size_saved = comic_ctx.tn_size;
comic_ctx.tn_size = 0;
size_t size_before = store_size;
comic_ctx.enable_tn = FALSE;
parse_comic(&comic_ctx, &f, &doc);
comic_ctx.tn_size = tn_size_saved;
comic_ctx.enable_tn = tn_size_saved;
ASSERT_EQ(store_size, size_before);
cleanup(&doc, &f);
}
@ -669,8 +673,6 @@ TEST(Ooxml, Docx2Archive) {
ASSERT_EQ(get_meta(&LastSubDoc, MetaPages)->long_val, 1);
ASSERT_EQ(strlen(get_meta(&LastSubDoc, MetaContent)->str_val), 2780);
fprintf(stderr, "%s\n", get_meta(&LastSubDoc, MetaContent)->str_val);
ooxml_500_ctx.content_size = 500;
cleanup(&doc, &f);
@ -1111,6 +1113,7 @@ int main(int argc, char **argv) {
ebook_ctx.tesseract_lang = "eng";
ebook_ctx.tesseract_path = "./tessdata";
ebook_ctx.tn_size = 500;
ebook_ctx.enable_tn = TRUE;
ebook_ctx.log = noop_log;
ebook_ctx.logf = noop_logf;
ebook_ctx.fast_epub_parse = 0;
@ -1124,12 +1127,14 @@ int main(int argc, char **argv) {
comic_ctx.tn_qscale = 1.0;
comic_ctx.tn_size = 500;
comic_ctx.enable_tn = TRUE;
comic_ctx.log = noop_log;
comic_ctx.logf = noop_logf;
comic_ctx.store = counter_store;
comic_big_ctx.tn_qscale = 1.0;
comic_big_ctx.tn_size = 5000;
comic_big_ctx.enable_tn = TRUE;
comic_big_ctx.log = noop_log;
comic_big_ctx.logf = noop_logf;
comic_big_ctx.store = counter_store;
@ -1138,10 +1143,12 @@ int main(int argc, char **argv) {
media_ctx.logf = noop_logf;
media_ctx.store = counter_store;
media_ctx.tn_size = 500;
media_ctx.tn_count = 1;
media_ctx.tn_qscale = 1.0;
media_ctx.max_media_buffer = (long) 2000 * (long) 1024 * (long) 1024;
ooxml_500_ctx.content_size = 500;
ooxml_500_ctx.enable_tn = TRUE;
ooxml_500_ctx.log = noop_log;
ooxml_500_ctx.logf = noop_logf;
ooxml_500_ctx.store = counter_store;
@ -1154,6 +1161,7 @@ int main(int argc, char **argv) {
raw_ctx.logf = noop_logf;
raw_ctx.store = counter_store;
raw_ctx.tn_size = 500;
raw_ctx.enable_tn = TRUE;
raw_ctx.tn_qscale = 5.0;
msdoc_ctx.log = noop_log;
@ -1161,12 +1169,14 @@ int main(int argc, char **argv) {
msdoc_ctx.store = counter_store;
msdoc_ctx.content_size = 500;
msdoc_ctx.tn_size = 500;
msdoc_ctx.enable_tn = TRUE;
msdoc_text_ctx.log = noop_log;
msdoc_text_ctx.logf = noop_logf;
msdoc_text_ctx.store = counter_store;
msdoc_text_ctx.content_size = 500;
msdoc_text_ctx.tn_size = 0;
msdoc_text_ctx.enable_tn = FALSE;
wpd_ctx.log = noop_log;
wpd_ctx.logf = noop_logf;