Merge pull request #284 from simon987/dev

v2.12.0
This commit is contained in:
simon987 2022-04-17 13:38:42 -04:00 committed by GitHub
commit fe56da95d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
56 changed files with 803 additions and 459 deletions

3
.gitmodules vendored
View File

@ -7,3 +7,6 @@
[submodule "third-party/libscan/third-party/antiword"]
path = third-party/libscan/third-party/antiword
url = https://github.com/simon987/antiword
[submodule "third-party/libscan/third-party/libmobi"]
path = third-party/libscan/third-party/libmobi
url = https://github.com/bfabiszewski/libmobi

View File

@ -4,6 +4,7 @@ set(CMAKE_C_STANDARD 11)
project(sist2 C)
option(SIST_DEBUG "Build a debug executable" on)
option(SIST_FAST "Enable more optimisation flags" off)
option(SIST_FAKE_STORE "Disable IO operations of LMDB stores for debugging purposes" 0)
add_compile_definitions(
@ -54,6 +55,10 @@ find_package(lmdb CONFIG REQUIRED)
find_package(cJSON CONFIG REQUIRED)
find_package(unofficial-mongoose CONFIG REQUIRED)
find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB
NAMES libmagic.so.1 magic
PATHS /usr/lib/x86_64-linux-gnu/ /usr/lib/aarch64-linux-gnu/
)
target_include_directories(
@ -93,10 +98,22 @@ if (SIST_DEBUG)
PROPERTIES
OUTPUT_NAME sist2_debug
)
elseif (SIST_FAST)
target_compile_options(
sist2
PRIVATE
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else ()
target_compile_options(
sist2
PRIVATE
-Ofast
-fno-stack-protector
-fomit-frame-pointer
@ -121,11 +138,12 @@ target_link_libraries(
CURL::libcurl
pthread
magic
c
scan
${MAGIC_LIB}
)
add_custom_target(

View File

@ -9,7 +9,7 @@ RUN strip sist2 || mv sist2_debug sist2
FROM --platform="linux/amd64" ubuntu:21.10
RUN apt update && apt install -y curl libasan5 && rm -rf /var/lib/apt/lists/*
RUN apt update && apt install -y curl libasan5 libmagic1 && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /usr/share/tessdata && \
cd /usr/share/tessdata/ && \

View File

@ -52,7 +52,7 @@ sist2 (Simple incremental search tool)
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x` *
2. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
recommended!)*
3. *(or)* `docker pull simon987/sist2:2.11.7-x64-linux`
3. *(or)* `docker pull simon987/sist2:2.12.0-x64-linux`
1. See [Usage guide](docs/USAGE.md)

View File

@ -103,7 +103,7 @@ Made by simon987 <me@simon987.net>. Released under GPL-3.0
* `--thumbnail-count`
Maximum number of thumbnails to generate. When set to a value >= 2, thumbnails for video previews
will be generated. The actual number of thumbnails generated depends on the length of the video (maximum 1 image
every ~5s). Set to 0 to completely disable thumbnails.
every ~7s). Set to 0 to completely disable thumbnails.
* `--content-size`
Number of bytes of text to be extracted from the content of files (plain text, PDFs etc.).
Repeated whitespace and special characters do not count toward this limit.

View File

@ -3,7 +3,7 @@
"refresh_interval": "30s",
"codec": "best_compression",
"number_of_replicas": 0,
"highlight.max_analyzed_offset": 10000000
"highlight.max_analyzed_offset": 1000000
},
"analysis": {
"tokenizer": {
@ -55,5 +55,37 @@
]
}
}
},
"mappings": {
"dynamic_templates": [
{
"keyword_fields": {
"match_mapping_type": "string",
"match": "kw_*",
"mapping": {
"type": "keyword"
}
}
},
{
"integer_fields": {
"match_mapping_type": "*",
"match": "int_*",
"mapping": {
"type": "integer"
}
}
},
{
"meta_fields": {
"match_mapping_type": "*",
"match": "mt_*",
"mapping": {
"type": "keyword",
"index": false
}
}
}
]
}
}

View File

@ -1,2 +1,3 @@
docker run --rm -it -p 9200:9200 -e "discovery.type=single-node" \
docker run --rm -it --name "sist2-dev-es"\
-p 9200:9200 -e "discovery.type=single-node" \
-e "ES_JAVA_OPTS=-Xms8g -Xmx8g" elasticsearch:7.14.0

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -12,7 +12,6 @@
"axios": "^0.25.0",
"bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5",
"crypto-es": "^1.2.7",
"d3": "^5.16.0",
"date-fns": "^2.21.3",
"dom-to-image": "^2.6.0",
@ -5261,11 +5260,6 @@
"node": "*"
}
},
"node_modules/crypto-es": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/crypto-es/-/crypto-es-1.2.7.tgz",
"integrity": "sha512-UUqiVJ2gUuZFmbFsKmud3uuLcNP2+Opt+5ysmljycFCyhA0+T16XJmo1ev/t5kMChMqWh7IEvURNCqsg+SjZGQ=="
},
"node_modules/css-color-names": {
"version": "0.0.4",
"resolved": "https://registry.npmjs.org/css-color-names/-/css-color-names-0.0.4.tgz",
@ -9742,9 +9736,9 @@
}
},
"node_modules/minimist": {
"version": "1.2.5",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
"version": "1.2.6",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
"dev": true
},
"node_modules/minipass": {
@ -19621,11 +19615,6 @@
"randomfill": "^1.0.3"
}
},
"crypto-es": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/crypto-es/-/crypto-es-1.2.7.tgz",
"integrity": "sha512-UUqiVJ2gUuZFmbFsKmud3uuLcNP2+Opt+5ysmljycFCyhA0+T16XJmo1ev/t5kMChMqWh7IEvURNCqsg+SjZGQ=="
},
"css-color-names": {
"version": "0.0.4",
"resolved": "https://registry.npmjs.org/css-color-names/-/css-color-names-0.0.4.tgz",
@ -23335,9 +23324,9 @@
}
},
"minimist": {
"version": "1.2.5",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
"integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
"version": "1.2.6",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
"integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
"dev": true
},
"minipass": {

View File

@ -11,7 +11,6 @@
"axios": "^0.25.0",
"bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5",
"crypto-es": "^1.2.7",
"d3": "^5.16.0",
"date-fns": "^2.21.3",
"dom-to-image": "^2.6.0",

View File

@ -1,6 +1,5 @@
import axios from "axios";
import {ext, strUnescape, lum} from "./util";
import CryptoES from 'crypto-es';
export interface EsTag {
id: string
@ -30,7 +29,6 @@ export interface EsHit {
_index: string
_id: string
_score: number
_path_md5: string
_type: string
_tags: Tag[]
_seq: number
@ -249,11 +247,6 @@ class Sist2Api {
res.hits.hits.forEach((hit: EsHit) => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
hit["_path_md5"] = CryptoES.MD5(
hit["_source"]["path"] +
(hit["_source"]["path"] ? "/" : "") +
hit["_source"]["name"] + ext(hit)
).toString();
this.setHitProps(hit);
this.setHitTags(hit);
@ -343,10 +336,6 @@ class Sist2Api {
};
}
getDocInfo(docId: string) {
return axios.get(`${this.baseUrl}d/${docId}`);
}
getTags() {
return this.esQuery({
aggs: {
@ -380,8 +369,7 @@ class Sist2Api {
return axios.post(`${this.baseUrl}tag/` + hit["_source"]["index"], {
delete: false,
name: tag,
doc_id: hit["_id"],
path_md5: hit._path_md5
doc_id: hit["_id"]
});
}
@ -389,8 +377,7 @@ class Sist2Api {
return axios.post(`${this.baseUrl}tag/` + hit["_source"]["index"], {
delete: true,
name: tag,
doc_id: hit["_id"],
path_md5: hit._path_md5
doc_id: hit["_id"]
});
}

View File

@ -69,7 +69,7 @@ interface SortMode {
class Sist2Query {
searchQuery(): any {
searchQuery(blankSearch: boolean = false): any {
const getters = store.getters;
@ -93,22 +93,6 @@ class Sist2Query {
{terms: {index: selectedIndexIds}}
] as any[];
if (sizeMin && sizeMax) {
filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
} else if (sizeMin) {
filters.push({range: {size: {gte: sizeMin}}})
} else if (sizeMax) {
filters.push({range: {size: {lte: sizeMax}}})
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}})
}
const fields = [
"name^8",
"content^3",
@ -128,20 +112,39 @@ class Sist2Query {
fields.push("name.nGram^3");
}
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") {
filters.push({term: {path: path}})
}
if (!blankSearch) {
if (sizeMin && sizeMax) {
filters.push({range: {size: {gte: sizeMin, lte: sizeMax}}})
} else if (sizeMin) {
filters.push({range: {size: {gte: sizeMin}}})
} else if (sizeMax) {
filters.push({range: {size: {lte: sizeMax}}})
}
if (selectedMimeTypes.length > 0) {
filters.push({terms: {"mime": selectedMimeTypes}});
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}})
}
if (selectedTags.length > 0) {
if (getters.optTagOrOperator) {
filters.push({terms: {"tag": selectedTags}});
} else {
selectedTags.forEach((tag: string) => filters.push({term: {"tag": tag}}));
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
if (path !== "") {
filters.push({term: {path: path}})
}
if (selectedMimeTypes.length > 0) {
filters.push({terms: {"mime": selectedMimeTypes}});
}
if (selectedTags.length > 0) {
if (getters.optTagOrOperator) {
filters.push({terms: {"tag": selectedTags}});
} else {
selectedTags.forEach((tag: string) => filters.push({term: {"tag": tag}}));
}
}
}
@ -182,7 +185,7 @@ class Sist2Query {
size: size,
} as any;
if (!empty) {
if (!empty && !blankSearch) {
q.query.bool.must = query;
}
@ -207,7 +210,7 @@ class Sist2Query {
};
if (!legacyES) {
q.highlight.max_analyzed_offset = 9_999_999;
q.highlight.max_analyzed_offset = 999_999;
}
if (getters.optSearchInPath) {
@ -237,7 +240,7 @@ class Sist2Query {
}
}
if (!empty) {
if (!empty && !blankSearch) {
q.query.function_score.query.bool.must.push(query);
}
}

View File

@ -72,6 +72,12 @@ export default {
}
});
Object.keys(src).forEach(key => {
if (key.startsWith("mt_") || key.startsWith("int_")) {
items.push({key: key, value: src[key]});
}
});
// Exif GPS
if ("exif_gps_longitude_dec" in src) {
items.push({

View File

@ -1,11 +1,13 @@
<template>
<Preloader v-if="loading"></Preloader>
<div v-else-if="content" class="content-div">{{ content }}</div>
<div v-else-if="content" class="content-div" v-html="content"></div>
</template>
<script>
import Sist2Api from "@/Sist2Api";
import Preloader from "@/components/Preloader";
import Sist2Query from "@/Sist2Query";
import store from "@/store";
export default {
name: "LazyContentDiv",
@ -18,10 +20,72 @@ export default {
}
},
mounted() {
Sist2Api.getDocInfo(this.docId).then(src => {
this.content = src.data.content;
const query = Sist2Query.searchQuery();
if (this.$store.state.optHighlight) {
const fields = this.$store.state.fuzzy
? {"content.nGram": {}}
: {content: {}};
query.highlight = {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
number_of_fragments: 0,
fields,
};
if (!store.state.sist2Info.esVersionLegacy) {
query.highlight.max_analyzed_offset = 999_999;
}
}
if ("function_score" in query.query) {
query.query = query.query.function_score.query;
}
if (!("must" in query.query.bool)) {
query.query.bool.must = [];
} else if (!Array.isArray(query.query.bool.must)) {
query.query.bool.must = [query.query.bool.must];
}
query.query.bool.must.push({match: {_id: this.docId}});
delete query["sort"];
delete query["aggs"];
delete query["search_after"];
delete query.query["function_score"];
query._source = {
includes: ["content", "name", "path", "extension"]
}
query.size = 1;
Sist2Api.esQuery(query).then(resp => {
this.loading = false;
})
if (resp.hits.hits.length === 1) {
this.content = this.getContent(resp.hits.hits[0]);
} else {
console.log("FIXME: could not get content")
console.log(resp)
}
});
},
methods: {
getContent(doc) {
if (!doc.highlight) {
return doc._source.content;
}
if (doc.highlight["content.nGram"]) {
return doc.highlight["content.nGram"][0];
}
if (doc.highlight.content) {
return doc.highlight.content[0];
}
}
}
}
</script>

View File

@ -1,6 +1,7 @@
<template>
<div>
<div :class="{'disable-animations': $store.state.optSimpleLightbox}">
<FsLightbox
ref="lightbox"
:key="lightboxKey"
:toggler="showLightbox"
:sources="lightboxSources"
@ -10,7 +11,7 @@
:source-index="lightboxSlide"
:custom-toolbar-buttons="customButtons"
:slideshow-time="$store.getters.optLightboxSlideDuration * 1000"
:zoom-increment="0.5"
:zoom-increment="0.25"
:load-only-current-source="$store.getters.optLightboxLoadOnlyCurrent"
:on-close="onClose"
:on-open="onShow"
@ -29,6 +30,7 @@ export default {
components: {FsLightbox},
data() {
return {
disableAnimations: true,
customButtons: [
{
viewBox: "0 0 384.928 384.928",
@ -64,7 +66,84 @@ export default {
return this.$store.getters["uiLightboxTypes"];
}
},
mounted() {
const listener = document.onkeydown;
document.onkeydown = (e) => {
const ret = this.keyDownListener(e)
if (listener && ret) {
return listener(e);
}
};
},
methods: {
keyDownListener(e) {
const isLightboxOpen = this.$refs.lightbox === undefined || this.$refs.lightbox.$el.tagName === undefined;
if (isLightboxOpen) {
return true;
}
const lightboxStore = this.$refs.lightbox.fsLightboxStore.slice(-1)[0];
switch (e.key) {
case " ": {
e.preventDefault();
e.stopPropagation();
e.stopImmediatePropagation();
// Find video at current slide, toggle play/pause
[...document.getElementsByClassName("fslightbox-absoluted")].forEach(elem => {
if (elem.style.transform === "translate(0px)" || elem.style.transform === "translate(0px, 0px)") {
const vid = elem.getElementsByTagName("video")[0];
if (vid) {
if (vid.paused) {
vid.play();
} else {
vid.pause()
}
}
}
return false;
});
return false;
}
case "ArrowUp":
case "k": {
if (!lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
lightboxStore.core.thumbsToggler.toggleThumbs();
}
return false;
}
case "ArrowDown":
case "j": {
if (lightboxStore.data.isThumbing && lightboxStore.core.thumbsToggler) {
lightboxStore.core.thumbsToggler.toggleThumbs();
}
return false;
}
case "h": {
if (lightboxStore.core.stageManager.getPreviousSlideIndex) {
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getPreviousSlideIndex());
}
return false;
}
case "l": {
if (lightboxStore.core.stageManager.getNextSlideIndex) {
lightboxStore.core.slideIndexChanger.jumpTo(lightboxStore.core.stageManager.getNextSlideIndex());
}
return false;
}
}
return true;
},
onDownloadClick() {
const url = this.lightboxSources[this.lightboxSlide];
@ -125,4 +204,20 @@ export default {
.fslightbox-toolbar-button:nth-child(7) {
order: 7;
}
.disable-animations .fslightbox-container {
background: rgba(30,30,30,.9);
}
.disable-animations .fslightbox-transform-transition {
transition: none;
}
.disable-animations .fslightbox-fade-in-strong {
animation: none;
}
.fslightbox-container video, .fslightbox-container img {
cursor: unset !important;
}
</style>

View File

@ -1,5 +1,13 @@
<template>
<div id="tagTree"></div>
<div>
<b-input-group v-if="showSearchBar" id="tag-picker-filter-bar">
<b-form-input :value="filter"
:placeholder="$t('tagFilter')"
@input="onFilter($event)"></b-form-input>
</b-input-group>
<div id="tagTree"></div>
</div>
</template>
<script>
@ -112,10 +120,12 @@ function addTag(map, tag, id, count) {
export default {
name: "TagPicker",
props: ["showSearchBar"],
data() {
return {
tagTree: null,
loadedFromArgs: false,
filter: ""
}
},
mounted() {
@ -129,6 +139,10 @@ export default {
});
},
methods: {
onFilter(value) {
this.filter = value;
this.tagTree.search(value);
},
initializeTree() {
const tagMap = [];
this.tagTree = new InspireTree({
@ -163,7 +177,8 @@ export default {
});
},
handleTreeClick(node, e) {
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused") {
if (e === "indeterminate" || e === "collapsed" || e === 'rendered' || e === "focused"
|| e === "matched" || e === "hidden") {
return;
}
@ -180,7 +195,15 @@ export default {
}
</style>
<style>
.inspire-tree .focused>.wholerow {
.inspire-tree .focused > .wholerow {
border: none;
}
#tag-picker-filter-bar {
padding: 10px 4px 4px;
}
.theme-black .inspire-tree .matched > .wholerow {
background: rgba(251, 191, 41, 0.25);
}
</style>

View File

@ -16,6 +16,7 @@ export default {
pages: "pages",
mimeTypes: "Media types",
tags: "Tags",
tagFilter: "Filter tags",
help: {
simpleSearch: "Simple search",
advancedSearch: "Advanced search",
@ -72,7 +73,9 @@ export default {
hideLegacy: "Hide the 'legacyES' Elasticsearch notice",
updateMimeMap: "Update the Media Types tree in real time",
useDatePicker: "Use a Date Picker component rather than a slider",
vidPreviewInterval: "Video preview frame duration in ms"
vidPreviewInterval: "Video preview frame duration in ms",
simpleLightbox: "Disable animations in image viewer",
showTagPickerFilter: "Display the tag filter bar"
},
queryMode: {
simple: "Simple",
@ -182,6 +185,7 @@ export default {
pages: "pages",
mimeTypes: "Types de médias",
tags: "Tags",
tagFilter: "Filtrer les tags",
help: {
simpleSearch: "Recherche simple",
advancedSearch: "Recherche avancée",
@ -239,7 +243,9 @@ export default {
hideLegacy: "Masquer la notice 'legacyES' Elasticsearch",
updateMimeMap: "Mettre à jour l'arbre de Types de médias en temps réel",
useDatePicker: "Afficher un composant « Date Picker » plutôt qu'un slider",
vidPreviewInterval: "Durée des images d'aperçu video en millisecondes"
vidPreviewInterval: "Durée des images d'aperçu video en millisecondes",
simpleLightbox: "Désactiver les animations du visualiseur d'images",
showTagPickerFilter: "Afficher le filtre dans l'onglet Tags"
},
queryMode: {
simple: "Simple",
@ -350,6 +356,7 @@ export default {
pages: "页",
mimeTypes: "文件类型",
tags: "标签",
tagFilter: "筛选标签",
help: {
simpleSearch: "简易搜索",
advancedSearch: "高级搜索",
@ -406,7 +413,9 @@ export default {
hideLegacy: "隐藏'legacyES' Elasticsearch 通知",
updateMimeMap: "媒体类型树的实时更新",
useDatePicker: "使用日期选择器组件而不是滑块",
vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位"
vidPreviewInterval: "视频预览帧的持续时间,以毫秒为单位",
simpleLightbox: "在图片查看器中,禁用动画",
showTagPickerFilter: "显示标签过滤栏"
},
queryMode: {
simple: "简单",

View File

@ -4,6 +4,8 @@ import VueRouter, {Route} from "vue-router";
import {EsHit, EsResult, EsTag, Index, Tag} from "@/Sist2Api";
import {deserializeMimes, serializeMimes} from "@/util";
const CONF_VERSION = 2;
Vue.use(Vuex)
export default new Vuex.Store({
@ -24,7 +26,6 @@ export default new Vuex.Store({
sortMode: "score",
fuzzy: false,
size: 60,
optLang: "en",
optLangIsDefault: true,
@ -32,6 +33,7 @@ export default new Vuex.Store({
optTheme: "light",
optDisplay: "grid",
optSize: 60,
optHighlight: true,
optTagOrOperator: false,
optFuzzy: true,
@ -51,6 +53,8 @@ export default new Vuex.Store({
optUpdateMimeMap: false,
optUseDatePicker: false,
optVidPreviewInterval: 700,
optSimpleLightbox: true,
optShowTagPickerFilter: true,
_onLoadSelectedIndices: [] as string[],
_onLoadSelectedMimeTypes: [] as string[],
@ -149,7 +153,7 @@ export default new Vuex.Store({
setOptSuggestPath: (state, val) => state.optSuggestPath = val,
setOptFragmentSize: (state, val) => state.optFragmentSize = val,
setOptQueryMode: (state, val) => state.optQueryMode = val,
setOptResultSize: (state, val) => state.size = val,
setOptResultSize: (state, val) => state.optSize = val,
setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
setOptTreemapType: (state, val) => state.optTreemapType = val,
@ -161,6 +165,8 @@ export default new Vuex.Store({
setOptUpdateMimeMap: (state, val) => state.optUpdateMimeMap = val,
setOptUseDatePicker: (state, val) => state.optUseDatePicker = val,
setOptVidPreviewInterval: (state, val) => state.optVidPreviewInterval = val,
setOptSimpleLightbox: (state, val) => state.optSimpleLightbox = val,
setOptShowTagPickerFilter: (state, val) => state.optShowTagPickerFilter = val,
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
@ -239,6 +245,11 @@ export default new Vuex.Store({
}
},
async updateArgs({state}, router: VueRouter) {
if (router.currentRoute.path !== "/") {
return;
}
await router.push({
query: {
q: state.searchText.trim() ? state.searchText.trim().replace(/\s+/g, " ") : undefined,
@ -267,6 +278,8 @@ export default new Vuex.Store({
}
});
conf["version"] = CONF_VERSION;
localStorage.setItem("sist2_configuration", JSON.stringify(conf));
},
loadConfiguration({state}) {
@ -274,6 +287,11 @@ export default new Vuex.Store({
if (confString) {
const conf = JSON.parse(confString);
if (!("version" in conf) || conf["version"] != CONF_VERSION) {
localStorage.removeItem("sist2_configuration");
window.location.reload();
}
Object.keys(state).forEach((key) => {
if (key.startsWith("opt")) {
(state as any)[key] = conf[key];
@ -335,7 +353,7 @@ export default new Vuex.Store({
searchText: state => state.searchText,
pathText: state => state.pathText,
fuzzy: state => state.fuzzy,
size: state => state.size,
size: state => state.optSize,
sortMode: state => state.sortMode,
lastQueryResult: state => state.lastQueryResults,
lastDoc: function (state): EsHit | null {
@ -373,10 +391,12 @@ export default new Vuex.Store({
optTreemapColor: state => state.optTreemapColor,
optLightboxLoadOnlyCurrent: state => state.optLightboxLoadOnlyCurrent,
optLightboxSlideDuration: state => state.optLightboxSlideDuration,
optResultSize: state => state.size,
optResultSize: state => state.optSize,
optHideLegacy: state => state.optHideLegacy,
optUpdateMimeMap: state => state.optUpdateMimeMap,
optUseDatePicker: state => state.optUseDatePicker,
optVidPreviewInterval: state => state.optVidPreviewInterval,
optSimpleLightbox: state => state.optSimpleLightbox,
optShowTagPickerFilter: state => state.optShowTagPickerFilter,
}
})

View File

@ -45,6 +45,16 @@
<b-form-checkbox :checked="optUseDatePicker" @input="setOptUseDatePicker">
{{ $t("opt.useDatePicker") }}
</b-form-checkbox>
<b-form-checkbox :checked="optSimpleLightbox" @input="setOptSimpleLightbox">{{
$t("opt.simpleLightbox")
}}
</b-form-checkbox>
<b-form-checkbox :checked="optShowTagPickerFilter" @input="setOptShowTagPickerFilter">{{
$t("opt.showTagPickerFilter")
}}
</b-form-checkbox>
</b-card>
<br/>
@ -239,6 +249,8 @@ export default {
"optUpdateMimeMap",
"optUseDatePicker",
"optVidPreviewInterval",
"optSimpleLightbox",
"optShowTagPickerFilter",
]),
clientWidth() {
return window.innerWidth;
@ -285,6 +297,8 @@ export default {
"setOptUpdateMimeMap",
"setOptUseDatePicker",
"setOptVidPreviewInterval",
"setOptSimpleLightbox",
"setOptShowTagPickerFilter",
]),
onResetClick() {
localStorage.removeItem("sist2_configuration");

View File

@ -56,6 +56,22 @@ export default Vue.extend({
onThumbnailClick() {
window.open(`/f/${this.doc._id}`, "_blank");
},
findByCustomField(field, id) {
return {
query: {
bool: {
must: [
{
match: {
[field]: id
}
}
]
}
},
size: 1
}
},
findById(id) {
return {
query: {
@ -103,6 +119,8 @@ export default Vue.extend({
query = this.findById(this.$route.query.byId);
} else if (this.$route.query.byName) {
query = this.findByName(this.$route.query.byName);
} else if (this.$route.query.by && this.$route.query.q) {
query = this.findByCustomField(this.$route.query.by, this.$route.query.q)
}
if (query) {

View File

@ -32,7 +32,7 @@
<MimePicker></MimePicker>
</b-tab>
<b-tab :title="$t('tags')">
<TagPicker></TagPicker>
<TagPicker :show-search-bar="$store.state.optShowTagPickerFilter"></TagPicker>
</b-tab>
</b-tabs>
</b-col>
@ -139,7 +139,9 @@ export default Vue.extend({
this.setSist2Info(data);
this.setIndices(data.indices);
Sist2Api.getMimeTypes(Sist2Query.searchQuery()).then(({mimeMap}) => {
const doBlankSearch = !this.$store.state.optUpdateMimeMap;
Sist2Api.getMimeTypes(Sist2Query.searchQuery(doBlankSearch)).then(({mimeMap}) => {
this.$store.commit("setUiMimeMap", mimeMap);
this.uiLoading = false;
this.search(true);
@ -206,7 +208,7 @@ export default Vue.extend({
this.$store.commit("setUiReachedScrollEnd", false);
},
async handleSearch(resp: EsResult) {
if (resp.hits.hits.length == 0) {
if (resp.hits.hits.length == 0 || resp.hits.hits.length < this.$store.state.optSize) {
this.$store.commit("setUiReachedScrollEnd", true);
}
@ -246,6 +248,8 @@ export default Vue.extend({
this.$store.commit("setLastQueryResult", resp);
this.docs.push(...resp.hits.hits);
resp.hits.hits.forEach(hit => this.docIds.add(hit._id));
},
getDateRange(): Promise<{ min: number, max: number }> {
return sist2.esQuery({

View File

@ -81,6 +81,11 @@ void web_args_destroy(web_args_t *args) {
}
void exec_args_destroy(exec_args_t *args) {
if (args->index_path != NULL) {
free(args->index_path);
}
free(args);
}
@ -124,6 +129,9 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->tn_count = DEFAULT_THUMBNAIL_COUNT;
} else if (args->tn_count == OPTION_VALUE_DISABLE) {
args->tn_count = 0;
} else if (args->tn_count > 1000) {
printf("Invalid value --thumbnail-count argument: %d. Must be <= 1000.\n", args->tn_size);
return 1;
}
if (args->content_size == OPTION_VALUE_UNSPECIFIED) {

View File

@ -85,7 +85,7 @@ typedef struct web_args {
typedef struct exec_args {
char *es_url;
char *es_index;
const char *index_path;
char *index_path;
const char *script_path;
int async_script;
char *script;

View File

@ -21,6 +21,8 @@ void free_queue(int max);
void elastic_flush();
void print_error(response_t *r);
void destroy_indexer(es_indexer_t *indexer) {
if (indexer == NULL) {
@ -45,7 +47,7 @@ void elastic_cleanup() {
destroy_indexer(Indexer);
}
void print_json(cJSON *document, const char id_str[MD5_STR_LENGTH]) {
void print_json(cJSON *document, const char id_str[SIST_DOC_ID_LEN]) {
cJSON *line = cJSON_CreateObject();
@ -72,19 +74,19 @@ void delete_document(const char* document_id_str, void* UNUSED(_data)) {
bulk_line->type = ES_BULK_LINE_DELETE;
bulk_line->next = NULL;
memcpy(bulk_line->path_md5_str, document_id_str, MD5_STR_LENGTH);
strcpy(bulk_line->doc_id, document_id_str);
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
void index_json(cJSON *document, const char doc_id[SIST_DOC_ID_LEN]) {
char *json = cJSON_PrintUnformatted(document);
size_t json_len = strlen(json);
es_bulk_line_t *bulk_line = malloc(sizeof(es_bulk_line_t) + json_len + 2);
bulk_line->type = ES_BULK_LINE_INDEX;
memcpy(bulk_line->line, json, json_len);
memcpy(bulk_line->path_md5_str, index_id_str, MD5_STR_LENGTH);
strcpy(bulk_line->doc_id, doc_id);
*(bulk_line->line + json_len) = '\n';
*(bulk_line->line + json_len + 1) = '\0';
bulk_line->next = NULL;
@ -93,7 +95,7 @@ void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]) {
tpool_add_work(IndexCtx.pool, index_json_func, bulk_line);
}
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]) {
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]) {
if (Indexer == NULL) {
Indexer = create_indexer(IndexCtx.es_url, IndexCtx.es_index);
@ -108,16 +110,16 @@ void execute_update_script(const char *script, int async, const char index_id[MD
cJSON *term_obj = cJSON_AddObjectToObject(query, "term");
cJSON_AddStringToObject(term_obj, "index", index_id);
char *str = cJSON_Print(body);
char *str = cJSON_PrintUnformatted(body);
char bulk_url[4096];
char url[4096];
if (async) {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
snprintf(url, sizeof(url), "%s/%s/_update_by_query?wait_for_completion=false", Indexer->es_url,
Indexer->es_index);
} else {
snprintf(bulk_url, sizeof(bulk_url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
snprintf(url, sizeof(url), "%s/%s/_update_by_query", Indexer->es_url, Indexer->es_index);
}
response_t *r = web_post(bulk_url, str);
response_t *r = web_post(url, str);
if (!async) {
LOG_INFOF("elastic.c", "Executed user script <%d>", r->status_code);
}
@ -137,6 +139,11 @@ void execute_update_script(const char *script, int async, const char index_id[MD
if (async) {
cJSON *task = cJSON_GetObjectItem(resp, "task");
if (task == NULL) {
LOG_FATALF("elastic.c", "FIXME: Could not get task id: %s", r->body);
}
LOG_INFOF("elastic.c", "User script queued: %s/_tasks/%s", Indexer->es_url, task->valuestring);
}
@ -167,7 +174,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
snprintf(
action_str, sizeof(action_str),
"{\"index\":{\"_id\":\"%s\",\"_type\":\"_doc\",\"_index\":\"%s\"}}\n",
line->path_md5_str, Indexer->es_index
line->doc_id, Indexer->es_index
);
size_t action_str_len = strlen(action_str);
@ -184,7 +191,7 @@ void *create_bulk_buffer(int max, int *count, size_t *buf_len) {
snprintf(
action_str, sizeof(action_str),
"{\"delete\":{\"_id\":\"%s\",\"_index\":\"%s\"}}\n",
line->path_md5_str, Indexer->es_index
line->doc_id, Indexer->es_index
);
size_t action_str_len = strlen(action_str);
@ -212,7 +219,13 @@ void print_errors(response_t *r) {
*(tmp + r->size) = '\0';
cJSON *ret_json = cJSON_Parse(tmp);
if (cJSON_GetObjectItem(ret_json, "errors")->valueint != 0) {
cJSON *errors = cJSON_GetObjectItem(ret_json, "errors");
if (errors == NULL) {
char *str = cJSON_Print(ret_json);
LOG_ERRORF("elastic.c", "%s\n", str);
cJSON_free(str);
} else if (errors->valueint != 0) {
cJSON *err;
cJSON_ArrayForEach(err, cJSON_GetObjectItem(ret_json, "items")) {
if (cJSON_GetObjectItem(cJSON_GetObjectItem(err, "index"), "status")->valueint != 201) {
@ -263,7 +276,7 @@ void _elastic_flush(int max) {
if (r->status_code == 413) {
if (max <= 1) {
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->path_md5_str)
LOG_ERRORF("elastic.c", "Single document too large, giving up: {%s}", Indexer->line_head->doc_id)
free_response(r);
free(buf);
free_queue(1);
@ -413,12 +426,20 @@ es_version_t *elastic_get_version(const char *es_url) {
*(tmp + r->size) = '\0';
cJSON *response = cJSON_Parse(tmp);
free(tmp);
free_response(r);
if (response == NULL) {
return NULL;
}
if (cJSON_GetObjectItem(response, "error") != NULL) {
LOG_WARNING("elastic.c", "Could not get Elasticsearch version")
print_error(r);
free_response(r);
return NULL;
}
free_response(r);
if (cJSON_GetObjectItem(response, "version") == NULL ||
cJSON_GetObjectItem(cJSON_GetObjectItem(response, "version"), "number") == NULL) {
cJSON_Delete(response);

View File

@ -8,7 +8,7 @@
typedef struct es_bulk_line {
struct es_bulk_line *next;
char path_md5_str[MD5_STR_LENGTH];
char doc_id[SIST_DOC_ID_LEN];
int type;
char line[0];
} es_bulk_line_t;
@ -20,8 +20,8 @@ typedef struct {
} es_version_t;
#define VERSION_GE(version, maj, min) ((version)->major > (maj) || ((version)->major == (maj) && (version)->minor >= (min)))
#define IS_SUPPORTED_ES_VERSION(es_version) VERSION_GE((es_version), 6, 8)
#define USE_LEGACY_ES_SETTINGS(es_version) (!VERSION_GE((es_version), 7, 14))
#define IS_SUPPORTED_ES_VERSION(es_version) ((es_version) != NULL && VERSION_GE((es_version), 6, 8))
#define USE_LEGACY_ES_SETTINGS(es_version) ((es_version) != NULL && (!VERSION_GE((es_version), 7, 14)))
__always_inline
static const char *format_es_version(es_version_t *version) {
@ -40,9 +40,9 @@ typedef struct es_indexer es_indexer_t;
void elastic_index_line(es_bulk_line_t *line);
void print_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void print_json(cJSON *document, const char index_id_str[SIST_INDEX_ID_LEN]);
void index_json(cJSON *document, const char index_id_str[MD5_STR_LENGTH]);
void index_json(cJSON *document, const char doc_id[SIST_INDEX_ID_LEN]);
void delete_document(const char *document_id_str, void* data);
@ -59,6 +59,6 @@ char *elastic_get_status();
es_version_t *elastic_get_version(const char *es_url);
void execute_update_script(const char *script, int async, const char index_id[MD5_STR_LENGTH]);
void execute_update_script(const char *script, int async, const char index_id[SIST_INDEX_ID_LEN]);
#endif

File diff suppressed because one or more lines are too long

View File

@ -22,7 +22,7 @@ void free_response(response_t *resp) {
free(resp);
}
void web_post_async_poll(subreq_ctx_t* req) {
void web_post_async_poll(subreq_ctx_t *req) {
fd_set fdread;
fd_set fdwrite;
fd_set fdexcep;
@ -34,7 +34,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
CURLMcode mc = curl_multi_fdset(req->multi, &fdread, &fdwrite, &fdexcep, &maxfd);
if(mc != CURLM_OK) {
if (mc != CURLM_OK) {
req->done = TRUE;
return;
}
@ -47,7 +47,7 @@ void web_post_async_poll(subreq_ctx_t* req) {
struct timeval timeout = {1, 0};
int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
switch(rc) {
switch (rc) {
case -1:
req->done = TRUE;
break;
@ -142,6 +142,9 @@ response_t *web_post(const char *url, const char *data) {
curl_easy_setopt(curl, CURLOPT_POST, 1);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
char err_buffer[CURL_ERROR_SIZE + 1] = {};
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, err_buffer);
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
@ -151,12 +154,16 @@ response_t *web_post(const char *url, const char *data) {
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &resp->status_code);
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
resp->body = buffer.buf;
resp->size = buffer.cur;
if (resp->status_code == 0) {
LOG_ERRORF("web.c", "CURL Error: %s", err_buffer)
}
curl_easy_cleanup(curl);
curl_slist_free_all(headers);
return resp;
}
@ -175,7 +182,7 @@ response_t *web_put(const char *url, const char *data) {
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4 );
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "Content-Type: application/json");

View File

@ -124,9 +124,7 @@ char *build_json_string(document_t *doc) {
cJSON_AddStringToObject(json, "path", "");
}
char md5_str[MD5_STR_LENGTH];
buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, md5_str);
cJSON_AddStringToObject(json, "_id", md5_str);
cJSON_AddStringToObject(json, "_id", doc->doc_id);
// Metadata
meta_line_t *meta = doc->meta_head;
@ -452,32 +450,31 @@ void read_lines(const char *path, const line_processor_t processor) {
dyn_buffer_destroy(&buf);
fclose(file);
}
void read_index_ndjson(const char *line, void* _data) {
void** data = _data;
const char* index_id = data[0];
void read_index_ndjson(const char *line, void *_data) {
void **data = _data;
const char *index_id = data[0];
index_func func = data[1];
read_index_bin_handle_line(line, index_id, func);
}
void read_index(const char *path, const char index_id[MD5_STR_LENGTH], const char *type, index_func func) {
void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func func) {
if (strcmp(type, INDEX_TYPE_NDJSON) == 0) {
read_lines(path, (line_processor_t) {
.data = (void*[2]){(void*)index_id, func} ,
.func = read_index_ndjson,
.data = (void *[2]) {(void *) index_id, func},
.func = read_index_ndjson,
});
}
}
static __thread GHashTable *IncrementalReadTable = NULL;
void json_put_incremental(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
void json_put_incremental(cJSON *document, UNUSED(const char doc_id[SIST_DOC_ID_LEN])) {
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
const int mtime = cJSON_GetObjectItem(document, "mtime")->valueint;
incremental_put_str(IncrementalReadTable, path_md5_str, mtime);
incremental_put(IncrementalReadTable, path_md5_str, mtime);
}
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc) {
@ -490,13 +487,11 @@ static __thread GHashTable *IncrementalNewTable = NULL;
static __thread store_t *IncrementalCopySourceStore = NULL;
static __thread store_t *IncrementalCopyDestinationStore = NULL;
void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
unsigned char path_md5[MD5_DIGEST_LENGTH];
hex2buf(path_md5_str, MD5_STR_LENGTH - 1, path_md5);
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get_str(IncrementalCopyTable, path_md5_str)) {
if (cJSON_GetObjectItem(document, "parent") != NULL || incremental_get(IncrementalCopyTable, doc_id)) {
// Copy index line
cJSON_DeleteItemFromObject(document, "index");
char *json_str = cJSON_PrintUnformatted(document);
@ -510,9 +505,9 @@ void incremental_copy_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_S
// Copy tn store contents
size_t buf_len;
char *buf = store_read(IncrementalCopySourceStore, (char *) path_md5, sizeof(path_md5), &buf_len);
char *buf = store_read(IncrementalCopySourceStore, (char *) doc_id, SIST_DOC_ID_LEN, &buf_len);
if (buf_len != 0) {
store_write(IncrementalCopyDestinationStore, (char *) path_md5, sizeof(path_md5), buf, buf_len);
store_write(IncrementalCopyDestinationStore, (char *) doc_id, SIST_DOC_ID_LEN, buf, buf_len);
free(buf);
}
}
@ -536,24 +531,24 @@ void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
read_index(filepath, "", INDEX_TYPE_NDJSON, incremental_copy_handle_doc);
}
void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[MD5_STR_LENGTH])) {
void incremental_delete_handle_doc(cJSON *document, UNUSED(const char id_str[SIST_DOC_ID_LEN])) {
char path_md5_n[MD5_STR_LENGTH + 1];
path_md5_n[MD5_STR_LENGTH] = '\0';
path_md5_n[MD5_STR_LENGTH - 1] = '\n';
const char *path_md5_str = cJSON_GetObjectItem(document, "_id")->valuestring;
char doc_id_n[SIST_DOC_ID_LEN + 1];
doc_id_n[SIST_DOC_ID_LEN] = '\0';
doc_id_n[SIST_DOC_ID_LEN - 1] = '\n';
const char *doc_id = cJSON_GetObjectItem(document, "_id")->valuestring;
// do not delete archive virtual entries
if (cJSON_GetObjectItem(document, "parent") == NULL
&& !incremental_get_str(IncrementalCopyTable, path_md5_str)
&& !incremental_get_str(IncrementalNewTable, path_md5_str)
&& !incremental_get(IncrementalCopyTable, doc_id)
&& !incremental_get(IncrementalNewTable, doc_id)
) {
memcpy(path_md5_n, path_md5_str, MD5_STR_LENGTH - 1);
zstd_write_string(path_md5_n, MD5_STR_LENGTH);
memcpy(doc_id_n, doc_id, SIST_DOC_ID_LEN - 1);
zstd_write_string(doc_id, sizeof(doc_id_n));
}
}
void incremental_delete(const char *del_filepath, const char* index_filepath,
void incremental_delete(const char *del_filepath, const char *index_filepath,
GHashTable *copy_table, GHashTable *new_table) {
if (WriterCtx.out_file == NULL) {

View File

@ -12,7 +12,7 @@ typedef struct line_processor {
void (*func)(const char*, void*);
} line_processor_t;
typedef void(*index_func)(cJSON *, const char[MD5_STR_LENGTH]);
typedef void(*index_func)(cJSON *, const char[SIST_DOC_ID_LEN]);
void incremental_copy(store_t *store, store_t *dst_store, const char *filepath,
const char *dst_filepath, GHashTable *copy_table);
@ -24,7 +24,7 @@ void write_document(document_t *doc);
void read_lines(const char *path, const line_processor_t processor);
void read_index(const char *path, const char[MD5_STR_LENGTH], const char *type, index_func);
void read_index(const char *path, const char index_id[SIST_INDEX_ID_LEN], const char *type, index_func);
void incremental_read(GHashTable *table, const char *filepath, index_descriptor_t *desc);
@ -42,13 +42,13 @@ index_descriptor_t read_index_descriptor(char *path);
// caller ensures char file_path[PATH_MAX]
#define READ_INDICES(file_path, index_path, action_ok, action_main_fail, cond_original) \
snprintf(file_path, PATH_MAX, "%s_index_main.ndjson.zst", index_path); \
if (0 == access(file_path, R_OK)) { \
if (access(file_path, R_OK) == 0) { \
action_ok; \
} else { \
action_main_fail; \
} \
snprintf(file_path, PATH_MAX, "%s_index_original.ndjson.zst", index_path); \
if ((cond_original) && (0 == access(file_path, R_OK))) { \
if ((cond_original) && access(file_path, R_OK) == 0) { \
action_ok; \
} \

View File

@ -52,22 +52,7 @@ void store_flush(store_t *store) {
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {
if (LogCtx.very_verbose) {
if (key_len == MD5_DIGEST_LENGTH) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", path_md5_str, buf_len)
} else if (key_len == MD5_DIGEST_LENGTH + sizeof(int)) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex((unsigned char *) key, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF("store.c", "Store write {%s/%d} %lu bytes",
path_md5_str, *(int *) (key + MD5_DIGEST_LENGTH), buf_len);
} else {
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
}
LOG_DEBUGF("store.c", "Store write %s@{%s} %lu bytes", store->path, key, buf_len)
}
#if (SIST_FAKE_STORE != 1)

View File

@ -22,7 +22,7 @@ parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info,
job->vfile.info = *info;
memset(job->parent, 0, MD5_DIGEST_LENGTH);
job->parent[0] = '\0';
job->vfile.filepath = job->filepath;
job->vfile.read = fs_read;

View File

@ -38,8 +38,8 @@ static __sighandler_t sigabrt_handler = NULL;
void sig_handler(int signum) {
LogCtx.verbose = 1;
LogCtx.very_verbose = 1;
LogCtx.verbose = TRUE;
LogCtx.very_verbose = TRUE;
LOG_ERROR("*SIGNAL HANDLER*", "=============================================\n\n");
LOG_ERRORF("*SIGNAL HANDLER*", "Uh oh! Caught fatal signal: %s", strsignal(signum));
@ -103,7 +103,7 @@ void sig_handler(int signum) {
exit(-1);
}
void init_dir(const char *dirpath, scan_args_t* args) {
void init_dir(const char *dirpath, scan_args_t *args) {
char path[PATH_MAX];
snprintf(path, PATH_MAX, "%sdescriptor.json", dirpath);
@ -112,16 +112,16 @@ void init_dir(const char *dirpath, scan_args_t* args) {
strcpy(ScanCtx.index.desc.type, INDEX_TYPE_NDJSON);
if (args->incremental != NULL) {
// copy old index id
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
// copy old index id
char descriptor_path[PATH_MAX];
snprintf(descriptor_path, PATH_MAX, "%sdescriptor.json", args->incremental);
index_descriptor_t original_desc = read_index_descriptor(descriptor_path);
memcpy(ScanCtx.index.desc.id, original_desc.id, sizeof(original_desc.id));
} else {
// genreate new index id based on timestamp
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
// generate new index id based on timestamp
unsigned char index_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) &ScanCtx.index.desc.timestamp, sizeof(ScanCtx.index.desc.timestamp), index_md5);
buf2hex(index_md5, MD5_DIGEST_LENGTH, ScanCtx.index.desc.id);
}
write_index_descriptor(path, &ScanCtx.index.desc);
@ -324,9 +324,13 @@ void load_incremental_index(const scan_args_t *args) {
LOG_FATALF("main.c", "Version mismatch! Index is %s but executable is %s", original_desc.version, Version)
}
READ_INDICES(file_path, args->incremental, incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
1);
READ_INDICES(
file_path,
args->incremental,
incremental_read(ScanCtx.original_table, file_path, &original_desc),
LOG_FATALF("main.c", "Could not open original main index for incremental scan: %s", strerror(errno)),
TRUE
);
LOG_INFOF("main.c", "Loaded %d items in to mtime table.", g_hash_table_size(ScanCtx.original_table))
}
@ -534,6 +538,7 @@ void sist2_exec_script(exec_args_t *args) {
IndexCtx.es_url = args->es_url;
IndexCtx.es_index = args->es_index;
IndexCtx.needs_es_connection = TRUE;
LOG_DEBUGF("main.c", "descriptor version %s (%s)", desc.version, desc.type)
@ -776,9 +781,8 @@ int main(int argc, const char *argv[]) {
sist2_exec_script(exec_args);
} else {
fprintf(stderr, "Invalid command: '%s'\n", argv[0]);
argparse_usage(&argparse);
goto end;
LOG_FATALF("main.c", "Invalid command: '%s'\n", argv[0])
}
printf("\n");

View File

@ -69,7 +69,7 @@ void parse(void *arg) {
doc->base = (short) job->base;
char *rel_path = doc->filepath + ScanCtx.index.desc.root_len;
MD5((unsigned char *) rel_path, strlen(rel_path), doc->path_md5);
generate_doc_id(rel_path, doc->doc_id);
doc->meta_head = NULL;
doc->meta_tail = NULL;
@ -77,10 +77,10 @@ void parse(void *arg) {
doc->size = job->vfile.info.st_size;
doc->mtime = (int) job->vfile.info.st_mtim.tv_sec;
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
int inc_ts = incremental_get(ScanCtx.original_table, doc->doc_id);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
pthread_mutex_lock(&ScanCtx.copy_table_mu);
incremental_mark_file(ScanCtx.copy_table, doc->path_md5);
incremental_mark_file(ScanCtx.copy_table, doc->doc_id);
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
@ -96,16 +96,14 @@ void parse(void *arg) {
if (ScanCtx.new_table != NULL) {
pthread_mutex_lock(&ScanCtx.copy_table_mu);
incremental_mark_file(ScanCtx.new_table, doc->path_md5);
incremental_mark_file(ScanCtx.new_table, doc->doc_id);
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
}
char *buf[MAGIC_BUF_SIZE];
if (LogCtx.very_verbose) {
char path_md5_str[MD5_STR_LENGTH];
buf2hex(doc->path_md5, MD5_DIGEST_LENGTH, path_md5_str);
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", path_md5_str)
LOG_DEBUGF(job->filepath, "Starting parse job {%s}", doc->doc_id)
}
if (job->vfile.info.st_size == 0) {
@ -218,10 +216,10 @@ void parse(void *arg) {
abort:
//Parent meta
if (!md5_digest_is_null(job->parent)) {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + MD5_STR_LENGTH);
if (job->parent[0] != '\0') {
meta_line_t *meta_parent = malloc(sizeof(meta_line_t) + SIST_INDEX_ID_LEN);
meta_parent->key = MetaParent;
buf2hex(job->parent, MD5_DIGEST_LENGTH, meta_parent->str_val);
strcpy(meta_parent->str_val, job->parent);
APPEND_META((doc), meta_parent)
doc->has_parent = TRUE;

View File

@ -23,16 +23,19 @@ void parse_sidecar(vfile_t *vfile, document_t *doc) {
}
char *json_str = cJSON_PrintUnformatted(json);
unsigned char path_md5[MD5_DIGEST_LENGTH];
MD5((unsigned char *) vfile->filepath + ScanCtx.index.desc.root_len, doc->ext - 1 - ScanCtx.index.desc.root_len,
path_md5);
char assoc_doc_id[SIST_DOC_ID_LEN];
char path_md5_str[MD5_STR_LENGTH];
buf2hex(path_md5, MD5_DIGEST_LENGTH, path_md5_str);
char rel_path[PATH_MAX];
size_t rel_path_len = doc->ext - 1 - ScanCtx.index.desc.root_len;
memcpy(rel_path, vfile->filepath + ScanCtx.index.desc.root_len, rel_path_len);
*(rel_path + rel_path_len) = '\0';
store_write(ScanCtx.index.meta_store, path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
generate_doc_id(rel_path, assoc_doc_id);
store_write(ScanCtx.index.meta_store, assoc_doc_id, sizeof(assoc_doc_id), json_str,
strlen(json_str) + 1);
cJSON_Delete(json);
free(json_str);
free(buf);
}
}

View File

@ -27,10 +27,6 @@
#define UNUSED(x) __attribute__((__unused__)) x
#define MD5_STR_LENGTH 33
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
#include "util.h"
#include "log.h"
#include "types.h"
@ -53,14 +49,14 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "2.11.7"
#define VERSION "2.12.0"
static const char *const Version = VERSION;
#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown
#endif
#define EXPECTED_MONGOOSE_VERSION "7.3"
#define EXPECTED_MONGOOSE_VERSION "7.6"
#define Q(x) #x
#define QUOTE(x) Q(x)

View File

@ -20,7 +20,7 @@ typedef struct {
long count;
} agg_t;
void fill_tables(cJSON *document, UNUSED(const char index_id[MD5_STR_LENGTH])) {
void fill_tables(cJSON *document, UNUSED(const char index_id[SIST_INDEX_ID_LEN])) {
if (cJSON_GetObjectItem(document, "parent") != NULL) {
return;

View File

@ -4,7 +4,7 @@
#define INDEX_TYPE_NDJSON "ndjson"
typedef struct index_descriptor {
char id[MD5_STR_LENGTH];
char id[SIST_INDEX_ID_LEN];
char version[64];
long timestamp;
char root[PATH_MAX];

View File

@ -10,8 +10,6 @@
#include "third-party/utf8.h/utf8.h"
#include "libscan/scan.h"
#define MD5_STR_LENGTH 33
char *abspath(const char *path);
@ -94,40 +92,24 @@ static void buf2hex(const unsigned char *buf, size_t buflen, char *hex_string) {
__always_inline
static int md5_digest_is_null(const unsigned char digest[MD5_DIGEST_LENGTH]) {
return (*(int64_t *) digest) == 0 && (*((int64_t *) digest + 1)) == 0;
static void generate_doc_id(const char *rel_path, char *doc_id) {
unsigned char md[MD5_DIGEST_LENGTH];
MD5((unsigned char *) rel_path, strlen(rel_path), md);
buf2hex(md, sizeof(md), doc_id);
}
__always_inline
static void incremental_put(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH], int mtime) {
char *ptr = malloc(MD5_STR_LENGTH);
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
static void incremental_put(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN], int mtime) {
char *ptr = malloc(SIST_DOC_ID_LEN);
strcpy(ptr, doc_id);
g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
}
__always_inline
static void incremental_put_str(GHashTable *table, const char *path_md5, int mtime) {
char *ptr = malloc(MD5_STR_LENGTH);
strcpy(ptr, path_md5);
g_hash_table_insert(table, ptr, GINT_TO_POINTER(mtime));
}
__always_inline
static int incremental_get(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
static int incremental_get(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
if (table != NULL) {
char md5_str[MD5_STR_LENGTH];
buf2hex(path_md5, MD5_DIGEST_LENGTH, md5_str);
return GPOINTER_TO_INT(g_hash_table_lookup(table, md5_str));
} else {
return 0;
}
}
__always_inline
static int incremental_get_str(GHashTable *table, const char *path_md5) {
if (table != NULL) {
return GPOINTER_TO_INT(g_hash_table_lookup(table, path_md5));
return GPOINTER_TO_INT(g_hash_table_lookup(table, doc_id));
} else {
return 0;
}
@ -138,9 +120,9 @@ static int incremental_get_str(GHashTable *table, const char *path_md5) {
* !!Not thread safe.
*/
__always_inline
static int incremental_mark_file(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
char *ptr = malloc(MD5_STR_LENGTH);
buf2hex(path_md5, MD5_DIGEST_LENGTH, ptr);
static int incremental_mark_file(GHashTable *table, const char doc_id[SIST_DOC_ID_LEN]) {
char *ptr = malloc(SIST_DOC_ID_LEN);
strcpy(ptr, doc_id);
return g_hash_table_insert(table, ptr, GINT_TO_POINTER(1));
}

View File

@ -12,6 +12,13 @@
#define HTTP_TEXT_TYPE_HEADER "Content-Type: text/plain;charset=utf-8\r\n"
#define HTTP_REPLY_NOT_FOUND mg_http_reply(nc, 404, HTTP_SERVER_HEADER HTTP_TEXT_TYPE_HEADER, "Not found");
static struct mg_http_serve_opts DefaultServeOpts = {
.fs = NULL,
.ssi_pattern = NULL,
.root_dir = NULL,
.mime_types = ""
};
static void send_response_line(struct mg_connection *nc, int status_code, size_t length, char *extra_headers) {
mg_printf(
@ -29,7 +36,7 @@ static void send_response_line(struct mg_connection *nc, int status_code, size_t
index_t *get_index_by_id(const char *index_id) {
for (int i = WebCtx.index_count; i >= 0; i--) {
if (strncmp(index_id, WebCtx.indices[i].desc.id, MD5_STR_LENGTH) == 0) {
if (strncmp(index_id, WebCtx.indices[i].desc.id, SIST_INDEX_ID_LEN) == 0) {
return &WebCtx.indices[i];
}
}
@ -54,7 +61,7 @@ store_t *get_tag_store(const char *index_id) {
void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
if (WebCtx.dev) {
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", "text/html", NULL);
mg_http_serve_file(nc, hm, "sist2-vue/dist/index.html", &DefaultServeOpts);
} else {
send_response_line(nc, 200, sizeof(index_html), "Content-Type: text/html");
mg_send(nc, index_html, sizeof(index_html));
@ -63,23 +70,23 @@ void search_index(struct mg_connection *nc, struct mg_http_message *hm) {
void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
HTTP_REPLY_NOT_FOUND
return;
}
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
char arg_index_id[SIST_INDEX_ID_LEN];
memcpy(arg_index_id, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
*(arg_index_id + SIST_INDEX_ID_LEN - 1) = '\0';
index_t *index = get_index_by_id(arg_md5);
index_t *index = get_index_by_id(arg_index_id);
if (index == NULL) {
HTTP_REPLY_NOT_FOUND
return;
}
const char *file;
switch (atoi(hm->uri.ptr + 3 + MD5_STR_LENGTH)) {
switch (atoi(hm->uri.ptr + 3 + SIST_INDEX_ID_LEN)) {
case 1:
file = "treemap.csv";
break;
@ -104,12 +111,13 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
strcpy(full_path, index->path);
strcat(full_path, file);
mg_http_serve_file(nc, hm, full_path, "text/csv", disposition);
struct mg_http_serve_opts opts = {};
mg_http_serve_file(nc, hm, full_path, &opts);
}
void javascript(struct mg_connection *nc, struct mg_http_message *hm) {
if (WebCtx.dev) {
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/index.js", "application/javascript", NULL);
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/index.js", &DefaultServeOpts);
} else {
send_response_line(nc, 200, sizeof(index_js), "Content-Type: application/javascript");
mg_send(nc, index_js, sizeof(index_js));
@ -118,7 +126,7 @@ void javascript(struct mg_connection *nc, struct mg_http_message *hm) {
void javascript_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
if (WebCtx.dev) {
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/chunk-vendors.js", "application/javascript", NULL);
mg_http_serve_file(nc, hm, "sist2-vue/dist/js/chunk-vendors.js", &DefaultServeOpts);
} else {
send_response_line(nc, 200, sizeof(chunk_vendors_js), "Content-Type: application/javascript");
mg_send(nc, chunk_vendors_js, sizeof(chunk_vendors_js));
@ -142,28 +150,25 @@ void style_vendor(struct mg_connection *nc, struct mg_http_message *hm) {
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
int parse_tn_num = FALSE;
int has_thumbnail_index = FALSE;
if (hm->uri.len != 68) {
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2) {
if (hm->uri.len != 68 + 4) {
if (hm->uri.len != SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2 + 4) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
parse_tn_num = TRUE;
has_thumbnail_index = TRUE;
}
char arg_file_md5[MD5_STR_LENGTH];
char arg_index[MD5_STR_LENGTH];
char arg_doc_id[SIST_DOC_ID_LEN];
char arg_index[SIST_INDEX_ID_LEN];
memcpy(arg_index, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
memcpy(arg_file_md5, hm->uri.ptr + 3 + MD5_STR_LENGTH, MD5_STR_LENGTH);
*(arg_file_md5 + MD5_STR_LENGTH - 1) = '\0';
unsigned char md5_buf[MD5_DIGEST_LENGTH];
hex2buf(arg_file_md5, MD5_STR_LENGTH - 1, md5_buf);
memcpy(arg_index, hm->uri.ptr + 3, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
memcpy(arg_doc_id, hm->uri.ptr + 3 + SIST_INDEX_ID_LEN, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
store_t *store = get_store(arg_index);
if (store == NULL) {
@ -175,16 +180,18 @@ void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
char *data;
size_t data_len = 0;
if (parse_tn_num) {
int tn_num = atoi(hm->uri.ptr + 68);
if (has_thumbnail_index) {
const char *tn_index = hm->uri.ptr + SIST_INDEX_ID_LEN + SIST_DOC_ID_LEN + 2;
char tn_key[sizeof(md5_buf) + sizeof(int)];
memcpy(tn_key, md5_buf, sizeof(md5_buf));
memcpy(tn_key + sizeof(md5_buf), &tn_num, sizeof(tn_num));
char tn_key[sizeof(arg_doc_id) + sizeof(char) * 4];
memcpy(tn_key, arg_doc_id, sizeof(arg_doc_id));
memcpy(tn_key + sizeof(arg_doc_id) - 1, tn_index, sizeof(char) * 4);
*(tn_key + sizeof(tn_key) - 1) = '\0';
data = store_read(store, (char *) tn_key, sizeof(tn_key), &data_len);
} else {
data = store_read(store, (char *) md5_buf, sizeof(md5_buf), &data_len);
data = store_read(store, (char *) arg_doc_id, sizeof(arg_doc_id), &data_len);
}
if (data_len != 0) {
@ -274,10 +281,18 @@ void serve_file_from_disk(cJSON *json, index_t *idx, struct mg_connection *nc, s
char disposition[8192];
snprintf(disposition, sizeof(disposition),
HTTP_SERVER_HEADER "Content-Disposition: inline; filename=\"%s%s%s\"\r\nAccept-Ranges: bytes\r\n",
HTTP_SERVER_HEADER "Content-Disposition: inline; filename=\"%s%s%s\"\r\n"
"Accept-Ranges: bytes\r\nCache-Control: no-store\r\n",
name, strlen(ext) == 0 ? "" : ".", ext);
mg_http_serve_file(nc, hm, full_path, mime, disposition);
char mime_mapping[1024];
snprintf(mime_mapping, sizeof(mime_mapping), "%s=%s", ext, mime);
struct mg_http_serve_opts opts = {
.extra_headers = disposition,
.mime_types = mime_mapping
};
mg_http_serve_file(nc, hm, full_path, &opts);
}
void cache_es_version() {
@ -298,13 +313,18 @@ void index_info(struct mg_connection *nc) {
cache_es_version();
const char *es_version = "0.0.0";
if (WebCtx.es_version != NULL) {
es_version = format_es_version(WebCtx.es_version);
}
cJSON *json = cJSON_CreateObject();
cJSON *arr = cJSON_AddArrayToObject(json, "indices");
cJSON_AddStringToObject(json, "mongooseVersion", MG_VERSION);
cJSON_AddStringToObject(json, "esIndex", WebCtx.es_index);
cJSON_AddStringToObject(json, "version", Version);
cJSON_AddStringToObject(json, "esVersion", format_es_version(WebCtx.es_version));
cJSON_AddStringToObject(json, "esVersion", es_version);
cJSON_AddBoolToObject(json, "esVersionSupported", IS_SUPPORTED_ES_VERSION(WebCtx.es_version));
cJSON_AddBoolToObject(json, "esVersionLegacy", USE_LEGACY_ES_SETTINGS(WebCtx.es_version));
cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
@ -339,55 +359,19 @@ void index_info(struct mg_connection *nc) {
}
void document_info(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
LOG_DEBUGF("serve.c", "Invalid document_info path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
cJSON *doc = elastic_get_document(arg_md5);
cJSON *source = cJSON_GetObjectItem(doc, "_source");
cJSON *index_id = cJSON_GetObjectItem(source, "index");
if (index_id == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
index_t *idx = get_index_by_id(index_id->valuestring);
if (idx == NULL) {
cJSON_Delete(doc);
HTTP_REPLY_NOT_FOUND
return;
}
char *json_str = cJSON_PrintUnformatted(source);
send_response_line(nc, 200, (int) strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, (int) strlen(json_str));
free(json_str);
cJSON_Delete(doc);
}
void file(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 2) {
if (hm->uri.len != SIST_DOC_ID_LEN + 2) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
char arg_md5[MD5_STR_LENGTH];
memcpy(arg_md5, hm->uri.ptr + 3, MD5_STR_LENGTH);
*(arg_md5 + MD5_STR_LENGTH - 1) = '\0';
char arg_doc_id[SIST_DOC_ID_LEN];
memcpy(arg_doc_id, hm->uri.ptr + 3, SIST_DOC_ID_LEN);
*(arg_doc_id + SIST_DOC_ID_LEN - 1) = '\0';
const char *next = arg_md5;
const char *next = arg_doc_id;
cJSON *doc = NULL;
cJSON *index_id = NULL;
cJSON *source = NULL;
@ -438,7 +422,6 @@ void status(struct mg_connection *nc) {
typedef struct {
char *name;
int delete;
char *path_md5_str;
char *doc_id;
} tag_req_t;
@ -458,12 +441,6 @@ tag_req_t *parse_tag_request(cJSON *json) {
return NULL;
}
cJSON *arg_path_md5 = cJSON_GetObjectItem(json, "path_md5");
if (arg_path_md5 == NULL || !cJSON_IsString(arg_path_md5) ||
strlen(arg_path_md5->valuestring) != MD5_STR_LENGTH - 1) {
return NULL;
}
cJSON *arg_doc_id = cJSON_GetObjectItem(json, "doc_id");
if (arg_doc_id == NULL || !cJSON_IsString(arg_doc_id)) {
return NULL;
@ -472,22 +449,21 @@ tag_req_t *parse_tag_request(cJSON *json) {
tag_req_t *req = malloc(sizeof(tag_req_t));
req->delete = arg_delete->valueint;
req->name = arg_name->valuestring;
req->path_md5_str = arg_path_md5->valuestring;
req->doc_id = arg_doc_id->valuestring;
return req;
}
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
if (hm->uri.len != MD5_STR_LENGTH + 4) {
if (hm->uri.len != SIST_INDEX_ID_LEN + 4) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr)
HTTP_REPLY_NOT_FOUND
return;
}
char arg_index[MD5_STR_LENGTH];
memcpy(arg_index, hm->uri.ptr + 5, MD5_STR_LENGTH);
*(arg_index + MD5_STR_LENGTH - 1) = '\0';
char arg_index[SIST_INDEX_ID_LEN];
memcpy(arg_index, hm->uri.ptr + 5, SIST_INDEX_ID_LEN);
*(arg_index + SIST_INDEX_ID_LEN - 1) = '\0';
if (hm->body.len < 2 || hm->method.len != 4 || memcmp(&hm->method, "POST", 4) == 0) {
LOG_DEBUG("serve.c", "Invalid tag request")
@ -519,7 +495,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
cJSON *arr = NULL;
size_t data_len = 0;
const char *data = store_read(store, arg_req->path_md5_str, MD5_STR_LENGTH, &data_len);
const char *data = store_read(store, arg_req->doc_id, SIST_DOC_ID_LEN, &data_len);
if (data_len == 0) {
arr = cJSON_CreateArray();
} else {
@ -579,7 +555,7 @@ void tag(struct mg_connection *nc, struct mg_http_message *hm) {
}
char *json_str = cJSON_PrintUnformatted(arr);
store_write(store, arg_req->path_md5_str, MD5_STR_LENGTH, json_str, strlen(json_str) + 1);
store_write(store, arg_req->doc_id, SIST_DOC_ID_LEN, json_str, strlen(json_str) + 1);
store_flush(store);
free(arg_req);
@ -641,8 +617,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(vo
return;
}
tag(nc, hm);
} else if (mg_http_match_uri(hm, "/d/*")) {
document_info(nc, hm);
} else {
HTTP_REPLY_NOT_FOUND
}

File diff suppressed because one or more lines are too long

View File

@ -35,10 +35,20 @@ def sist2_index(files, *args):
path = copy_files(files)
shutil.rmtree("test_i", ignore_errors=True)
sist2("scan", path, "-o", "test_i", *args)
sist2("scan", path, "-o", "test_i", "-t12", *args)
return iter(sist2_index_to_dict("test_i"))
def get_lmdb_contents(path):
import lmdb
env = lmdb.open(path)
txn = env.begin(write=False)
return dict((k, v) for k, v in txn.cursor())
def sist2_incremental_index(files, func=None, incremental_index=False, *args):
path = copy_files(files)
@ -46,7 +56,7 @@ def sist2_incremental_index(files, func=None, incremental_index=False, *args):
func(path)
shutil.rmtree("test_i_inc", ignore_errors=True)
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", *args)
sist2("scan", path, "-o", "test_i_inc", "--incremental", "test_i", "-t12", *args)
return iter(sist2_index_to_dict("test_i_inc", incremental_index))
@ -76,9 +86,31 @@ class ScanTest(unittest.TestCase):
pass
file_count = sum(1 for _ in sist2_index(TEST_FILES))
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, remove_files)), file_count - 2)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)), 3)
self.assertEqual(sum(1 for _ in sist2_incremental_index(TEST_FILES, add_files)), file_count + 3)
lmdb_full = get_lmdb_contents("test_i/thumbs")
# Remove files
num_files_rm1 = len(list(sist2_incremental_index(TEST_FILES, remove_files)))
lmdb_rm1 = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_rm1, file_count - 2)
self.assertEqual(len(set(lmdb_full.keys() - set(lmdb_rm1.keys()))), 2)
# add files (incremental_index=True)
num_files_add_inc = len(list(sist2_incremental_index(TEST_FILES, add_files, incremental_index=True)))
lmdb_add_inc = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_add_inc, 3)
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add_inc.keys()))
# add files
num_files_add = len(list(sist2_incremental_index(TEST_FILES, add_files)))
lmdb_add = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(num_files_add, file_count + 3)
self.assertEqual(set(lmdb_full.keys()), set(lmdb_add.keys()))
# (No action)
sist2_incremental_index(TEST_FILES)
lmdb_inc = get_lmdb_contents("test_i_inc/thumbs")
self.assertEqual(set(lmdb_full.keys()), set(lmdb_inc.keys()))
if __name__ == "__main__":

View File

@ -6,26 +6,11 @@ set(CMAKE_C_STANDARD 11)
option(BUILD_TESTS "Build tests" on)
add_subdirectory(third-party/antiword)
if (SIST_DEBUG)
add_compile_definitions(
antiword
DEBUG
)
target_compile_options(
antiword
PRIVATE
-g
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-fno-inline
)
else()
add_compile_definitions(
antiword
NDEBUG
)
endif()
set(USE_LIBXML2 OFF CACHE BOOL "" FORCE)
set(USE_XMLWRITER OFF CACHE BOOL "" FORCE)
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
add_subdirectory(third-party/libmobi)
add_library(
scan
@ -48,6 +33,54 @@ add_library(
libscan/mobi/scan_mobi.c libscan/mobi/scan_mobi.h libscan/raw/raw.c libscan/raw/raw.h)
set_target_properties(scan PROPERTIES LINKER_LANGUAGE C)
if (SIST_DEBUG)
add_compile_definitions(
antiword
DEBUG
)
target_compile_options(
antiword
PRIVATE
-g
-fstack-protector
-fno-omit-frame-pointer
-fsanitize=address
-fno-inline
)
elseif (SIST_FAST)
add_compile_definitions(
antiword
NDEBUG
)
target_compile_options(
scan
PRIVATE
-Ofast
-march=native
-fno-stack-protector
-fomit-frame-pointer
-freciprocal-math
)
else()
add_compile_definitions(
antiword
NDEBUG
)
target_compile_options(
scan
PRIVATE
-Ofast
#-march=native
-fno-stack-protector
-fomit-frame-pointer
#-freciprocal-math
)
endif()
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib .so)
find_package(cJSON CONFIG REQUIRED)
@ -85,35 +118,15 @@ target_compile_options(
-g
)
include(ExternalProject)
find_program(MAKE_EXE NAMES gmake nmake make)
ExternalProject_Add(
libmobi
GIT_REPOSITORY https://github.com/simon987/libmobi.git
GIT_TAG "public"
UPDATE_COMMAND ""
PATCH_COMMAND ""
TEST_COMMAND ""
CONFIGURE_COMMAND ./autogen.sh && ./configure
INSTALL_COMMAND ""
PREFIX "third-party/ext_libmobi"
SOURCE_DIR "third-party/ext_libmobi/src/libmobi"
BINARY_DIR "third-party/ext_libmobi/src/libmobi"
BUILD_COMMAND ${MAKE_EXE} -j 8 --silent
)
SET(MOBI_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/.libs/)
SET(MOBI_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libmobi/src/libmobi/src/)
if (SIST_DEBUG)
SET(FFMPEG_DEBUG "--enable-debug=3" "--disable-optimizations")
else()
SET(FFMPEG_DEBUG "")
endif()
include(ExternalProject)
find_program(MAKE_EXE NAMES gmake nmake make)
ExternalProject_Add(
ffmpeg
GIT_REPOSITORY https://git.ffmpeg.org/ffmpeg.git
@ -159,10 +172,10 @@ SET(WPD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/ext_libwpd/src/libwp
add_dependencies(
scan
libmobi
ffmpeg
antiword
libwpd
mobi
)
target_link_libraries(
@ -180,8 +193,6 @@ target_link_libraries(
${MUPDF_LIB}
openjp2
${MOBI_LIB_DIR}/libmobi.a
${WPD_LIB_DIR}/libwpd-0.9.a
${WPD_LIB_DIR}/libwpd-stream-0.9.a
@ -218,6 +229,7 @@ target_link_libraries(
${GUMBO_LIB}
dl
antiword
mobi
unofficial::pcre::pcre unofficial::pcre::pcre16 unofficial::pcre::pcre32 unofficial::pcre::pcrecpp
)

View File

@ -202,7 +202,7 @@ scan_code_t parse_archive(scan_arc_ctx_t *ctx, vfile_t *f, document_t *doc, pcre
sub_job->vfile.logf = ctx->logf;
sub_job->vfile.has_checksum = FALSE;
sub_job->vfile.calculate_checksum = f->calculate_checksum;
memcpy(sub_job->parent, doc->path_md5, MD5_DIGEST_LENGTH);
strcpy(sub_job->parent, doc->doc_id);
while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
sub_job->vfile.info = *archive_entry_stat(entry);

View File

@ -156,7 +156,7 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
free(samples);
av_packet_unref(&jpeg_packet);

View File

@ -232,7 +232,7 @@ void parse_font(scan_font_ctx_t *ctx, vfile_t *f, document_t *doc) {
bmp_format(&bmp_data, dimensions, bitmap);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) bmp_data.buf, bmp_data.cur);
ctx->store(doc->doc_id, sizeof(doc->doc_id), (char *) bmp_data.buf, bmp_data.cur);
dyn_buffer_destroy(&bmp_data);
free(bitmap);

View File

@ -20,8 +20,10 @@
#undef ABS
#define ABS(a) (((a) < 0) ? -(a) : (a))
#define SHA1_STR_LENGTH 41
#define SHA1_DIGEST_LENGTH 20
#define SHA1_DIGEST_LENGTH SHA_DIGEST_LENGTH
#define SHA1_STR_LENGTH (SHA1_DIGEST_LENGTH * 2 + 1)
#define MD5_STR_LENGTH (MD5_DIGEST_LENGTH * 2 + 1)
#define APPEND_STR_META(doc, keyname, value) \
{meta_line_t *meta_str = malloc(sizeof(meta_line_t) + strlen(value)); \

View File

@ -4,7 +4,12 @@
#define MIN_SIZE 32
#define AVIO_BUF_SIZE 8192
#define IS_VIDEO(fmt) ((fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0)
#define IS_VIDEO(fmt) ( \
(fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0 \
&& strcmp((fmt)->iformat->name, "jpeg_pipe") != 0 \
&& strcmp((fmt)->iformat->name, "webp_pipe") != 0 \
&& strcmp((fmt)->iformat->name, "png_pipe") != 0 \
)
#define STORE_AS_IS ((void*)-1)
@ -279,18 +284,22 @@ static void
append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *frame, document_t *doc, int is_video) {
if (is_video) {
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
if (meta_duration->long_val > INT32_MAX) {
meta_duration->long_val = 0;
if (pFormatCtx->duration / AV_TIME_BASE != 0) {
meta_line_t *meta_duration = malloc(sizeof(meta_line_t));
meta_duration->key = MetaMediaDuration;
meta_duration->long_val = pFormatCtx->duration / AV_TIME_BASE;
if (meta_duration->long_val > INT32_MAX) {
meta_duration->long_val = 0;
}
APPEND_META(doc, meta_duration)
}
APPEND_META(doc, meta_duration)
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->long_val = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
if (pFormatCtx->bit_rate != 0) {
meta_line_t *meta_bitrate = malloc(sizeof(meta_line_t));
meta_bitrate->key = MetaMediaBitrate;
meta_bitrate->long_val = pFormatCtx->bit_rate;
APPEND_META(doc, meta_bitrate)
}
}
AVDictionaryEntry *tag = NULL;
@ -459,7 +468,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
if (scaled_frame == STORE_AS_IS) {
return_value = SAVE_THUMBNAIL_OK;
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
@ -473,7 +482,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
// Save thumbnail
if (thumbnail_index == 0) {
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
return_value = SAVE_THUMBNAIL_OK;
} else if (thumbnail_index > 1) {
@ -482,9 +491,8 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
// I figure out a better fix.
thumbnail_index -= 1;
char tn_key[sizeof(doc->path_md5) + sizeof(int)];
memcpy(tn_key, doc->path_md5, sizeof(doc->path_md5));
memcpy(tn_key + sizeof(doc->path_md5), &thumbnail_index, sizeof(thumbnail_index));
char tn_key[sizeof(doc->doc_id) + sizeof(char) * 4];
snprintf(tn_key, sizeof(tn_key), "%s%04d", doc->doc_id, thumbnail_index);
ctx->store((char *) tn_key, sizeof(tn_key), (char *) jpeg_packet.data, jpeg_packet.size);
} else {
@ -578,9 +586,10 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
int video_duration_in_seconds = (int) (pFormatCtx->duration / AV_TIME_BASE);
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF && video_duration_in_seconds >= 15)
// Limit to ~1 thumbnail every 5s
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 5 + 1), 1) + 1
int thumbnails_to_generate = (IS_VIDEO(pFormatCtx) && stream->codecpar->codec_id != AV_CODEC_ID_GIF &&
video_duration_in_seconds >= 15)
// Limit to ~1 thumbnail every 7s
? MAX(MIN(ctx->tn_count, video_duration_in_seconds / 7 + 1), 1) + 1
: 1;
const double seek_increment = thumbnails_to_generate == 1
@ -845,7 +854,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
if (scaled_frame == STORE_AS_IS) {
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) frame_and_packet->packet->data,
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) frame_and_packet->packet->data,
frame_and_packet->packet->size);
} else {
// Encode frame to jpeg
@ -859,7 +868,7 @@ int store_image_thumbnail(scan_media_ctx_t *ctx, void *buf, size_t buf_len, docu
// Save thumbnail
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
avcodec_free_context(&jpeg_encoder);

View File

@ -1,6 +1,6 @@
#include "scan_mobi.h"
#include <mobi.h>
#include "../../third-party/libmobi/src/mobi.h"
#include <errno.h>
#include "stdlib.h"

View File

@ -191,7 +191,7 @@ void read_thumbnail(scan_ooxml_ctx_t *ctx, document_t *doc, struct archive *a, s
archive_read_data(a, buf, entry_size);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), buf, entry_size);
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), buf, entry_size);
free(buf);
}

View File

@ -7,8 +7,22 @@
#define MIN_SIZE 32
int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
return store_image_thumbnail((scan_media_ctx_t *) ctx, img->data, img->data_size, doc, "x.jpeg");
int store_thumbnail_jpeg(scan_raw_ctx_t *ctx, libraw_thumbnail_t img, document_t *doc) {
scan_media_ctx_t media_ctx = {
.read_subtitles = FALSE,
.tn_count = 1,
.max_media_buffer = 0,
.store = ctx->store,
.log = ctx->log,
.logf = ctx->logf,
.tn_size = ctx->tn_size,
.tn_qscale = ctx->tn_qscale,
.tesseract_lang = NULL,
.tesseract_path = NULL
};
return store_image_thumbnail(&media_ctx, img.thumb, img.tlength, doc, "x.jpeg");
}
int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, document_t *doc) {
@ -70,7 +84,7 @@ int store_thumbnail_rgb24(scan_raw_ctx_t *ctx, libraw_processed_image_t *img, do
avcodec_receive_packet(jpeg_encoder, &jpeg_packet);
APPEND_LONG_META(doc, MetaThumbnail, 1)
ctx->store((char *) doc->path_md5, sizeof(doc->path_md5), (char *) jpeg_packet.data, jpeg_packet.size);
ctx->store((char *) doc->doc_id, sizeof(doc->doc_id), (char *) jpeg_packet.data, jpeg_packet.size);
av_packet_unref(&jpeg_packet);
av_free(*scaled_frame->data);
@ -171,25 +185,25 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
return;
}
int errc = 0;
libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc);
if (errc != 0) {
free(buf);
libraw_dcraw_clear_mem(thumb);
libraw_close(libraw_lib);
return;
}
int tn_ok = 0;
if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_JPEG) {
tn_ok = store_thumbnail_jpeg(ctx, thumb, doc);
tn_ok = store_thumbnail_jpeg(ctx, libraw_lib->thumbnail, doc);
} else if (libraw_lib->thumbnail.tformat == LIBRAW_THUMBNAIL_BITMAP) {
// TODO: technically this should work but is currently untested
int errc = 0;
libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc);
if (errc != 0) {
free(buf);
libraw_dcraw_clear_mem(thumb);
libraw_close(libraw_lib);
return;
}
tn_ok = store_thumbnail_rgb24(ctx, thumb, doc);
}
libraw_dcraw_clear_mem(thumb);
if (tn_ok == TRUE) {
free(buf);
libraw_close(libraw_lib);
@ -206,7 +220,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
libraw_dcraw_process(libraw_lib);
errc = 0;
int errc = 0;
libraw_processed_image_t *img = libraw_dcraw_make_mem_image(libraw_lib, &errc);
if (errc != 0) {
free(buf);

View File

@ -48,6 +48,9 @@ typedef int scan_code_t;
#define CTX_LOG_FATALF(filepath, fmt, ...) ctx->logf(filepath, LEVEL_FATAL, fmt, __VA_ARGS__); exit(-1);
#define CTX_LOG_FATAL(filepath, str) ctx->log(filepath, LEVEL_FATAL, str); exit(-1);
#define SIST_DOC_ID_LEN MD5_STR_LENGTH
#define SIST_INDEX_ID_LEN MD5_STR_LENGTH
enum metakey {
// String
MetaContent = 1,
@ -103,7 +106,7 @@ typedef struct meta_line {
typedef struct document {
unsigned char path_md5[MD5_DIGEST_LENGTH];
char doc_id[SIST_DOC_ID_LEN];
unsigned long size;
unsigned int mime;
int mtime;
@ -159,7 +162,7 @@ typedef struct parse_job_t {
int base;
int ext;
struct vfile vfile;
unsigned char parent[MD5_DIGEST_LENGTH];
char parent[SIST_DOC_ID_LEN];
char filepath[1];
} parse_job_t;

View File

@ -923,7 +923,6 @@ TEST(Msdoc, Test1Pdf) {
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "October 2000") != nullptr);
ASSERT_STREQ(get_meta(&doc, MetaTitle)->str_val, "INTERNATIONAL ORGANIZATION FOR STANDARDIZATION");
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "Oliver Morgan");
ASSERT_EQ(get_meta(&doc, MetaPages)->long_val, 57);
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
ASSERT_NE(size_before, store_size);
@ -1030,6 +1029,23 @@ TEST(Msdoc, TestUtf8Text) {
cleanup(&doc, &f);
}
TEST(Msdoc, Test5Pdf) {
vfile_t f;
document_t doc;
load_doc_file("libscan-test-files/test_files/msdoc/test5.doc", &f, &doc);
size_t size_before = store_size;
parse_msdoc(&msdoc_ctx, &f, &doc);
ASSERT_TRUE(strstr(get_meta(&doc, MetaContent)->str_val, "орган Федеральной") != nullptr);
ASSERT_STREQ(get_meta(&doc, MetaAuthor)->str_val, "uswo");
ASSERT_NEAR(strlen(get_meta(&doc, MetaContent)->str_val), msdoc_ctx.content_size, 4);
ASSERT_NE(size_before, store_size);
cleanup(&doc, &f);
}
TEST(Msdoc, TestFuzz1) {
vfile_t f;
document_t doc;
@ -1189,4 +1205,7 @@ int main(int argc, char **argv) {
av_log_set_level(AV_LOG_QUIET);
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
}
// 0x6130000d2580
// "/mnt/Hatchery/m ain/downloads/qbittorrent/downloads/Roskomnadzor/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ.zip#/УПРАВЛЕНИЕ РОСКОМНАДЗОРА по РБ/Лопатин Ю.М/Секнин/2015 год/Обучение по ", <incomplete sequence \320>...

@ -1 +1 @@
Subproject commit 62ae66db99e9dd88dfa31999f516f71bb8bdc8b2
Subproject commit ddb042143e72a8b789e06f09dbc897dfa9f15b82

@ -0,0 +1 @@
Subproject commit 395dbde361a80353a9ed8b65d01d6066554142b3