Merge pull request #231 from simon987/dev

v2.11.6
This commit is contained in:
simon987 2022-01-09 09:28:24 -05:00 committed by GitHub
commit 65c499e477
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
53 changed files with 890 additions and 216 deletions

View File

@ -11,21 +11,6 @@ steps:
image: simon987/sist2-build
commands:
- ./scripts/build.sh
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
- name: scp files
image: appleboy/drone-scp
settings:
@ -42,6 +27,21 @@ steps:
- ./VERSION
- ./sist2-x64-linux
- ./sist2-x64-linux-debug
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
---
kind: pipeline

2
.gitignore vendored
View File

@ -24,3 +24,5 @@ Testing/
test_i
test_i_inc
node_modules/
.cmake/
i_inc/

3
.gitmodules vendored
View File

@ -1,6 +1,3 @@
[submodule "third-party/libscan"]
path = third-party/libscan
url = https://github.com/simon987/libscan
[submodule "third-party/argparse"]
path = third-party/argparse
url = https://github.com/simon987/argparse

View File

@ -22,9 +22,6 @@ add_subdirectory(third-party/argparse)
add_executable(sist2
# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
@ -41,7 +38,11 @@ add_executable(sist2
src/log.c src/log.h
src/cli.c src/cli.h
src/stats.c src/stats.h src/ctx.c
src/parsing/sidecar.c src/parsing/sidecar.h)
src/parsing/sidecar.c src/parsing/sidecar.h
# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c
)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
@ -86,6 +87,7 @@ if (SIST_DEBUG)
sist2
PRIVATE
-fsanitize=address
-static-libasan
)
set_target_properties(
sist2

View File

@ -51,7 +51,7 @@ sist2 (Simple incremental search tool)
1. Download the [latest sist2 release](https://github.com/simon987/sist2/releases) *
1. *(or)* Download a [development snapshot](https://files.simon987.net/.gate/sist2/simon987_sist2/) *(Not
recommended!)*
1. *(or)* `docker pull simon987/sist2:2.11.4-x64-linux`
1. *(or)* `docker pull simon987/sist2:2.11.6-x64-linux`
1. See [Usage guide](docs/USAGE.md)
@ -67,23 +67,23 @@ See [Usage guide](docs/USAGE.md) for more details
## Format support
File type | Library | Content | Thumbnail | Metadata
:---|:---|:---|:---|:---
pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
cbz,cbr | [libscan](https://github.com/simon987/libscan) | - | yes | - |
`audio/*` | ffmpeg | - | yes | ID3 tags |
`video/*` | ffmpeg | - | yes | title, comment, artist |
`image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
`text/plain` | [libscan](https://github.com/simon987/libscan) | yes | no | - |
html, xml | [libscan](https://github.com/simon987/libscan) | yes | no | - |
tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
docx, xlsx, pptx | [libscan](https://github.com/simon987/libscan) | yes | if embedded | creator, modified_by, title |
doc (MS Word 97-2003) | antiword | yes | yes | author, title |
mobi, azw, azw3 | libmobi | yes | no | author, title |
wpd (WordPerfect) | libwpd | yes | no | *planned* |
json, jsonl, ndjson | [libscan](https://github.com/simon987/libscan) | yes | - | - |
| File type | Library | Content | Thumbnail | Metadata |
|:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------|
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
| cbz,cbr | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | - | yes | - |
| `audio/*` | ffmpeg | - | yes | ID3 tags |
| `video/*` | ffmpeg | - | yes | title, comment, artist |
| `image/*` | ffmpeg | - | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | - | yes | Common EXIF tags, GPS tags |
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
| `text/plain` | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
| docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| doc (MS Word 97-2003) | antiword | yes | yes | author, title |
| mobi, azw, azw3 | libmobi | yes | no | author, title |
| wpd (WordPerfect) | libwpd | yes | no | *planned* |
| json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
\* *See [Archive files](#archive-files)*
@ -102,18 +102,24 @@ scan is also supported.
### OCR
You can enable OCR support for pdf,xps,fb2,epub file types with the
`--ocr <lang>` option. Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
You can enable OCR support for ebook (pdf,xps,fb2,epub) or image file types with the
`--ocr-lang <lang>` option in combination with `--ocr-images` and/or `--ocr-ebooks`.
Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa) pre-installed.
Examples
You can use the `+` separator to specify multiple languages. The language
name must be identical to the `*.traineddata` file installed on your system
(use `chi_sim` rather than `chi-sim`).
Examples:
```bash
sist2 scan --ocr jpn ~/Books/Manga/
sist2 scan --ocr eng ~/Books/Textbooks/
sist2 scan --ocr-ebooks --ocr-lang jpn ~/Books/Manga/
sist2 scan --ocr-images --ocr-lang eng ~/Images/Screenshots/
sist2 scan --ocr-ebooks --ocr-images --ocr-lang eng+chi_sim ~/Chinese-Bilingual/
```
## Build from source
@ -126,7 +132,7 @@ You can compile **sist2** by yourself if you don't want to use the pre-compiled
git clone --recursive https://github.com/simon987/sist2/
cd sist2
docker build . -f ./Dockerfile -t my-sist2-image
docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux
docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
```
### On a linux computer
@ -143,7 +149,7 @@ docker run --rm my-sist2-image cat /root/sist2 > sist2-x64-linux
```bash
vcpkg install curl[core,openssl]
vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libuuid libmagic libraw jasper lcms gumbo
vcpkg install lmdb cjson glib brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf gtest mongoose libmagic libraw jasper lcms gumbo
```
1. Build

View File

@ -43,7 +43,7 @@ Scan options
--depth=<int> Scan up to DEPTH subdirectories deep. Use 0 to only scan files in PATH. DEFAULT: -1
--archive=<str> Archive file mode (skip|list|shallow|recurse). skip: Don't parse, list: only get file names as text, shallow: Don't parse archives inside archives. DEFAULT: recurse
--archive-passphrase=<str> Passphrase for encrypted archive files
--ocr=<str> Tesseract language (use tesseract --list-langs to see which are installed on your machine)
# TODO: add new --ocr-* options here
-e, --exclude=<str> Files that match this regex will not be scanned
--fast Only index file names & mime type
--treemap-threshold=<str> Relative size threshold for treemap (see USAGE.md). DEFAULT: 0.0005

View File

@ -7,4 +7,3 @@ python3 scripts/serve_static.py > src/web/static_generated.c
python3 scripts/index_static.py > src/index/static_generated.c
printf "static const char *const Sist2CommitHash = \"%s\";\n" $(git rev-parse HEAD) > src/git_hash.h
printf "static const char *const LibScanCommitHash = \"%s\";\n" $(cd third-party/libscan/ && git rev-parse HEAD) >> src/git_hash.h

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -256,20 +256,31 @@ class Sist2Api {
});
}
getMimeTypes() {
return this.esQuery({
aggs: {
getMimeTypes(query = undefined) {
const AGGS = {
mimeTypes: {
terms: {
field: "mime",
size: 10000
}
}
},
};
if (!query) {
query = {
aggs: AGGS,
size: 0,
}).then(resp => {
};
} else {
query.size = 0;
query.aggs = AGGS;
}
return this.esQuery(query).then(resp => {
const mimeMap: any[] = [];
resp["aggregations"]["mimeTypes"]["buckets"].sort((a: any, b: any) => a.key > b.key).forEach((bucket: any) => {
const buckets = resp["aggregations"]["mimeTypes"]["buckets"];
buckets.sort((a: any, b: any) => a.key > b.key).forEach((bucket: any) => {
const tmp = bucket["key"].split("/");
const category = tmp[0];
const mime = tmp[1];
@ -289,11 +300,18 @@ class Sist2Api {
});
if (!category_exists) {
mimeMap.push({"text": category, children: [child]});
mimeMap.push({text: category, children: [child], id: category});
}
})
return mimeMap;
mimeMap.forEach(node => {
if (node.children) {
node.children.sort((a, b) => a.id.localeCompare(b.id));
}
})
mimeMap.sort((a, b) => a.id.localeCompare(b.id))
return {buckets, mimeMap};
});
}

View File

@ -15,7 +15,7 @@
<script>
import IndexDebugInfo from "@/components/IndexDebugInfo";
import DebugIcon from "@/components/DebugIcon";
import DebugIcon from "@/components/icons/DebugIcon";
export default {
name: "DebugInfo.vue",
@ -27,7 +27,6 @@ export default {
{key: "platform", value: this.$store.state.sist2Info.platform},
{key: "debugBinary", value: this.$store.state.sist2Info.debug},
{key: "sist2CommitHash", value: this.$store.state.sist2Info.sist2Hash},
{key: "libscanCommitHash", value: this.$store.state.sist2Info.libscanHash},
{key: "esIndex", value: this.$store.state.sist2Info.esIndex},
{key: "tagline", value: this.$store.state.sist2Info.tagline},
{key: "dev", value: this.$store.state.sist2Info.dev},

View File

@ -34,9 +34,11 @@
</svg>
</div>
<img v-if="doc._props.isPlayableImage || doc._props.isPlayableVideo"
<img ref="tn"
v-if="doc._props.isPlayableImage || doc._props.isPlayableVideo"
:src="(doc._props.isGif && hover) ? `f/${doc._id}` : `t/${doc._source.index}/${doc._id}`"
alt=""
:style="{height: (doc._props.isGif && hover) ? `${tnHeight()}px` : undefined}"
class="pointer fit card-img-top" @click="onThumbnailClick()">
<img v-else :src="`t/${doc._source.index}/${doc._id}`" alt=""
class="fit card-img-top">
@ -122,6 +124,9 @@ export default {
},
onTnLeave() {
this.hover = false;
},
tnHeight() {
return this.$refs.tn.height;
}
},
}

View File

@ -1,5 +1,6 @@
<template>
<b-list-group-item class="flex-column align-items-start mb-2" :class="{'sub-document': doc._props.isSubDocument}">
<b-list-group-item class="flex-column align-items-start mb-2" :class="{'sub-document': doc._props.isSubDocument}"
@mouseenter="onTnEnter()" @mouseleave="onTnLeave()" >
<!-- Info modal-->
<DocInfoModal :show="showInfo" :doc="doc" @close="showInfo = false"></DocInfoModal>
@ -56,7 +57,7 @@ import TagContainer from "@/components/TagContainer";
import DocFileTitle from "@/components/DocFileTitle";
import DocInfoModal from "@/components/DocInfoModal";
import ContentDiv from "@/components/ContentDiv";
import FileIcon from "@/components/FileIcon";
import FileIcon from "@/components/icons/FileIcon";
export default {
name: "DocListItem",
@ -85,7 +86,13 @@ export default {
return this.doc.highlight["path.nGram"] + "/"
}
return this.doc._source.path + "/"
}
},
onTnEnter() {
this.hover = true;
},
onTnLeave() {
this.hover = false;
},
}
}
</script>

View File

@ -133,6 +133,11 @@ export default Vue.extend({
font-size: 80%;
}
.theme-black .version-badge {
color: #eee !important;
background: none;
}
.version-badge {
color: #222 !important;
background: none;

View File

@ -1,6 +1,5 @@
<template>
<div>
<!-- TODO: Set slideshowTime as a configurable option-->
<FsLightbox
:key="lightboxKey"
:toggler="showLightbox"
@ -10,7 +9,7 @@
:types="lightboxTypes"
:source-index="lightboxSlide"
:custom-toolbar-buttons="customButtons"
:slideshow-time="1000 * 10"
:slideshow-time="$store.getters.optLightboxSlideDuration * 1000"
:zoom-increment="0.5"
:load-only-current-source="$store.getters.optLightboxLoadOnlyCurrent"
:on-close="onClose"

View File

@ -7,40 +7,24 @@ import InspireTree from "inspire-tree";
import InspireTreeDOM from "inspire-tree-dom";
import "inspire-tree-dom/dist/inspire-tree-light.min.css";
import {getSelectedTreeNodes} from "@/util";
import {getSelectedTreeNodes, getTreeNodeAttributes} from "@/util";
import Sist2Api from "@/Sist2Api";
import Sist2Query from "@/Sist2Query";
export default {
name: "MimePicker",
data() {
return {
mimeTree: null,
stashedMimeTreeAttributes: null
}
},
mounted() {
this.$store.subscribe((mutation) => {
if (mutation.type === "setUiMimeMap") {
const mimeMap = mutation.payload.slice();
const elem = document.getElementById("mimeTree");
console.log(elem);
this.mimeTree = new InspireTree({
selection: {
mode: 'checkbox'
},
data: mimeMap
});
new InspireTreeDOM(this.mimeTree, {
target: '#mimeTree'
});
this.mimeTree.on("node.state.changed", this.handleTreeClick);
this.mimeTree.deselect();
if (this.$store.state._onLoadSelectedMimeTypes.length > 0) {
this.$store.state._onLoadSelectedMimeTypes.forEach(mime => {
this.mimeTree.node(mime).select();
});
}
if (mutation.type === "setUiMimeMap" && this.mimeTree === null) {
this.initializeTree();
} else if (mutation.type === "busSearch") {
this.updateTree();
}
});
},
@ -52,6 +36,73 @@ export default {
this.$store.commit("setSelectedMimeTypes", getSelectedTreeNodes(this.mimeTree));
},
updateTree() {
if (this.$store.getters.optUpdateMimeMap === false) {
return;
}
if (this.stashedMimeTreeAttributes === null) {
this.stashedMimeTreeAttributes = getTreeNodeAttributes(this.mimeTree);
}
const query = Sist2Query.searchQuery();
Sist2Api.getMimeTypes(query).then(({buckets, mimeMap}) => {
this.$store.commit("setUiMimeMap", mimeMap);
this.$store.commit("setUiDetailsMimeAgg", buckets);
this.mimeTree.removeAll();
this.mimeTree.addNodes(mimeMap);
// Restore selected mimes
if (this.stashedMimeTreeAttributes === null) {
// NOTE: This happens when successive fast searches are triggered
this.stashedMimeTreeAttributes = {};
// Always add the selected mime types
this.$store.state.selectedMimeTypes.forEach(mime => {
this.stashedMimeTreeAttributes[mime] = {
checked: true
}
});
}
Object.entries(this.stashedMimeTreeAttributes).forEach(([mime, attributes]) => {
if (this.mimeTree.node(mime)) {
if (attributes.checked) {
this.mimeTree.node(mime).select();
}
if (attributes.collapsed === false) {
this.mimeTree.node(mime).expand();
}
}
});
this.stashedMimeTreeAttributes = null;
});
},
initializeTree() {
const mimeMap = this.$store.state.uiMimeMap;
this.mimeTree = new InspireTree({
selection: {
mode: "checkbox"
},
data: mimeMap
});
new InspireTreeDOM(this.mimeTree, {
target: "#mimeTree"
});
this.mimeTree.on("node.state.changed", this.handleTreeClick);
this.mimeTree.deselect();
if (this.$store.state._onLoadSelectedMimeTypes.length > 0) {
this.$store.state._onLoadSelectedMimeTypes.forEach(mime => {
this.mimeTree.node(mime).select();
});
}
}
}
}
</script>

View File

@ -20,7 +20,7 @@
</template>
<script>
import Sist2Icon from "@/components/Sist2Icon";
import Sist2Icon from "@/components/icons/Sist2Icon";
export default {
name: "NavBar",

View File

@ -3,7 +3,10 @@
<span>{{ hitCount }} {{ hitCount === 1 ? $t("hit") : $t("hits") }}</span>
<div style="float: right">
<b-button v-b-toggle.collapse-1 variant="primary" class="not-mobile">{{ $t("details") }}</b-button>
<b-button v-b-toggle.collapse-1 variant="primary" class="not-mobile" @click="onToggle()">{{
$t("details")
}}
</b-button>
<template v-if="hitCount !== 0">
<SortSelect class="ml-2"></SortSelect>
@ -14,22 +17,42 @@
<b-collapse id="collapse-1" class="pt-2" style="clear:both;">
<b-card>
<b-table :items="tableItems" small borderless thead-class="hidden" class="mb-0"></b-table>
<b-table :items="tableItems" small borderless bordered thead-class="hidden" class="mb-0"></b-table>
<br/>
<h4>
{{$t("mimeTypes")}}
<b-button size="sm" variant="primary" class="float-right" @click="onCopyClick"><ClipboardIcon/></b-button>
</h4>
<Preloader v-if="$store.state.uiDetailsMimeAgg == null"></Preloader>
<b-table
v-else
sort-by="doc_count"
:sort-desc="true"
thead-class="hidden"
:items="$store.state.uiDetailsMimeAgg" small bordered class="mb-0"
></b-table>
</b-card>
</b-collapse>
</b-card>
</template>
<script lang="ts">
import {EsResult} from "@/Sist2Api";
import Sist2Api, {EsResult} from "@/Sist2Api";
import Vue from "vue";
import {humanFileSize} from "@/util";
import DisplayModeToggle from "@/components/DisplayModeToggle.vue";
import SortSelect from "@/components/SortSelect.vue";
import Preloader from "@/components/Preloader.vue";
import Sist2Query from "@/Sist2Query";
import ClipboardIcon from "@/components/icons/ClipboardIcon.vue";
export default Vue.extend({
name: "ResultsCard",
components: {SortSelect, DisplayModeToggle},
components: {ClipboardIcon, Preloader, SortSelect, DisplayModeToggle},
created() {
},
computed: {
lastResultsLoaded() {
return this.$store.state.lastQueryResults != null;
@ -54,6 +77,39 @@ export default Vue.extend({
totalSize() {
return humanFileSize((this.$store.state.lastQueryResults as EsResult).aggregations.total_size.value);
},
onToggle() {
const show = !document.getElementById("collapse-1").classList.contains("show");
this.$store.commit("setUiShowDetails", show);
if (show && this.$store.state.uiDetailsMimeAgg == null && !this.$store.state.optUpdateMimeMap) {
// Mime aggs are not updated automatically, update now
this.forceUpdateMimeAgg();
}
},
onCopyClick() {
let tsvString = "";
this.$store.state.uiDetailsMimeAgg.slice().sort((a,b) => b["doc_count"] - a["doc_count"]).forEach(row => {
tsvString += `${row["key"]}\t${row["doc_count"]}\n`;
});
navigator.clipboard.writeText(tsvString);
this.$bvToast.toast(
this.$t("toast.copiedToClipboard"),
{
title: null,
noAutoHide: false,
toaster: "b-toaster-bottom-right",
headerClass: "hidden",
bodyClass: "toast-body-info",
});
},
forceUpdateMimeAgg() {
const query = Sist2Query.searchQuery();
Sist2Api.getMimeTypes(query).then(({buckets}) => {
this.$store.commit("setUiDetailsMimeAgg", buckets);
});
}
},
});

View File

@ -51,7 +51,7 @@
>{{ tag.text.split(".").pop() }}</span>
<b-popover :target="hit._id+tag.rawText" triggers="focus blur" placement="top">
<b-button variant="danger" @click="onTagDeleteClick(tag, $event)">Delete</b-button>
<b-button variant="danger" @click="onTagDeleteClick(tag, $event)">{{$t("deleteTag")}}</b-button>
</b-popover>
</div>
@ -63,7 +63,7 @@
</template>
<!-- Add button -->
<small v-if="showAddButton" class="badge add-tag-button" @click="tagAdd()">Add</small>
<small v-if="showAddButton" class="badge add-tag-button" @click="tagAdd()">{{$t("addTag")}}</small>
<!-- Size tag-->
<small v-else class="text-muted badge-size">{{

View File

@ -120,7 +120,7 @@ export default {
},
mounted() {
this.$store.subscribe((mutation) => {
if (mutation.type === "setUiMimeMap") {
if (mutation.type === "setUiMimeMap" && this.tagTree === null) {
this.initializeTree();
this.updateTree();
} else if (mutation.type === "busUpdateTags") {
@ -147,6 +147,7 @@ export default {
this.tagTree.on("node.state.changed", this.handleTreeClick);
},
updateTree() {
// TODO: remember which tags are selected and restore?
const tagMap = [];
Sist2Api.getTags().then(tags => {
tags.forEach(tag => addTag(tagMap, tag.id, tag.id, tag.count));

View File

@ -0,0 +1,21 @@
<template>
<svg style="width:24px;height:24px" viewBox="0 0 24 24">
<path
fill="currentColor"
d="M17,9H7V7H17M17,13H7V11H17M14,17H7V15H14M12,3A1,1 0 0,1 13,4A1,1 0 0,1 12,5A1,1 0 0,1 11,4A1,1 0 0,1 12,3M19,3H14.82C14.4,1.84 13.3,1 12,1C10.7,1 9.6,1.84 9.18,3H5A2,2 0 0,0 3,5V19A2,2 0 0,0 5,21H19A2,2 0 0,0 21,19V5A2,2 0 0,0 19,3Z"/>
</svg>
</template>
<script>
export default {
name: "ClipboardIcon"
}
</script>
<style scoped>
svg {
display: inline-block;
width: 20px;
height: 20px;
}
</style>

View File

@ -0,0 +1,21 @@
<template>
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24">
<path
fill="currentColor"
d="M12 0c-6.627 0-12 5.373-12 12s5.373 12 12 12 12-5.373 12-12-5.373-12-12-12zm1 16.057v-3.057h2.994c-.059 1.143-.212 2.24-.456 3.279-.823-.12-1.674-.188-2.538-.222zm1.957 2.162c-.499 1.33-1.159 2.497-1.957 3.456v-3.62c.666.028 1.319.081 1.957.164zm-1.957-7.219v-3.015c.868-.034 1.721-.103 2.548-.224.238 1.027.389 2.111.446 3.239h-2.994zm0-5.014v-3.661c.806.969 1.471 2.15 1.971 3.496-.642.084-1.3.137-1.971.165zm2.703-3.267c1.237.496 2.354 1.228 3.29 2.146-.642.234-1.311.442-2.019.607-.344-.992-.775-1.91-1.271-2.753zm-7.241 13.56c-.244-1.039-.398-2.136-.456-3.279h2.994v3.057c-.865.034-1.714.102-2.538.222zm2.538 1.776v3.62c-.798-.959-1.458-2.126-1.957-3.456.638-.083 1.291-.136 1.957-.164zm-2.994-7.055c.057-1.128.207-2.212.446-3.239.827.121 1.68.19 2.548.224v3.015h-2.994zm1.024-5.179c.5-1.346 1.165-2.527 1.97-3.496v3.661c-.671-.028-1.329-.081-1.97-.165zm-2.005-.35c-.708-.165-1.377-.373-2.018-.607.937-.918 2.053-1.65 3.29-2.146-.496.844-.927 1.762-1.272 2.753zm-.549 1.918c-.264 1.151-.434 2.36-.492 3.611h-3.933c.165-1.658.739-3.197 1.617-4.518.88.361 1.816.67 2.808.907zm.009 9.262c-.988.236-1.92.542-2.797.9-.89-1.328-1.471-2.879-1.637-4.551h3.934c.058 1.265.231 2.488.5 3.651zm.553 1.917c.342.976.768 1.881 1.257 2.712-1.223-.49-2.326-1.211-3.256-2.115.636-.229 1.299-.435 1.999-.597zm9.924 0c.7.163 1.362.367 1.999.597-.931.903-2.034 1.625-3.257 2.116.489-.832.915-1.737 1.258-2.713zm.553-1.917c.27-1.163.442-2.386.501-3.651h3.934c-.167 1.672-.748 3.223-1.638 4.551-.877-.358-1.81-.664-2.797-.9zm.501-5.651c-.058-1.251-.229-2.46-.492-3.611.992-.237 1.929-.546 2.809-.907.877 1.321 1.451 2.86 1.616 4.518h-3.933z"/>
</svg>
</template>
<script>
export default {
name: "LanguageIcon"
}
</script>
<style scoped>
svg {
display: inline-block;
width: 20px;
height: 20px;
}
</style>

View File

@ -5,6 +5,8 @@ export default {
advanced: "Advanced search",
fuzzy: "Fuzzy"
},
addTag: "Add",
deleteTag: "Delete",
download: "Download",
and: "and",
page: "page",
@ -64,7 +66,8 @@ export default {
resultSize: "Number of results per page",
tagOrOperator: "Use OR operator when specifying multiple tags.",
hideDuplicates: "Hide duplicate results based on checksum",
hideLegacy: "Hide the 'legacyES' Elasticsearch notice"
hideLegacy: "Hide the 'legacyES' Elasticsearch notice",
updateMimeMap: "Update the Media Types tree in real time"
},
queryMode: {
simple: "Simple",
@ -72,7 +75,8 @@ export default {
},
lang: {
en: "English",
fr: "Français"
fr: "Français",
"zh-CN": "简体中文",
},
displayMode: {
grid: "Grid",
@ -126,11 +130,13 @@ export default {
esQueryErr: "Could not parse or execute query, please check the Advanced search documentation. " +
"See server logs for more information.",
dupeTagTitle: "Duplicate tag",
dupeTag: "This tag already exists for this document."
dupeTag: "This tag already exists for this document.",
copiedToClipboard: "Copied to clipboard"
},
saveTagModalTitle: "Add tag",
saveTagPlaceholder: "Tag name",
confirm: "Confirm",
indexPickerPlaceholder: "Select an index",
sort: {
relevance: "Relevance",
dateAsc: "Date (Older first)",
@ -160,6 +166,8 @@ export default {
advanced: "Recherche avancée",
fuzzy: "Approximatif"
},
addTag: "Ajouter",
deleteTag: "Supprimer",
download: "Télécharger",
and: "et",
page: "page",
@ -220,7 +228,8 @@ export default {
resultSize: "Nombre de résultats par page",
tagOrOperator: "Utiliser l'opérateur OU lors de la spécification de plusieurs tags",
hideDuplicates: "Masquer les résultats en double",
hideLegacy: "Masquer la notice 'legacyES' Elasticsearch"
hideLegacy: "Masquer la notice 'legacyES' Elasticsearch",
updateMimeMap: "Mettre à jour l'arbre de Types de médias en temps réel"
},
queryMode: {
simple: "Simple",
@ -228,7 +237,8 @@ export default {
},
lang: {
en: "English",
fr: "Français"
fr: "Français",
"zh-CN": "简体中文",
},
displayMode: {
grid: "Grille",
@ -283,7 +293,8 @@ export default {
esQueryErr: "Impossible d'analyser ou d'exécuter la requête, veuillez consulter la documentation sur la " +
"recherche avancée. Voir les journaux du serveur pour plus d'informations.",
dupeTagTitle: "Tag en double",
dupeTag: "Ce tag existe déjà pour ce document."
dupeTag: "Ce tag existe déjà pour ce document.",
copiedToClipboard: "Copié dans le presse-papier"
},
saveTagModalTitle: "Ajouter un tag",
saveTagPlaceholder: "Nom du tag",
@ -311,5 +322,166 @@ export default {
selectedIndex: "indice sélectionné",
selectedIndices: "indices sélectionnés",
},
}
},
"zh-CN": {
searchBar: {
simple: "搜索",
advanced: "高级搜索",
fuzzy: "模糊搜索"
},
addTag: "添加",
deleteTag: "删除",
download: "下载",
and: "与",
page: "页",
pages: "页",
mimeTypes: "文件类型",
tags: "标签",
help: {
simpleSearch: "简易搜索",
advancedSearch: "高级搜索",
help: "帮助",
term: "<关键词>",
and: "与操作",
or: "或操作",
not: "反选单个关键词",
quotes: "括起来的部分视为一个关键词,保序",
prefix: "在词尾使用时,匹配前缀",
parens: "表达式编组",
tildeTerm: "匹配编辑距离以内的关键词",
tildePhrase: "匹配短语,容忍一些非匹配词",
example1:
"例如: <code>\"番茄\" +(炒蛋 | 牛腩) -饭</code> 将匹配" +
"短语 <i>番茄炒蛋</i>、<i>炒蛋</i> 或者 <i>牛腩</i>,而忽略任何带有" +
"<i>饭</i>的关键词.",
defaultOperator:
"表达式中无<code>+</code>或者<code>|</code>时,默认使用" +
"<code>+</code>(与操作)。",
fuzzy:
"选中<b>模糊搜索</b>选项时返回部分匹配的结果3-grams)。",
moreInfoSimple: "详细信息:<a target=\"_blank\" " +
"rel=\"noreferrer\" href=\"//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html\">Elasticsearch文档</a>",
moreInfoAdvanced: "高级搜索模式文档:<a target=\"_blank\" rel=\"noreferrer\" href=\"//www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax\">Elasticsearch文档</a>"
},
config: "配置",
configDescription: "配置在此浏览器中实时保存。",
configReset: "重置所有设置",
searchOptions: "搜索选项",
treemapOptions: "树状图选项",
displayOptions: "显示选项",
opt: {
lang: "语言",
highlight: "启用高亮",
fuzzy: "默认使用模糊搜索",
searchInPath: "匹配文档路径",
suggestPath: "搜索框启用自动补全",
fragmentSize: "高亮上下文大小",
queryMode: "搜索模式",
displayMode: "显示",
columns: "列数",
treemapType: "树状图类属性",
treemapTiling: "树状图平铺",
treemapColorGroupingDepth: "树状图颜色编组深度(展开)",
treemapColor: "树状图颜色(折叠)",
treemapSize: "树状图大小",
theme: "主题",
lightboxLoadOnlyCurrent: "在图片查看器中,不要预读相邻的全图",
slideDuration: "幻灯片时长",
resultSize: "每页结果数",
tagOrOperator: "使用或操作OR匹配多个标签。",
hideDuplicates: "使用校验码隐藏重复结果",
hideLegacy: "隐藏'legacyES' Elasticsearch 通知",
updateMimeMap: "媒体类型树的实时更新"
},
queryMode: {
simple: "简单",
advanced: "高级",
},
lang: {
en: "English",
fr: "Français",
"zh-CN": "简体中文",
},
displayMode: {
grid: "网格",
list: "列表",
},
columns: {
auto: "自动"
},
treemapType: {
cascaded: "折叠",
flat: "平铺(紧凑)"
},
treemapSize: {
small: "小",
medium: "中",
large: "大",
xLarge: "加大",
xxLarge: "加加大",
custom: "自订",
},
treemapTiling: {
binary: "Binary",
squarify: "Squarify",
slice: "Slice",
dice: "Dice",
sliceDice: "Slice & Dice",
},
theme: {
light: "亮",
black: "暗"
},
hit: "命中",
hits: "命中",
details: "详细信息",
stats: "统计信息",
queryTime: "查询时间",
totalSize: "总大小",
pathBar: {
placeholder: "过滤路径",
modalTitle: "选择路径"
},
debug: "调试信息",
debugDescription: "对调试除错有用的信息。 若您遇到bug或者想建议新功能请提交新Issue到" +
"<a href='https://github.com/simon987/sist2/issues/new/choose'>这里</a>.",
tagline: "标签栏",
toast: {
esConnErrTitle: "Elasticsearch连接错误",
esConnErr: "sist2 web 模块连接Elasticsearch出错。" +
"查看服务日志以获取更多信息。",
esQueryErrTitle: "查询错误",
esQueryErr: "无法识别或执行查询,请查阅高级搜索文档。" +
"查看服务日志以获取更多信息。",
dupeTagTitle: "重复标签",
dupeTag: "该标签已存在于此文档。",
copiedToClipboard: "复制到剪贴板"
},
saveTagModalTitle: "增加标签",
saveTagPlaceholder: "标签名",
confirm: "确认",
indexPickerPlaceholder: "选择一个索引",
sort: {
relevance: "相关度",
dateAsc: "日期(由旧到新)",
dateDesc: "日期(由新到旧)",
sizeAsc: "大小(从小到大)",
sizeDesc: "大小(从大到小)",
nameAsc: "名字A-z",
nameDesc: "名字 Z-a",
random: "随机",
},
d3: {
mimeCount: "各类文件数量分布",
mimeSize: "各类文件大小分布",
dateHistogram: "文件修改时间分布",
sizeHistogram: "文件大小分布",
},
indexPicker: {
selectNone: "清空",
selectAll: "全选",
selectedIndex: "选中索引",
selectedIndices: "选中索引",
},
},
}

View File

@ -27,6 +27,7 @@ export default new Vuex.Store({
size: 60,
optLang: "en",
optLangIsDefault: true,
optHideDuplicates: true,
optTheme: "light",
optDisplay: "grid",
@ -47,6 +48,7 @@ export default new Vuex.Store({
optLightboxLoadOnlyCurrent: false,
optLightboxSlideDuration: 15,
optHideLegacy: false,
optUpdateMimeMap: true,
_onLoadSelectedIndices: [] as string[],
_onLoadSelectedMimeTypes: [] as string[],
@ -71,9 +73,14 @@ export default new Vuex.Store({
uiLightboxSlide: 0,
uiReachedScrollEnd: false,
uiDetailsMimeAgg: null,
uiShowDetails: false,
uiMimeMap: [] as any[]
},
mutations: {
setUiShowDetails: (state, val) => state.uiShowDetails = val,
setUiDetailsMimeAgg: (state, val) => state.uiDetailsMimeAgg = val,
setUiReachedScrollEnd: (state, val) => state.uiReachedScrollEnd = val,
setTags: (state, val) => state.tags = val,
setPathText: (state, val) => state.pathText = val,
@ -82,7 +89,10 @@ export default new Vuex.Store({
setSist2Info: (state, val) => state.sist2Info = val,
setSeed: (state, val) => state.seed = val,
setOptHideDuplicates: (state, val) => state.optHideDuplicates = val,
setOptLang: (state, val) => state.optLang = val,
setOptLang: (state, val) => {
state.optLang = val;
state.optLangIsDefault = false;
},
setSortMode: (state, val) => state.sortMode = val,
setIndices: (state, val) => {
state.indices = val;
@ -146,8 +156,10 @@ export default new Vuex.Store({
setOptTreemapSize: (state, val) => state.optTreemapSize = val,
setOptTreemapColor: (state, val) => state.optTreemapColor = val,
setOptHideLegacy: (state, val) => state.optHideLegacy = val,
setOptUpdateMimeMap: (state, val) => state.optUpdateMimeMap = val,
setOptLightboxLoadOnlyCurrent: (state, val) => state.optLightboxLoadOnlyCurrent = val,
setOptLightboxSlideDuration: (state, val) => state.optLightboxSlideDuration = val,
setUiMimeMap: (state, val) => state.uiMimeMap = val,
@ -157,8 +169,18 @@ export default new Vuex.Store({
busUpdateTags: () => {
// noop
},
busSearch: () => {
// noop
},
},
actions: {
setSist2Info: (store, val) => {
store.commit("setSist2Info", val);
if (store.state.optLangIsDefault) {
store.commit("setOptLang", val.lang);
}
},
loadFromArgs({commit}, route: Route) {
if (route.query.q) {
@ -278,6 +300,7 @@ export default new Vuex.Store({
commit("setUiLightboxTypes", []);
commit("setUiLightboxCaptions", []);
commit("setUiLightboxKey", 0);
commit("setUiDetailsMimeAgg", null);
}
},
modules: {},
@ -342,5 +365,6 @@ export default new Vuex.Store({
optLightboxSlideDuration: state => state.optLightboxSlideDuration,
optResultSize: state => state.size,
optHideLegacy: state => state.optHideLegacy,
optUpdateMimeMap: state => state.optUpdateMimeMap,
}
})

View File

@ -97,6 +97,30 @@ export function getSelectedTreeNodes(tree: any) {
return Array.from(selectedNodes);
}
export function getTreeNodeAttributes(tree: any) {
const nodes = tree.selectable();
const attributes = {};
for (let i = 0; i < nodes.length; i++) {
let id = null;
if (nodes[i].text.indexOf("(") !== -1 && nodes[i].values) {
id = nodes[i].values.slice(-1)[0];
} else {
id = nodes[i].id
}
attributes[id] = {
checked: nodes[i].itree.state.checked,
collapsed: nodes[i].itree.state.collapsed,
}
}
return attributes;
}
export function serializeMimes(mimes: string[]): string | undefined {
if (mimes.length == 0) {
return undefined;

View File

@ -15,15 +15,8 @@
<h4>{{ $t("displayOptions") }}</h4>
<b-card>
<b-form-checkbox :checked="optLightboxLoadOnlyCurrent" @input="setOptLightboxLoadOnlyCurrent">
{{ $t("opt.lightboxLoadOnlyCurrent") }}
</b-form-checkbox>
<b-form-checkbox :checked="optHideLegacy" @input="setOptHideLegacy">
{{ $t("opt.hideLegacy") }}
</b-form-checkbox>
<label>{{ $t("opt.lang") }}</label>
<label><LanguageIcon/><span style="vertical-align: middle">&nbsp;{{ $t("opt.lang") }}</span></label>
<b-form-select :options="langOptions" :value="optLang" @input="setOptLang"></b-form-select>
<label>{{ $t("opt.theme") }}</label>
@ -34,6 +27,20 @@
<label>{{ $t("opt.columns") }}</label>
<b-form-select :options="columnsOptions" :value="optColumns" @input="setOptColumns"></b-form-select>
<div style="height: 10px"></div>
<b-form-checkbox :checked="optLightboxLoadOnlyCurrent" @input="setOptLightboxLoadOnlyCurrent">
{{ $t("opt.lightboxLoadOnlyCurrent") }}
</b-form-checkbox>
<b-form-checkbox :checked="optHideLegacy" @input="setOptHideLegacy">
{{ $t("opt.hideLegacy") }}
</b-form-checkbox>
<b-form-checkbox :checked="optUpdateMimeMap" @input="setOptUpdateMimeMap">
{{ $t("opt.updateMimeMap") }}
</b-form-checkbox>
</b-card>
<br/>
@ -117,15 +124,15 @@
</template>
<script>
import Vue from "vue";
import {mapGetters, mapMutations} from "vuex";
import {mapActions, mapGetters, mapMutations} from "vuex";
import DebugInfo from "@/components/DebugInfo.vue";
import Preloader from "@/components/Preloader.vue";
import sist2 from "@/Sist2Api";
import GearIcon from "@/components/GearIcon.vue";
import GearIcon from "@/components/icons/GearIcon.vue";
import LanguageIcon from "@/components/icons/LanguageIcon";
export default {
components: {GearIcon, DebugInfo, Preloader},
components: {LanguageIcon, GearIcon, DebugInfo, Preloader},
data() {
return {
loading: true,
@ -133,6 +140,7 @@ export default {
langOptions: [
{value: "en", text: this.$t("lang.en")},
{value: "fr", text: this.$t("lang.fr")},
{value: "zh-CN", text: this.$t("lang.zh-CN")},
],
queryModeOptions: [
{value: "simple", text: this.$t("queryMode.simple")},
@ -220,6 +228,7 @@ export default {
"optLang",
"optHideDuplicates",
"optHideLegacy",
"optUpdateMimeMap",
]),
clientWidth() {
return window.innerWidth;
@ -227,7 +236,7 @@ export default {
},
mounted() {
sist2.getSist2Info().then(data => {
this.$store.commit("setSist2Info", data)
this.setSist2Info(data);
this.loading = false;
});
@ -238,6 +247,9 @@ export default {
});
},
methods: {
...mapActions({
setSist2Info: "setSist2Info",
}),
...mapMutations([
"setOptTheme",
"setOptDisplay",
@ -255,12 +267,12 @@ export default {
"setOptTreemapSize",
"setOptLightboxLoadOnlyCurrent",
"setOptLightboxSlideDuration",
"setOptContainerWidth",
"setOptResultSize",
"setOptTagOrOperator",
"setOptLang",
"setOptHideDuplicates",
"setOptHideLegacy"
"setOptHideLegacy",
"setOptUpdateMimeMap"
]),
onResetClick() {
localStorage.removeItem("sist2_configuration");

View File

@ -60,7 +60,7 @@
<script lang="ts">
import Preloader from "@/components/Preloader.vue";
import {mapGetters, mapMutations} from "vuex";
import {mapActions, mapGetters, mapMutations} from "vuex";
import sist2 from "../Sist2Api";
import Sist2Api, {EsHit, EsResult} from "../Sist2Api";
import SearchBar from "@/components/SearchBar.vue";
@ -139,7 +139,7 @@ export default Vue.extend({
this.setSist2Info(data);
this.setIndices(data.indices);
Sist2Api.getMimeTypes().then(mimeMap => {
Sist2Api.getMimeTypes(Sist2Query.searchQuery()).then(({mimeMap}) => {
this.$store.commit("setUiMimeMap", mimeMap);
this.uiLoading = false;
this.search(true);
@ -151,8 +151,10 @@ export default Vue.extend({
});
},
methods: {
...mapMutations({
...mapActions({
setSist2Info: "setSist2Info",
}),
...mapMutations({
setIndices: "setIndices",
setDateBoundsMin: "setDateBoundsMin",
setDateBoundsMax: "setDateBoundsMax",
@ -183,6 +185,7 @@ export default Vue.extend({
async searchNow(q: any) {
this.searchBusy = true;
await this.$store.dispatch("incrementQuerySequence");
this.$store.commit("busSearch");
Sist2Api.esQuery(q).then(async (resp: EsResult) => {
await this.handleSearch(resp);
@ -284,6 +287,11 @@ export default Vue.extend({
border: none;
}
.toast-header-info, .toast-body-info {
background: #2196f3;
color: #fff !important;
}
.toast-header-error, .toast-body-error {
background: #a94442;
color: #f2dede !important;

View File

@ -22,6 +22,7 @@
const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
"/usr/share/tesseract-ocr/4.00/tessdata/",
"./",
NULL
};
@ -145,7 +146,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
if (args->name == NULL) {
args->name = g_path_get_basename(args->output);
} else {
char* tmp = malloc(strlen(args->name) + 1);
char *tmp = malloc(strlen(args->name) + 1);
strcpy(tmp, args->name);
args->name = tmp;
}
@ -167,17 +168,50 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
return 1;
}
if (args->tesseract_lang != NULL) {
TessBaseAPI *api = TessBaseAPICreate();
char filename[128];
sprintf(filename, "%s.traineddata", args->tesseract_lang);
const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
if (path == NULL) {
LOG_FATAL("cli.c", "Could not find tesseract language file!");
if (args->ocr_images && args->tesseract_lang == NULL) {
fprintf(stderr, "You must specify --ocr-lang <LANG> to use --ocr-images");
return 1;
}
ret = TessBaseAPIInit3(api, path, args->tesseract_lang);
if (args->ocr_ebooks && args->tesseract_lang == NULL) {
fprintf(stderr, "You must specify --ocr-lang <LANG> to use --ocr-ebooks");
return 1;
}
if (args->tesseract_lang != NULL) {
if (!args->ocr_ebooks && !args->ocr_images) {
fprintf(stderr, "You must specify at least one of --ocr-ebooks, --ocr-images");
return 1;
}
TessBaseAPI *api = TessBaseAPICreate();
const char *trained_data_path = NULL;
char *lang = malloc(strlen(args->tesseract_lang) + 1);
strcpy(lang, args->tesseract_lang);
lang = strtok(lang, "+");
while (lang != NULL) {
char filename[128];
sprintf(filename, "%s.traineddata", lang);
const char *path = find_file_in_paths(TESS_DATAPATHS, filename);
if (path == NULL) {
LOG_FATALF("cli.c", "Could not find tesseract language file: %s!", filename);
}
if (trained_data_path != NULL && path != trained_data_path) {
LOG_FATAL("cli.c", "When specifying more than one tesseract language, all the traineddata "
"files must be in the same folder")
}
trained_data_path = path;
lang = strtok(NULL, "+");
}
free(lang);
ret = TessBaseAPIInit3(api, trained_data_path, args->tesseract_lang);
if (ret != 0) {
fprintf(stderr, "Could not initialize tesseract with lang '%s'\n", args->tesseract_lang);
return 1;
@ -185,7 +219,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
TessBaseAPIEnd(api);
TessBaseAPIDelete(api);
args->tesseract_path = path;
args->tesseract_path = trained_data_path;
}
if (args->exclude_regex != NULL) {
@ -218,6 +252,19 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
args->max_memory_buffer = DEFAULT_MAX_MEM_BUFFER;
}
if (args->list_path != NULL) {
if (strcmp(args->list_path, "-") == 0) {
args->list_file = stdin;
LOG_DEBUG("cli.c", "Using stdin as list file")
} else {
args->list_file = fopen(args->list_path, "r");
if (args->list_file == NULL) {
LOG_FATALF("main.c", "List file could not be opened: %s (%s)", args->list_path, errno);
}
}
}
LOG_DEBUGF("cli.c", "arg quality=%f", args->quality)
LOG_DEBUGF("cli.c", "arg size=%d", args->size)
LOG_DEBUGF("cli.c", "arg content_size=%d", args->content_size)
@ -237,6 +284,7 @@ int scan_args_validate(scan_args_t *args, int argc, const char **argv) {
LOG_DEBUGF("cli.c", "arg fast_epub=%d", args->fast_epub)
LOG_DEBUGF("cli.c", "arg treemap_threshold=%f", args->treemap_threshold)
LOG_DEBUGF("cli.c", "arg max_memory_buffer=%d", args->max_memory_buffer)
LOG_DEBUGF("cli.c", "arg list_path=%s", args->list_path)
return 0;
}
@ -362,15 +410,15 @@ int web_args_validate(web_args_t *args, int argc, const char **argv) {
args->es_index = DEFAULT_ES_INDEX;
}
if (args->lang == NULL) {
args->lang = DEFAULT_LANG;
}
if (args->tagline == NULL) {
args->tagline = DEFAULT_TAGLINE;
}
if (strlen(args->lang) != 2) {
if (args->lang == NULL) {
args->lang = DEFAULT_LANG;
}
if (strlen(args->lang) != 2 && strlen(args->lang) != 5) {
fprintf(stderr, "Invalid --lang value, see usage\n");
return 1;
}

View File

@ -21,6 +21,8 @@ typedef struct scan_args {
char *archive_passphrase;
char *tesseract_lang;
const char *tesseract_path;
int ocr_images;
int ocr_ebooks;
char *exclude_regex;
int fast;
const char* treemap_threshold_str;
@ -29,6 +31,8 @@ typedef struct scan_args {
int read_subtitles;
int fast_epub;
int calculate_checksums;
char *list_path;
FILE *list_file;
} scan_args_t;
scan_args_t *scan_args_create();

View File

@ -41,6 +41,7 @@ typedef struct {
GHashTable *original_table;
GHashTable *copy_table;
pthread_mutex_t copy_table_mu;
pcre *exclude;
pcre_extra *exclude_extra;
@ -97,7 +98,7 @@ typedef struct {
int tag_auth_enabled;
char *tagline;
struct index_t indices[256];
char lang[3];
char lang[10];
int dev;
} WebCtx_t;

View File

@ -38,6 +38,8 @@ char *get_meta_key_text(enum metakey meta_key) {
return "parent";
case MetaExifMake:
return "exif_make";
case MetaExifDescription:
return "exif_description";
case MetaExifSoftware:
return "exif_software";
case MetaExifExposureTime:
@ -150,6 +152,7 @@ char *build_json_string(document_t *doc) {
case MetaFontName:
case MetaParent:
case MetaExifMake:
case MetaExifDescription:
case MetaExifSoftware:
case MetaExifExposureTime:
case MetaExifFNumber:

View File

@ -4,6 +4,8 @@
#include <ftw.h>
#define STR_STARTS_WITH(x, y) (strncmp(y, x, strlen(y) - 1) == 0)
__always_inline
parse_job_t *create_fs_parse_job(const char *filepath, const struct stat *info, int base) {
int len = (int) strlen(filepath);
@ -77,3 +79,57 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
int walk_directory_tree(const char *dirpath) {
return nftw(dirpath, handle_entry, MAX_FILE_DESCRIPTORS, FTW_PHYS | FTW_ACTIONRETVAL);
}
int iterate_file_list(void *input_file) {
char buf[PATH_MAX];
struct stat info;
while (fgets(buf, sizeof(buf), input_file) != NULL) {
// Remove trailing newline
*(buf + strlen(buf) - 1) = '\0';
int stat_ret = stat(buf, &info);
if (stat_ret != 0) {
LOG_ERRORF("walk.c", "Could not stat file %s (%s)", buf, strerror(errno));
continue;
}
if (!S_ISREG(info.st_mode)) {
LOG_ERRORF("walk.c", "Is not a regular file: %s", buf);
continue;
}
char *absolute_path = canonicalize_file_name(buf);
if (absolute_path == NULL) {
LOG_FATALF("walk.c", "FIXME: Could not get absolute path of %s", buf);
}
if (ScanCtx.exclude != NULL && EXCLUDED(absolute_path)) {
LOG_DEBUGF("walk.c", "Excluded: %s", absolute_path)
if (S_ISREG(info.st_mode)) {
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_excluded_files_count += 1;
pthread_mutex_unlock(&ScanCtx.dbg_file_counts_mu);
}
continue;
}
if (!STR_STARTS_WITH(absolute_path, ScanCtx.index.desc.root)) {
LOG_FATALF("walk.c", "File is not a children of root folder (%s): %s", ScanCtx.index.desc.root, buf);
}
int base = (int) (strrchr(buf, '/') - buf) + 1;
parse_job_t *job = create_fs_parse_job(absolute_path, &info, base);
free(absolute_path);
tpool_add_work(ScanCtx.pool, parse, job);
}
return 0;
}

View File

@ -5,4 +5,6 @@
int walk_directory_tree(const char *);
int iterate_file_list(void* input_file);
#endif

View File

@ -14,6 +14,9 @@
#include "parsing/mime.h"
#include "parsing/parse.h"
#include <signal.h>
#include <unistd.h>
#include "stats.h"
#define DESCRIPTION "Lightning-fast file system indexer and search tool."
@ -29,8 +32,6 @@ static const char *const usage[] = {
NULL,
};
#include<signal.h>
#include<unistd.h>
static __sighandler_t sigsegv_handler = NULL;
static __sighandler_t sigabrt_handler = NULL;
@ -169,6 +170,7 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.dbg_current_files = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, NULL);
pthread_mutex_init(&ScanCtx.dbg_current_files_mu, NULL);
pthread_mutex_init(&ScanCtx.dbg_file_counts_mu, NULL);
pthread_mutex_init(&ScanCtx.copy_table_mu, NULL);
ScanCtx.calculate_checksums = args->calculate_checksums;
@ -218,6 +220,11 @@ void initialize_scan_context(scan_args_t *args) {
ScanCtx.media_ctx.store = _store;
ScanCtx.media_ctx.max_media_buffer = (long) args->max_memory_buffer * 1024 * 1024;
ScanCtx.media_ctx.read_subtitles = args->read_subtitles;
if (args->ocr_images) {
ScanCtx.media_ctx.tesseract_lang = args->tesseract_lang;
ScanCtx.media_ctx.tesseract_path = args->tesseract_path;
}
init_media();
// OOXML
@ -334,10 +341,20 @@ void sist2_scan(scan_args_t *args) {
ScanCtx.writer_pool = tpool_create(1, writer_cleanup, TRUE, FALSE);
tpool_start(ScanCtx.writer_pool);
if (args->list_path) {
// Scan using file list
int list_ret = iterate_file_list(args->list_file);
if (list_ret != 0) {
LOG_FATALF("main.c", "iterate_file_list() failed! (%d)", list_ret)
}
} else {
// Scan directory recursively
int walk_ret = walk_directory_tree(ScanCtx.index.desc.root);
if (walk_ret == -1) {
LOG_FATALF("main.c", "walk_directory_tree() failed! %s (%d)", strerror(errno), errno)
}
}
tpool_wait(ScanCtx.pool);
tpool_destroy(ScanCtx.pool);
@ -489,7 +506,7 @@ void sist2_web(web_args_t *args) {
WebCtx.tag_auth_enabled = args->tag_auth_enabled;
WebCtx.tagline = args->tagline;
WebCtx.dev = args->dev;
strcpy(WebCtx.lang, "en");
strcpy(WebCtx.lang, args->lang);
for (int i = 0; i < args->index_count; i++) {
char *abs_path = abspath(args->indices[i]);
@ -564,8 +581,11 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "archive-passphrase", &scan_args->archive_passphrase,
"Passphrase for encrypted archive files"),
OPT_STRING(0, "ocr", &scan_args->tesseract_lang, "Tesseract language (use tesseract --list-langs to see "
OPT_STRING(0, "ocr-lang", &scan_args->tesseract_lang,
"Tesseract language (use 'tesseract --list-langs' to see "
"which are installed on your machine)"),
OPT_BOOLEAN(0, "ocr-images", &scan_args->ocr_images, "Enable OCR'ing of image files."),
OPT_BOOLEAN(0, "ocr-ebooks", &scan_args->ocr_ebooks, "Enable OCR'ing of ebook files."),
OPT_STRING('e', "exclude", &scan_args->exclude_regex, "Files that match this regex will not be scanned"),
OPT_BOOLEAN(0, "fast", &scan_args->fast, "Only index file names & mime type"),
OPT_STRING(0, "treemap-threshold", &scan_args->treemap_threshold_str, "Relative size threshold for treemap "
@ -577,6 +597,9 @@ int main(int argc, const char *argv[]) {
OPT_BOOLEAN(0, "fast-epub", &scan_args->fast_epub,
"Faster but less accurate EPUB parsing (no thumbnails, metadata)"),
OPT_BOOLEAN(0, "checksums", &scan_args->calculate_checksums, "Calculate file checksums when scanning."),
OPT_STRING(0, "list-file", &scan_args->list_path, "Specify a list of newline-delimited paths to be scanned"
" instead of normal directory traversal. Use '-' to read"
" from stdin."),
OPT_GROUP("Index options"),
OPT_INTEGER('t', "threads", &common_threads, "Number of threads. DEFAULT=1"),
@ -599,6 +622,7 @@ int main(int argc, const char *argv[]) {
OPT_STRING(0, "tag-auth", &web_args->tag_credentials, "Basic auth in user:password format for tagging"),
OPT_STRING(0, "tagline", &web_args->tagline, "Tagline in navbar"),
OPT_BOOLEAN(0, "dev", &web_args->dev, "Serve html & js files from disk (for development)"),
OPT_STRING(0, "lang", &web_args->lang, "Default UI language. Can be changed by the user"),
OPT_GROUP("Exec-script options"),
OPT_STRING(0, "es-url", &common_es_url, "Elasticsearch url. DEFAULT=http://localhost:9200"),

View File

@ -79,7 +79,9 @@ void parse(void *arg) {
int inc_ts = incremental_get(ScanCtx.original_table, doc->path_md5);
if (inc_ts != 0 && inc_ts == job->vfile.info.st_mtim.tv_sec) {
pthread_mutex_lock(&ScanCtx.copy_table_mu);
incremental_mark_file_for_copy(ScanCtx.copy_table, doc->path_md5);
pthread_mutex_unlock(&ScanCtx.copy_table_mu);
pthread_mutex_lock(&ScanCtx.dbg_file_counts_mu);
ScanCtx.dbg_skipped_files_count += 1;

View File

@ -133,6 +133,9 @@ static int incremental_get_str(GHashTable *table, const char *path_md5) {
}
}
/**
* Not thread safe!
*/
__always_inline
static int incremental_mark_file_for_copy(GHashTable *table, const unsigned char path_md5[MD5_DIGEST_LENGTH]) {
char *ptr = malloc(MD5_STR_LENGTH);

View File

@ -280,7 +280,6 @@ void index_info(struct mg_connection *nc) {
cJSON_AddBoolToObject(json, "esVersionLegacy", USE_LEGACY_ES_SETTINGS(WebCtx.es_version));
cJSON_AddStringToObject(json, "platform", QUOTE(SIST_PLATFORM));
cJSON_AddStringToObject(json, "sist2Hash", Sist2CommitHash);
cJSON_AddStringToObject(json, "libscanHash", LibScanCommitHash);
cJSON_AddStringToObject(json, "lang", WebCtx.lang);
cJSON_AddBoolToObject(json, "dev", WebCtx.dev);
#ifdef SIST_DEBUG

File diff suppressed because one or more lines are too long

View File

@ -5,9 +5,7 @@
#include "../media/media.h"
#include "../arc/arc.h"
#define MIN_OCR_SIZE 350
#define MIN_OCR_LEN 10
#include "../ocr/ocr.h"
/* fill_image callback doesn't let us pass opaque pointers unless I create my own device */
__thread text_buffer_t thread_buffer;
@ -225,7 +223,9 @@ static int read_stext_block(fz_stext_block *block, text_buffer_t *tex) {
return 0;
}
#define IS_VALID_BPP(d) ((d)==1 || (d)==2 || (d)==4 || (d)==8 || (d)==16 || (d)==24 || (d)==32)
static void fill_image_ocr_cb(const char* text, size_t len) {
text_buffer_append_string(&thread_buffer, text, len - 1);
}
void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
fz_image *img, UNUSED(fz_matrix ctm), UNUSED(float alpha),
@ -233,26 +233,9 @@ void fill_image(fz_context *fzctx, UNUSED(fz_device *dev),
int l2factor = 0;
if (img->w > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && IS_VALID_BPP(img->n)) {
if (img->w >= MIN_OCR_WIDTH && img->h >= MIN_OCR_HEIGHT && OCR_IS_VALID_BPP(img->n)) {
fz_pixmap *pix = img->get_pixmap(fzctx, img, NULL, img->w, img->h, &l2factor);
if (pix->h > MIN_OCR_SIZE && img->h > MIN_OCR_SIZE && img->xres != 0) {
TessBaseAPI *api = TessBaseAPICreate();
TessBaseAPIInit3(api, thread_ctx.tesseract_path, thread_ctx.tesseract_lang);
TessBaseAPISetImage(api, pix->samples, pix->w, pix->h, pix->n, pix->stride);
TessBaseAPISetSourceResolution(api, pix->xres);
char *text = TessBaseAPIGetUTF8Text(api);
size_t len = strlen(text);
if (len >= MIN_OCR_LEN) {
text_buffer_append_string(&thread_buffer, text, len - 1);
}
TessBaseAPIEnd(api);
TessBaseAPIDelete(api);
}
ocr_extract_text(thread_ctx.tesseract_path, thread_ctx.tesseract_lang, pix->samples, pix->w, pix->h, pix->n, pix->stride, pix->xres, fill_image_ocr_cb);
fz_drop_pixmap(fzctx, pix);
}
}

View File

@ -1,12 +1,18 @@
#include "media.h"
#include "../ocr/ocr.h"
#include <ctype.h>
#define MIN_SIZE 32
#define AVIO_BUF_SIZE 8192
#define IS_VIDEO(fmt) (fmt->iformat->name && strcmp(fmt->iformat->name, "image2") != 0)
#define IS_VIDEO(fmt) ((fmt)->iformat->name && strcmp((fmt)->iformat->name, "image2") != 0)
#define STREAM_IS_IMAGE (stream->nb_frames <= 1)
#define STORE_AS_IS ((void*)-1)
// Pointer to document being processed
__thread document_t *thread_doc;
const char *get_filepath_with_ext(document_t *doc, const char *filepath, const char *mime_str) {
int has_extension = doc->ext > doc->base;
@ -311,7 +317,7 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f
if (strcmp(key, "artist") == 0) {
append_tag_meta_if_not_exists(ctx, doc, tag, MetaArtist);
} else if (strcmp(key, "imagedescription") == 0) {
APPEND_TAG_META(MetaContent)
append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent);
} else if (strcmp(key, "make") == 0) {
APPEND_TAG_META(MetaExifMake)
} else if (strcmp(key, "model") == 0) {
@ -343,6 +349,55 @@ append_video_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVFrame *f
}
}
static void ocr_image_cb(const char *text, size_t len) {
APPEND_STR_META(thread_doc, MetaContent, text);
}
#define OCR_PIXEL_FORMAT AV_PIX_FMT_RGB32
#define OCR_BYTES_PER_PIXEL 4
#define OCR_PIXELS_PER_INCH 70
void ocr_image(scan_media_ctx_t *ctx, document_t *doc, const AVCodecContext *decoder, AVFrame *frame) {
// Convert to RGB32
AVFrame *rgb_frame = av_frame_alloc();
struct SwsContext *sws_ctx = sws_getContext(
frame->width, frame->height, decoder->pix_fmt,
frame->width, frame->height, OCR_PIXEL_FORMAT,
SWS_LANCZOS, 0, 0, 0
);
int dst_buf_len = av_image_get_buffer_size(OCR_PIXEL_FORMAT, frame->width, frame->height, 1);
uint8_t *dst_buf = (uint8_t *) av_malloc(dst_buf_len * 2);
av_image_fill_arrays(rgb_frame->data, rgb_frame->linesize, dst_buf, OCR_PIXEL_FORMAT, frame->width, frame->height,
1);
sws_scale(sws_ctx,
(const uint8_t *const *) frame->data, frame->linesize,
0, frame->height,
rgb_frame->data, rgb_frame->linesize
);
thread_doc = doc;
ocr_extract_text(
ctx->tesseract_path,
ctx->tesseract_lang,
rgb_frame->data[0],
frame->width,
frame->height,
OCR_BYTES_PER_PIXEL,
rgb_frame->linesize[0],
OCR_PIXELS_PER_INCH,
ocr_image_cb
);
sws_freeContext(sws_ctx);
av_free(*rgb_frame->data);
av_frame_free(&rgb_frame);
}
void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, document_t *doc) {
int video_stream = -1;
@ -419,11 +474,11 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
avcodec_open2(decoder, video_codec, NULL);
//Seek
if (stream->nb_frames > 1 && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
if (!STREAM_IS_IMAGE && stream->codecpar->codec_id != AV_CODEC_ID_GIF) {
int seek_ret;
for (int i = 20; i >= 0; i--) {
seek_ret = av_seek_frame(pFormatCtx, video_stream,
stream->duration * 0.10, 0);
(long) ((double) stream->duration * 0.10), 0);
if (seek_ret == 0) {
break;
}
@ -438,6 +493,11 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
return;
}
if (ctx->tesseract_lang != NULL && STREAM_IS_IMAGE) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
}
// NOTE: OCR'd content takes precedence over exif image description
append_video_meta(ctx, pFormatCtx, frame_and_packet->frame, doc, IS_VIDEO(pFormatCtx));
// Scale frame
@ -534,7 +594,7 @@ long memfile_seek(void *ptr, long offset, int whence) {
memfile_t *mem = ptr;
if (whence == 0x10000) {
return mem->size;
return (long) mem->size;
}
int ret = fseek(mem->file, offset, whence);

View File

@ -19,6 +19,9 @@ typedef struct {
float tn_qscale;
long max_media_buffer;
int read_subtitles;
const char *tesseract_lang;
const char *tesseract_path;
} scan_media_ctx_t;
__always_inline

47
third-party/libscan/libscan/ocr/ocr.h vendored Normal file
View File

@ -0,0 +1,47 @@
#ifndef OCR_H
#define OCR_H
#include "../scan.h"
#include <tesseract/capi.h>
#define MIN_OCR_WIDTH 350
#define MIN_OCR_HEIGHT 100
#define MIN_OCR_LEN 10
#define OCR_IS_VALID_BPP(d) \
((d) == 1 || (d) == 2 || (d) == 4 || (d) == 8 || (d) == 16 || (d) == 24 || \
(d) == 32)
typedef void (*ocr_extract_callback_t)(const char *, size_t);
__always_inline static void
ocr_extract_text(const char *tesseract_path, const char *tesseract_lang,
const unsigned char *img_buf, const int img_w, const int img_h,
const int img_bpp, const int img_stride, const int img_xres,
const ocr_extract_callback_t cb) {
if (img_w < MIN_OCR_WIDTH || img_h < MIN_OCR_HEIGHT || img_xres <= 0 ||
!OCR_IS_VALID_BPP(img_bpp)) {
return;
}
TessBaseAPI *api = TessBaseAPICreate();
TessBaseAPIInit3(api, tesseract_path, tesseract_lang);
TessBaseAPISetImage(api, img_buf, img_w, img_h, img_bpp, img_stride);
TessBaseAPISetSourceResolution(api, img_xres);
char *text = TessBaseAPIGetUTF8Text(api);
if (text != NULL) {
size_t len = strlen(text);
if (len >= MIN_OCR_LEN) {
cb(text, len);
}
TessDeleteText(text);
}
TessBaseAPIEnd(api);
TessBaseAPIDelete(api);
}
#endif

View File

@ -15,18 +15,18 @@ static int should_read_part(const char *part) {
}
if ( // Word
STR_STARTS_WITH(part, "word/document.xml")
|| STR_STARTS_WITH(part, "word/footnotes.xml")
|| STR_STARTS_WITH(part, "word/endnotes.xml")
|| STR_STARTS_WITH(part, "word/footer")
|| STR_STARTS_WITH(part, "word/header")
STR_STARTS_WITH_CONSTANT(part, "word/document.xml")
|| STR_STARTS_WITH_CONSTANT(part, "word/footnotes.xml")
|| STR_STARTS_WITH_CONSTANT(part, "word/endnotes.xml")
|| STR_STARTS_WITH_CONSTANT(part, "word/footer")
|| STR_STARTS_WITH_CONSTANT(part, "word/header")
// PowerPoint
|| STR_STARTS_WITH(part, "ppt/slides/slide")
|| STR_STARTS_WITH(part, "ppt/notesSlides/slide")
|| STR_STARTS_WITH_CONSTANT(part, "ppt/slides/slide")
|| STR_STARTS_WITH_CONSTANT(part, "ppt/notesSlides/slide")
// Excel
|| STR_STARTS_WITH(part, "xl/worksheets/sheet")
|| STR_STARTS_WITH(part, "xl/sharedStrings.xml")
|| STR_STARTS_WITH(part, "xl/workbook.xml")
|| STR_STARTS_WITH_CONSTANT(part, "xl/worksheets/sheet")
|| STR_STARTS_WITH_CONSTANT(part, "xl/sharedStrings.xml")
|| STR_STARTS_WITH_CONSTANT(part, "xl/workbook.xml")
) {
return TRUE;
}

View File

@ -143,7 +143,7 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
libraw_gps_info_t gps = libraw_lib->other.parsed_gps;
double gps_longitude_dec =
(gps.longtitude[0] + gps.longtitude[1] / 60 + gps.longtitude[2] / 3600) * DMS_REF(gps.longref);
(gps.longitude[0] + gps.longitude[1] / 60 + gps.longitude[2] / 3600) * DMS_REF(gps.longref);
snprintf(tmp, sizeof(tmp), "%.15f", gps_longitude_dec);
if (gps_longitude_dec != 0.0) {
APPEND_STR_META(doc, MetaExifGpsLongitudeDec, tmp)
@ -163,7 +163,13 @@ void parse_raw(scan_raw_ctx_t *ctx, vfile_t *f, document_t *doc) {
return;
}
libraw_unpack_thumb(libraw_lib);
int unpack_ret = libraw_unpack_thumb(libraw_lib);
if (unpack_ret != 0) {
CTX_LOG_ERRORF(f->filepath, "libraw_unpack_thumb returned error code %d", unpack_ret)
free(buf);
libraw_close(libraw_lib);
return;
}
int errc = 0;
libraw_processed_image_t *thumb = libraw_dcraw_make_mem_thumb(libraw_lib, &errc);

View File

@ -61,6 +61,7 @@ enum metakey {
MetaFontName,
MetaParent,
MetaExifMake,
MetaExifDescription,
MetaExifSoftware,
MetaExifExposureTime,
MetaExifFNumber,

View File

@ -7,7 +7,7 @@
#include "../third-party/utf8.h/utf8.h"
#include "macros.h"
#define STR_STARTS_WITH(x, y) (strncmp(y, x, sizeof(y) - 1) == 0)
#define STR_STARTS_WITH_CONSTANT(x, y) (strncmp(y, x, sizeof(y) - 1) == 0)
#define TEXT_BUF_FULL (-1)
#define INITIAL_BUF_SIZE (1024 * 16)

View File

@ -227,7 +227,7 @@ TEST(Ebook, Utf8Pdf) {
parse_ebook(&ebook_500_ctx, &f, "application/pdf", &doc);
ASSERT_TRUE(STR_STARTS_WITH(get_meta(&doc, MetaContent)->str_val, "最後測試 "));
ASSERT_TRUE(STR_STARTS_WITH_CONSTANT(get_meta(&doc, MetaContent)->str_val, "最後測試 "));
cleanup(&doc, &f);
}
@ -245,7 +245,7 @@ TEST(Ebook, Utf8PdfInvalidChars) {
// It should say "HART is a group of highly qualified ..." but the PDF
// text is been intentionally fucked with by the authors
// We can at least filter out the non-printable/invalid characters like '<27>' etc
ASSERT_TRUE(STR_STARTS_WITH(get_meta(&doc, MetaContent)->str_val, "HART i a g f highl alified "));
ASSERT_TRUE(STR_STARTS_WITH_CONSTANT(get_meta(&doc, MetaContent)->str_val, "HART i a g f highl alified "));
cleanup(&doc, &f);
}
@ -780,6 +780,19 @@ TEST(Arc, EncryptedZip) {
}
/* RAW */
TEST(RAW, Segfault1) {
vfile_t f;
document_t doc;
load_doc_file("libscan-test-files/test_files/raw/segfault1.dng", &f, &doc);
parse_raw(&raw_ctx, &f, &doc);
ASSERT_EQ(get_meta(&doc, MetaWidth)->long_val, 3840);
ASSERT_EQ(get_meta(&doc, MetaHeight)->long_val, 7680);
cleanup(&doc, &f);
}
TEST(RAW, Panasonic) {
vfile_t f;
document_t doc;