Compare commits

..

No commits in common. "master" and "3.4.0" have entirely different histories.

58 changed files with 16082 additions and 7765 deletions

View File

@ -7,36 +7,11 @@ platform:
arch: amd64
steps:
- name: submodules
image: alpine/git
commands:
- git submodule update --init --recursive
- name: docker
image: plugins/docker
depends_on:
- submodules
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: sist2app/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
- name: build
image: sist2app/sist2-build
depends_on:
- submodules
image: simon987/sist2-build
commands:
- ./scripts/build.sh
- name: scp files
depends_on:
- build
image: appleboy/drone-scp
settings:
host:
@ -47,11 +22,26 @@ steps:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: ~/files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./VERSION
- ./sist2-x64-linux
- ./sist2-x64-linux-debug
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
---
kind: pipeline
@ -62,36 +52,11 @@ platform:
arch: arm64
steps:
- name: submodules
image: alpine/git
commands:
- git submodule update --init --recursive
- name: docker
image: plugins/docker
depends_on:
- submodules
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: sist2app/sist2
context: ./
dockerfile: ./Dockerfile.arm64
auto_tag: true
auto_tag_suffix: arm64-linux
when:
event:
- tag
- name: build
image: sist2app/sist2-build-arm64
depends_on:
- submodules
image: simon987/sist2-build-arm64
commands:
- ./scripts/build_arm64.sh
- name: scp files
depends_on:
- build
image: appleboy/drone-scp
settings:
host:
@ -102,7 +67,22 @@ steps:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: ~/files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./sist2-arm64-linux
- ./sist2-arm64-linux-debug
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile.arm64
auto_tag: true
auto_tag_suffix: arm64-linux
when:
event:
- tag

View File

@ -147,7 +147,6 @@ add_dependencies(
target_link_libraries(
sist2
m
z
argparse
unofficial::mongoose::mongoose

View File

@ -1,4 +1,5 @@
FROM sist2app/sist2-build as build
FROM simon987/sist2-build as build
MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/

View File

@ -1,4 +1,5 @@
FROM sist2app/sist2-build-arm64 as build
FROM simon987/sist2-build-arm64 as build
MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/

View File

@ -1,5 +1,5 @@
![GitHub](https://img.shields.io/github/license/sist2app/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/sist2app/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/sist2app/sist2)
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
@ -38,32 +38,26 @@ sist2 (Simple incremental search tool)
### Using Docker Compose *(Windows/Linux/Mac)*
```yaml
version: "3"
services:
elasticsearch:
image: elasticsearch:7.17.9
restart: unless-stopped
volumes:
# This directory must have 1000:1000 permissions (or update PUID & PGID below)
- /data/sist2-es-data/:/usr/share/elasticsearch/data
environment:
- "discovery.type=single-node"
- "ES_JAVA_OPTS=-Xms2g -Xmx2g"
- "PUID=1000"
- "PGID=1000"
sist2-admin:
image: sist2app/sist2:x64-linux
image: simon987/sist2:3.3.4-x64-linux
restart: unless-stopped
volumes:
- /data/sist2-admin-data/:/sist2-admin/
- /<path to index>/:/host
- ./sist2-admin-data/:/sist2-admin/
- /:/host
ports:
- 4090:4090
# NOTE: Don't expose this port publicly!
- 8080:8080
- 4090:4090 # sist2
- 8080:8080 # sist2-admin
working_dir: /root/sist2-admin/
entrypoint: python3
command:
- /root/sist2-admin/sist2_admin/app.py
entrypoint: python3 /root/sist2-admin/sist2_admin/app.py
```
Navigate to http://localhost:8080/ to configure sist2-admin.
@ -79,7 +73,7 @@ Navigate to http://localhost:8080/ to configure sist2-admin.
```
* **SQLite**: No installation required
2. Download the [latest sist2 release](https://github.com/sist2app/sist2/releases).
2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
3. See [usage guide](docs/USAGE.md) for command line usage.
@ -88,30 +82,28 @@ Example usage:
1. Scan a directory: `sist2 scan ~/Documents --output ./documents.sist2`
2. Prepare search index:
* **Elasticsearch**: `sist2 index --es-url http://localhost:9200 ./documents.sist2`
* **SQLite**: `sist2 sqlite-index --search-index ./search.sist2 ./documents.sist2`
3. Start web interface:
* **Elasticsearch**: `sist2 web ./documents.sist2`
* **SQLite**: `sist2 web --search-index ./search.sist2 ./documents.sist2`
* **SQLite**: `sist2 index --search-index ./search.sist2 ./documents.sist2`
3. Start web interface: `sist2 web ./documents.sist2`
## Format support
| File type | Library | Content | Thumbnail | Metadata |
|:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------|
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
| cbz,cbr | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | - | yes | - |
| cbz,cbr | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | - | yes | - |
| `audio/*` | ffmpeg | - | yes | ID3 tags |
| `video/*` | ffmpeg | - | yes | title, comment, artist |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/sist2app/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags |
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
| `text/plain` | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
| `text/plain` | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
| docx, xlsx, pptx | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| doc (MS Word 97-2003) | antiword | yes | no | author, title |
| mobi, azw, azw3 | libmobi | yes | yes | author, title |
| wpd (WordPerfect) | libwpd | yes | no | *planned* |
| json, jsonl, ndjson | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | - | - |
| json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
\* *See [Archive files](#archive-files)*
@ -135,7 +127,7 @@ You can enable OCR support for ebook (pdf,xps,fb2,epub) or image file types with
Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `sist2app/sist2` image comes with common languages
The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa, chi_sim, deu, pol) pre-installed.
You can use the `+` separator to specify multiple languages. The language
@ -175,13 +167,13 @@ sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported re
to enable it.
The text processing is done in your browser, no data is sent to any third-party services.
See [sist2app/sist2-ner-models](https://github.com/sist2app/sist2-ner-models) for more details.
See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details.
#### List of available repositories:
| URL | Maintainer | Purpose |
|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
| [sist2app/sist2-ner-models](https://raw.githubusercontent.com/sist2app/sist2-ner-models/main/repo.json) | [sist2app](https://github.com/sist2app) | General |
| [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General |
<details>
<summary>Screenshot</summary>
@ -197,7 +189,7 @@ You can compile **sist2** by yourself if you don't want to use the pre-compiled
### Using docker
```bash
git clone --recursive https://github.com/sist2app/sist2/
git clone --recursive https://github.com/simon987/sist2/
cd sist2
docker build . -t my-sist2-image
# Copy sist2 executable from docker image
@ -212,16 +204,16 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
```
2. Install vcpkg using my fork: https://github.com/sist2app/vcpkg
2. Install vcpkg using my fork: https://github.com/simon987/vcpkg
3. Install vcpkg dependencies
```bash
vcpkg install openblas curl[core,openssl] sqlite3[core,fts5,json1] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,zlib]
vcpkg install openblas curl[core,openssl] sqlite3[core,fts5] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,zlib]
```
4. Build
```bash
git clone --recursive https://github.com/sist2app/sist2/
git clone --recursive https://github.com/simon987/sist2/
(cd sist2-vue; npm install; npm run build)
(cd sist2-admin/frontend; npm install; npm run build)
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .

View File

@ -4,21 +4,16 @@ services:
elasticsearch:
image: elasticsearch:7.17.9
container_name: sist2-es
volumes:
# This directory must have 1000:1000 permissions (or update PUID & PGID below)
- /data/sist2-es-data/:/usr/share/elasticsearch/data
environment:
- "discovery.type=single-node"
- "ES_JAVA_OPTS=-Xms2g -Xmx2g"
- "PUID=1000"
- "PGID=1000"
sist2-admin:
build:
context: .
container_name: sist2-admin
volumes:
- /data/sist2-admin-data/:/sist2-admin/
- /<path to index>/:/host
- /mnt/array/sist2-admin-data/:/sist2-admin/
- /:/host
ports:
- 4090:4090
# NOTE: Don't export this port publicly!

View File

@ -172,10 +172,6 @@ Using a version >=7.14.0 is recommended to enable the following features:
- Bug fix for large documents (See #198)
Using a version >=8.0.0 is recommended to enable the following features:
- Approximate KNN search for Embeddings search (faster queries).
When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI.
If you don't care about the features above, you can ignore it or disable it in the configuration page.
@ -189,7 +185,7 @@ Since v3.2.0, User scripts can be used to generate _embeddings_ (vector of float
In theory, embeddings can be created for any type of documents (image, text, audio etc.).
For example, the [clip](https://github.com/sist2app/sist2-script-clip) User Script, generates 512-d embeddings of images
For example, the [clip](https://github.com/simon987/sist2-script-clip) User Script, generates 512-d embeddings of images
(videos are also supported using the thumbnails generated by sist2). When the user enters a query in the "Embeddings Search"
textbox, the query's embedding is generated in their browser, leveraging the ONNX web runtime.
@ -229,4 +225,4 @@ See [Automatic tagging](#automatic-tagging) for information about tag
### Automatic tagging
See [scripting](scripting.md) documentation.
See [scripting](scripting.md) documentation.

View File

@ -2,6 +2,8 @@
VCPKG_ROOT="/vcpkg"
git submodule update --init --recursive
(
cd sist2-vue/
npm install

View File

@ -1,16 +1,8 @@
MAGIC_PATHS = [
"/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
"/work/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
"/usr/lib/file/magic.mgc"
]
for path in MAGIC_PATHS:
try:
with open(path, "rb") as f:
data = f.read()
break
except:
continue
try:
with open("/usr/lib/file/magic.mgc", "rb") as f:
data = f.read()
except:
data = bytes([])
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@
"watch": "vue-cli-service build --watch"
},
"dependencies": {
"axios": "^1.6.0",
"axios": "^0.27.2",
"bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5",
"moment": "^2.29.3",

View File

@ -4,7 +4,7 @@
<b-container class="pt-4">
<b-alert show dismissible variant="info">
This is a beta version of sist2-admin. Please submit bug reports, usability issues and feature requests
to the <a href="https://github.com/sist2app/sist2/issues/new/choose" target="_blank">issue tracker on
to the <a href="https://github.com/simon987/sist2/issues/new/choose" target="_blank">issue tracker on
Github</a>. Thank you!
</b-alert>
<router-view v-if="$store.state.sist2AdminInfo"/>

View File

@ -89,12 +89,9 @@ class Sist2AdminApi {
/**
* @param {string} name
* @param {bool} full
*/
runJob(name, full) {
return axios.get(`${this.baseUrl}/api/job/${name}/run`, {
params: {full}
});
runJob(name) {
return axios.get(`${this.baseUrl}/api/job/${name}/run`);
}
/**

View File

@ -95,7 +95,6 @@ export default {
methods: {
onOcrLangChange() {
this.options.ocr_lang = this.selectedOcrLangs.join("+");
this.update();
},
update() {
this.disableOcrLang = this.options.ocr_images === false && this.options.ocr_ebooks === false;

View File

@ -1,70 +1,59 @@
<template>
<div>
<h4>{{ $t("webOptions.title") }}</h4>
<b-card>
<label>{{ $t("webOptions.lang") }}</label>
<b-form-select v-model="options.lang" :options="['en', 'fr', 'zh-CN', 'pl', 'de']"
@change="update()"></b-form-select>
<div>
<h4>{{ $t("webOptions.title") }}</h4>
<b-card>
<label>{{ $t("webOptions.lang") }}</label>
<b-form-select v-model="options.lang" :options="['en', 'fr', 'zh-CN', 'pl', 'de']"
@change="update()"></b-form-select>
<label>{{ $t("webOptions.bind") }}</label>
<b-form-input v-model="options.bind" @change="update()"></b-form-input>
<label>{{ $t("webOptions.bind") }}</label>
<b-form-input v-model="options.bind" @change="update()"></b-form-input>
<label>{{ $t("webOptions.tagline") }}</label>
<b-form-textarea v-model="options.tagline" @change="update()"></b-form-textarea>
<label>{{ $t("webOptions.tagline") }}</label>
<b-form-textarea v-model="options.tagline" @change="update()"></b-form-textarea>
<label>{{ $t("webOptions.auth") }}</label>
<b-form-input v-model="options.auth" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth") }}</label>
<b-form-input v-model="options.auth" @change="update()"></b-form-input>
<label>{{ $t("webOptions.tagAuth") }}</label>
<b-form-input v-model="options.tag_auth" @change="update()" :disabled="Boolean(options.auth)"></b-form-input>
<label>{{ $t("webOptions.tagAuth") }}</label>
<b-form-input v-model="options.tag_auth" @change="update()"></b-form-input>
</b-card>
<b-form-checkbox v-model="options.verbose" @change="update()">
{{$t("webOptions.verbose")}}
</b-form-checkbox>
</b-card>
<br>
<h4>Auth0 options</h4>
<b-card>
<label>{{ $t("webOptions.auth0Audience") }}</label>
<b-form-input v-model="options.auth0_audience" @change="update()"></b-form-input>
<br>
<h4>Auth0 options</h4>
<b-card>
<label>{{ $t("webOptions.auth0Audience") }}</label>
<b-form-input v-model="options.auth0_audience" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth0Domain") }}</label>
<b-form-input v-model="options.auth0_domain" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth0Domain") }}</label>
<b-form-input v-model="options.auth0_domain" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth0ClientId") }}</label>
<b-form-input v-model="options.auth0_client_id" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth0ClientId") }}</label>
<b-form-input v-model="options.auth0_client_id" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth0PublicKey") }}</label>
<b-textarea rows="10" v-model="options.auth0_public_key" @change="update()"></b-textarea>
</b-card>
</div>
<label>{{ $t("webOptions.auth0PublicKey") }}</label>
<b-textarea rows="10" v-model="options.auth0_public_key" @change="update()"></b-textarea>
</b-card>
</div>
</template>
<script>
export default {
name: "WebOptions",
props: ["options", "frontendName"],
data() {
return {
showEsTestAlert: false,
esTestOk: false,
esTestMessage: ""
}
},
methods: {
update() {
console.log(this.options)
if (this.options.auth && this.options.tag_auth) {
// If both are set, remove tagAuth
this.options.tag_auth = "";
}
this.$emit("change", this.options);
name: "WebOptions",
props: ["options", "frontendName"],
data() {
return {
showEsTestAlert: false,
esTestOk: false,
esTestMessage: "",
}
},
}
methods: {
update() {
this.$emit("change", this.options);
},
}
}
</script>

View File

@ -8,7 +8,6 @@ export default {
view: "View",
delete: "Delete",
runNow: "Index now",
runNowFull: "Full re-index",
create: "Create",
cancel: "Cancel",
test: "Test",
@ -65,9 +64,6 @@ export default {
gitRepository: "Git repository URL",
extraArgs: "Extra command line arguments",
couldNotStartFrontend: "Could not start frontend",
couldNotStartFrontendBody: "Unable to start the frontend, check server logs for more details.",
selectJobs: "Available jobs",
selectJob: "Select a job",
webOptions: {
@ -81,7 +77,6 @@ export default {
auth0Domain: "Auth0 domain",
auth0ClientId: "Auth0 client ID",
auth0PublicKey: "Auth0 public key",
verbose: "Verbose logs"
},
backendOptions: {
title: "Search backend options",

View File

@ -1,63 +1,63 @@
<template>
<b-card>
<b-card-title>
{{ name }}
<small style="vertical-align: top">
<b-badge v-if="!loading && frontend.running" variant="success">{{ $t("online") }}</b-badge>
<b-badge v-else-if="!loading" variant="secondary">{{ $t("offline") }}</b-badge>
</small>
</b-card-title>
<b-card>
<b-card-title>
{{ name }}
<small style="vertical-align: top">
<b-badge v-if="!loading && frontend.running" variant="success">{{ $t("online") }}</b-badge>
<b-badge v-else-if="!loading" variant="secondary">{{ $t("offline") }}</b-badge>
</small>
</b-card-title>
<!-- Action buttons-->
<div class="mb-3" v-if="!loading">
<b-button class="mr-1" :disabled="frontend.running || !valid" variant="success" @click="start()">{{
$t("start")
}}
</b-button>
<b-button class="mr-1" :disabled="!frontend.running" variant="danger" @click="stop()">{{
$t("stop")
}}
</b-button>
<b-button class="mr-1" :disabled="!frontend.running" variant="primary" :href="frontendUrl" target="_blank">
{{ $t("go") }}
</b-button>
<b-button variant="danger" @click="deleteFrontend()">{{ $t("delete") }}</b-button>
</div>
<!-- Action buttons-->
<div class="mb-3" v-if="!loading">
<b-button class="mr-1" :disabled="frontend.running || !valid" variant="success" @click="start()">{{
$t("start")
}}
</b-button>
<b-button class="mr-1" :disabled="!frontend.running" variant="danger" @click="stop()">{{
$t("stop")
}}
</b-button>
<b-button class="mr-1" :disabled="!frontend.running" variant="primary" :href="frontendUrl" target="_blank">
{{ $t("go") }}
</b-button>
<b-button variant="danger" @click="deleteFrontend()">{{ $t("delete") }}</b-button>
</div>
<b-progress v-if="loading" striped animated value="100"></b-progress>
<b-card-body v-else>
<b-progress v-if="loading" striped animated value="100"></b-progress>
<b-card-body v-else>
<h4>{{ $t("backendOptions.title") }}</h4>
<b-card>
<b-alert v-if="!valid" variant="warning" show>{{ $t("frontendOptions.noJobSelectedWarning") }}</b-alert>
<h4>{{ $t("backendOptions.title") }}</h4>
<b-card>
<b-alert v-if="!valid" variant="warning" show>{{ $t("frontendOptions.noJobSelectedWarning") }}</b-alert>
<SearchBackendSelect :value="frontend.web_options.search_backend"
@change="onBackendSelect($event)"></SearchBackendSelect>
<SearchBackendSelect :value="frontend.web_options.search_backend"
@change="onBackendSelect($event)"></SearchBackendSelect>
<br>
<JobCheckboxGroup :frontend="frontend" @input="update()"></JobCheckboxGroup>
</b-card>
<br>
<JobCheckboxGroup :frontend="frontend" @input="update()"></JobCheckboxGroup>
</b-card>
<br/>
<br/>
<WebOptions :options="frontend.web_options" :frontend-name="$route.params.name"
@change="update()"></WebOptions>
<br/>
<WebOptions :options="frontend.web_options" :frontend-name="$route.params.name"
@change="update()"></WebOptions>
<br/>
<h4>{{ $t("frontendOptions.title") }}</h4>
<b-card>
<b-form-checkbox v-model="frontend.auto_start" @change="update()">
{{ $t("autoStart") }}
</b-form-checkbox>
<h4>{{ $t("frontendOptions.title") }}</h4>
<b-card>
<b-form-checkbox v-model="frontend.auto_start" @change="update()">
{{ $t("autoStart") }}
</b-form-checkbox>
<label>{{ $t("extraQueryArgs") }}</label>
<b-form-input v-model="frontend.extra_query_args" @change="update()"></b-form-input>
<label>{{ $t("extraQueryArgs") }}</label>
<b-form-input v-model="frontend.extra_query_args" @change="update()"></b-form-input>
<label>{{ $t("customUrl") }}</label>
<b-form-input v-model="frontend.custom_url" @change="update()" placeholder="http://"></b-form-input>
</b-card>
</b-card-body>
</b-card>
<label>{{ $t("customUrl") }}</label>
<b-form-input v-model="frontend.custom_url" @change="update()" placeholder="http://"></b-form-input>
</b-card>
</b-card-body>
</b-card>
</template>
<script>
@ -68,78 +68,71 @@ import WebOptions from "@/components/WebOptions";
import SearchBackendSelect from "@/components/SearchBackendSelect.vue";
export default {
name: 'Frontend',
components: {SearchBackendSelect, JobCheckboxGroup, WebOptions},
data() {
return {
loading: true,
frontend: null,
}
},
computed: {
valid() {
return !this.loading && this.frontend.jobs.length > 0;
name: 'Frontend',
components: {SearchBackendSelect, JobCheckboxGroup, WebOptions},
data() {
return {
loading: true,
frontend: null,
}
},
frontendUrl() {
if (this.frontend.custom_url) {
return this.frontend.custom_url + this.args;
}
computed: {
valid() {
return !this.loading && this.frontend.jobs.length > 0;
},
frontendUrl() {
if (this.frontend.custom_url) {
return this.frontend.custom_url + this.args;
}
if (this.frontend.web_options.bind.startsWith("0.0.0.0")) {
return window.location.protocol + "//" + window.location.hostname + ":" + this.port + this.args;
}
if (this.frontend.web_options.bind.startsWith("0.0.0.0")) {
return window.location.protocol + "//" + window.location.hostname + ":" + this.port + this.args;
}
return window.location.protocol + "//" + this.frontend.web_options.bind + this.args;
return window.location.protocol + "//" + this.frontend.web_options.bind + this.args;
},
name() {
return this.$route.params.name;
},
port() {
return this.frontend.web_options.bind.split(":")[1]
},
args() {
const args = this.frontend.extra_query_args;
if (args !== "") {
return "#" + (args.startsWith("?") ? (args) : ("?" + args));
}
return "";
}
},
name() {
return this.$route.params.name;
},
port() {
return this.frontend.web_options.bind.split(":")[1]
},
args() {
const args = this.frontend.extra_query_args;
if (args !== "") {
return "#" + (args.startsWith("?") ? (args) : ("?" + args));
}
return "";
}
},
mounted() {
Sist2AdminApi.getFrontend(this.name).then(resp => {
this.frontend = resp.data;
this.loading = false;
});
},
methods: {
start() {
Sist2AdminApi.startFrontend(this.name).then(() => {
this.frontend.running = true;
}).catch(() => {
this.$bvToast.toast(this.$t("couldNotStartFrontendBody"), {
title: this.$t("couldNotStartFrontend"),
variant: "danger",
toaster: "b-toaster-bottom-right"
mounted() {
Sist2AdminApi.getFrontend(this.name).then(resp => {
this.frontend = resp.data;
this.loading = false;
});
});
},
stop() {
this.frontend.running = false;
Sist2AdminApi.stopFrontend(this.name)
},
deleteFrontend() {
Sist2AdminApi.deleteFrontend(this.name).then(() => {
this.$router.push("/");
});
},
update() {
Sist2AdminApi.updateFrontend(this.name, this.frontend);
},
onBackendSelect(backend) {
this.frontend.web_options.search_backend = backend;
this.frontend.jobs = [];
this.update();
methods: {
start() {
this.frontend.running = true;
Sist2AdminApi.startFrontend(this.name)
},
stop() {
this.frontend.running = false;
Sist2AdminApi.stopFrontend(this.name)
},
deleteFrontend() {
Sist2AdminApi.deleteFrontend(this.name).then(() => {
this.$router.push("/");
});
},
update() {
Sist2AdminApi.updateFrontend(this.name, this.frontend);
},
onBackendSelect(backend) {
this.frontend.web_options.search_backend = backend;
this.frontend.jobs = [];
this.update();
}
}
}
}
</script>

View File

@ -6,19 +6,7 @@
</b-card-title>
<div class="mb-3">
<b-dropdown
split
split-variant="primary"
variant="primary"
:text="$t('runNow')"
class="mr-1"
:disabled="!valid"
@click="runJob()"
>
<b-dropdown-item href="#" @click="runJob(true)">{{ $t("runNowFull") }}</b-dropdown-item>
</b-dropdown>
<b-button class="mr-1" variant="primary" @click="runJob()" :disabled="!valid">{{ $t("runNow") }}</b-button>
<b-button variant="danger" @click="deleteJob()">{{ $t("delete") }}</b-button>
</div>
@ -81,7 +69,6 @@ export default {
return {
loading: true,
job: null,
console: console
}
},
methods: {
@ -91,8 +78,8 @@ export default {
update() {
Sist2AdminApi.updateJob(this.getName(), this.job);
},
runJob(full = false) {
Sist2AdminApi.runJob(this.getName(), full).then(() => {
runJob() {
Sist2AdminApi.runJob(this.getName()).then(() => {
this.$bvToast.toast(this.$t("runJobConfirmation"), {
title: this.$t("runJobConfirmationTitle"),
variant: "success",

View File

@ -170,6 +170,6 @@ span.ADMIN {
margin: 3px;
white-space: pre;
color: #000;
overflow-y: hidden;
overflow: hidden;
}
</style>
</style>

File diff suppressed because it is too large Load Diff

View File

@ -4,4 +4,4 @@ uvicorn
websockets
pycron
GitPython
git+https://github.com/sist2app/sist2-python.git@2.1
git+https://github.com/simon987/sist2-python.git

View File

@ -2,7 +2,6 @@ import asyncio
import os
import signal
from datetime import datetime
from time import sleep
from urllib.parse import urlparse
import requests
@ -26,7 +25,6 @@ from state import migrate_v1_to_v2, RUNNING_FRONTENDS, TESSERACT_LANGS, DB_SCHEM
get_log_files_to_remove, delete_log_file, create_default_search_backends
from web import Sist2Frontend
from script import UserScript, SCRIPT_TEMPLATES
from util import tail_sync, pid_is_running
sist2 = Sist2(SIST2_BINARY, DATA_FOLDER)
db = PersistentState(dbfile=os.path.join(DATA_FOLDER, "state.db"))
@ -171,14 +169,11 @@ def _run_job(job: Sist2Job):
@app.get("/api/job/{name:str}/run")
async def run_job(name: str, full: bool = False):
job: Sist2Job = db["jobs"][name]
async def run_job(name: str):
job = db["jobs"][name]
if not job:
raise HTTPException(status_code=404)
if full:
job.do_full_scan = True
_run_job(job)
return "ok"
@ -326,18 +321,7 @@ def start_frontend_(frontend: Sist2Frontend):
logger.debug(f"Fetched search backend options for {backend_name}")
pid = sist2.web(frontend.web_options, search_backend, frontend.name)
sleep(0.2)
if not pid_is_running(pid):
frontend_log = frontend.get_log_path(LOG_FOLDER)
logger.error(f"Frontend exited too quickly, check {frontend_log} for more details:")
for line in tail_sync(frontend.get_log_path(LOG_FOLDER), 3):
logger.error(line.strip())
return False
RUNNING_FRONTENDS[frontend.name] = pid
return True
@app.post("/api/frontend/{name:str}/start")
@ -346,12 +330,7 @@ async def start_frontend(name: str):
if not frontend:
raise HTTPException(status_code=404)
ok = start_frontend_(frontend)
if not ok:
raise HTTPException(status_code=500)
return "ok"
start_frontend_(frontend)
@app.post("/api/frontend/{name:str}/stop")

View File

@ -204,7 +204,7 @@ class Sist2IndexTask(Sist2Task):
self.job.previous_index_path = self.job.index_path
db["jobs"][self.job.name] = self.job
self._logger.info(json.dumps({"sist2-admin": f"Sist2Scan task finished {return_code=}, {duration=}, {ok=}"}))
self._logger.info(json.dumps({"sist2-admin": f"Sist2Scan task finished {return_code=}, {duration=}"}))
logger.info(f"Completed {self.display_name} ({return_code=})")

View File

@ -96,7 +96,7 @@ SCRIPT_TEMPLATES = {
"CLIP - Generate embeddings to predict the most relevant image based on the text prompt": lambda name: UserScript(
name=name,
type=ScriptType.GIT,
git_repository="https://github.com/sist2app/sist2-script-clip",
git_repository="https://github.com/simon987/sist2-script-clip",
extra_args="--num-tags=1 --tags-file=general.txt --color=#dcd7ff"
),
"Whisper - Speech to text with OpenAI Whisper": lambda name: UserScript(

View File

@ -2,11 +2,10 @@ import datetime
import json
import logging
import os.path
import sys
from datetime import datetime
from enum import Enum
from io import TextIOWrapper
from logging import FileHandler, StreamHandler
from logging import FileHandler
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile
from threading import Thread
@ -201,7 +200,6 @@ class WebOptions(BaseModel):
auth0_client_id: str = None
auth0_public_key: str = None
auth0_public_key_file: str = None
verbose: bool = False
def __init__(self, **kwargs):
super().__init__(**kwargs)
@ -233,8 +231,6 @@ class WebOptions(BaseModel):
args.append(f"--tag-auth={self.tag_auth}")
if self.dev:
args.append(f"--dev")
if self.verbose:
args.append(f"--very-verbose")
args.extend(self.indices)
@ -261,7 +257,7 @@ class Sist2:
set_pid_cb(proc.pid)
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, None, proc))
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
t_stderr.start()
self._consume_logs_stdout(logs_cb, proc)
@ -288,7 +284,7 @@ class Sist2:
set_pid_cb(proc.pid)
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, None, proc))
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
t_stderr.start()
self._consume_logs_stdout(logs_cb, proc)
@ -298,7 +294,7 @@ class Sist2:
return proc.returncode
@staticmethod
def _consume_logs_stderr(logs_cb, exit_cb, proc):
def _consume_logs_stderr(logs_cb, proc):
pipe_wrapper = TextIOWrapper(proc.stderr, encoding="utf8", errors="ignore")
try:
for line in pipe_wrapper:
@ -306,9 +302,7 @@ class Sist2:
continue
logs_cb({"stderr": line})
finally:
return_code = proc.wait()
if exit_cb:
exit_cb(return_code)
proc.wait()
pipe_wrapper.close()
@staticmethod
@ -342,19 +336,15 @@ class Sist2:
web_logger = logging.Logger(name=f"sist2-frontend-{name}")
web_logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"frontend-{name}.log")))
web_logger.addHandler(StreamHandler())
def logs_cb(message):
web_logger.info(json.dumps(message))
def exit_cb(return_code):
logger.info(f"Web frontend exited with return code {return_code}")
logger.info(f"Starting frontend {' '.join(args)}")
proc = Popen(args, stdout=PIPE, stderr=PIPE)
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, exit_cb, proc))
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
t_stderr.start()
t_stdout = Thread(target=self._consume_logs_stdout, args=(logs_cb, proc))

View File

@ -1,41 +0,0 @@
from glob import glob
import os
from config import DATA_FOLDER
def get_old_index_files(name):
files = glob(os.path.join(DATA_FOLDER, f"scan-{name.replace('/', '_')}-*.sist2"))
files = list(sorted(files, key=lambda f: os.stat(f).st_mtime))
files = files[-1:]
return files
def tail_sync(filename, lines=1, _buffer=4098):
with open(filename) as f:
lines_found = []
block_counter = -1
while len(lines_found) < lines:
try:
f.seek(block_counter * _buffer, os.SEEK_END)
except IOError:
f.seek(0)
lines_found = f.readlines()
break
lines_found = f.readlines()
block_counter -= 1
return lines_found[-lines:]
def pid_is_running(pid):
try:
os.kill(pid, 0)
except OSError:
return False
return True

Binary file not shown.

14732
sist2-vue/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,7 @@
"dependencies": {
"@auth0/auth0-spa-js": "^2.0.2",
"@egjs/vue-infinitegrid": "3.3.0",
"axios": "^1.6.0",
"axios": "^0.25.0",
"bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5",
"d3": "^5.6.1",
@ -17,7 +17,7 @@
"dom-to-image": "^2.6.0",
"fslightbox-vue": "fslightbox-vue.tgz",
"nouislider": "^15.2.0",
"onnxruntime-web": "1.15.1",
"onnxruntime-web": "^1.15.1",
"underscore": "^1.13.1",
"vue": "^2.6.12",
"vue-color": "^2.8.1",

View File

@ -144,6 +144,8 @@ class Sist2Api {
}
_getIndexRoot(indexId) {
console.log(indexId)
console.log(this.sist2Info.indices.find(idx => idx.id === indexId))
return this.sist2Info.indices.find(idx => idx.id === indexId).root;
}
@ -309,7 +311,7 @@ class Sist2Api {
}
getTagsSqlite() {
return axios.get(`${this.baseUrl}fts/tags`)
return axios.get(`${this.baseUrl}/fts/tags`)
.then(resp => {
return resp.data.map(tag => this._createEsTag(tag.tag, tag.count))
});
@ -566,7 +568,7 @@ class Sist2Api {
}
getDocumentSqlite(sid) {
return axios.get(`${this.baseUrl}fts/d/${sid}`)
return axios.get(`${this.baseUrl}/fts/d/${sid}`)
.then(resp => ({
_source: resp.data
}));
@ -589,7 +591,7 @@ class Sist2Api {
}
getTagSuggestionsSqlite(prefix) {
return axios.post(`${this.baseUrl}fts/suggestTags`, prefix)
return axios.post(`${this.baseUrl}/fts/suggestTags`, prefix)
.then(resp => (resp.data));
}
@ -620,7 +622,7 @@ class Sist2Api {
}
getEmbeddings(sid, modelId) {
return axios.post(`${this.baseUrl}e/${sid}/${modelId.toString().padStart(3, '0')}`)
return axios.post(`${this.baseUrl}/e/${sid}/${modelId.toString().padStart(3, '0')}`)
.then(resp => (resp.data));
}
}

View File

@ -117,11 +117,11 @@ class Sist2ElasticsearchQuery {
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax, format: "epoch_second"}}})
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin, format: "epoch_second"}}})
filters.push({range: {mtime: {gte: dateMin}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax, format: "epoch_second"}}})
filters.push({range: {mtime: {lte: dateMax}}})
}
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes

View File

@ -59,7 +59,7 @@ export default {
const fields = [
"title", "duration", "audioc", "videoc",
"bitrate", "artist", "album", "album_artist", "genre", "font_name", "author", "media_comment",
"bitrate", "artist", "album", "album_artist", "genre", "font_name", "author",
"modified_by", "pages", "tag",
"exif_make", "exif_software", "exif_exposure_time", "exif_fnumber", "exif_focal_length",
"exif_user_comment", "exif_iso_speed_ratings", "exif_model", "exif_datetime",

View File

@ -77,7 +77,6 @@ export default {
return listener(e);
}
};
},
methods: {
keyDownListener(e) {

View File

@ -9,7 +9,7 @@
<span class="badge badge-pill version" v-if="$store && $store.state.sist2Info">
v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span><span v-if="isLegacy() && !hideLegacy()">-<a
href="https://github.com/sist2app/sist2/blob/master/docs/USAGE.md#elasticsearch"
href="https://github.com/simon987/sist2/blob/master/docs/USAGE.md#elasticsearch"
target="_blank">legacyES</a></span><span v-if="$store.state.uiSqliteMode">-SQLite</span>
</span>

View File

@ -138,7 +138,7 @@ export default {
},
debug: "Debug information",
debugDescription: "Information useful for debugging. If you encounter bugs or have suggestions for" +
" new features, please submit a new issue <a href='https://github.com/sist2app/sist2/issues/new/choose'>here</a>.",
" new features, please submit a new issue <a href='https://github.com/simon987/sist2/issues/new/choose'>here</a>.",
tagline: "Tagline",
toast: {
esConnErrTitle: "Elasticsearch connection error",
@ -318,7 +318,7 @@ export default {
},
debug: "Debug Informationen",
debugDescription: "Informationen für das Debugging. Wenn du Bugs gefunden oder Anregungen für " +
"neue Features hast, poste sie bitte <a href='https://github.com/sist2app/sist2/issues/new/choose'>hier</a>.",
"neue Features hast, poste sie bitte <a href='https://github.com/simon987/sist2/issues/new/choose'>hier</a>.",
tagline: "Tagline",
toast: {
esConnErrTitle: "Elasticsearch Verbindungsfehler",
@ -494,7 +494,7 @@ export default {
debug: "Information de débogage",
debugDescription: "Informations utiles pour le débogage\n" +
"Si vous rencontrez des bogues ou si vous avez des suggestions pour de nouvelles fonctionnalités," +
" veuillez soumettre un nouvel Issue <a href='https://github.com/sist2app/sist2/issues/new/choose'>ici</a>.",
" veuillez soumettre un nouvel Issue <a href='https://github.com/simon987/sist2/issues/new/choose'>ici</a>.",
tagline: "Tagline",
toast: {
esConnErrTitle: "Erreur de connexion Elasticsearch",
@ -668,7 +668,7 @@ export default {
},
debug: "调试信息",
debugDescription: "对调试除错有用的信息。 若您遇到bug或者想建议新功能请提交新Issue到" +
"<a href='https://github.com/sist2app/sist2/issues/new/choose'>这里</a>.",
"<a href='https://github.com/simon987/sist2/issues/new/choose'>这里</a>.",
tagline: "标签栏",
toast: {
esConnErrTitle: "Elasticsearch连接错误",
@ -846,7 +846,7 @@ export default {
},
debug: "Informacje dla programistów",
debugDescription: "Informacje przydatne do znajdowania błędów w oprogramowaniu. Jeśli napotkasz błąd lub masz" +
" propozycje zmian, zgłoś to proszę <a href='https://github.com/sist2app/sist2/issues/new/choose'>tutaj</a>.",
" propozycje zmian, zgłoś to proszę <a href='https://github.com/simon987/sist2/issues/new/choose'>tutaj</a>.",
tagline: "Slogan",
toast: {
esConnErrTitle: "Problem z połączeniem z Elasticsearch",

View File

@ -22,9 +22,7 @@ export class CLIPTransformerModel {
async loadModel(onProgress) {
ort.env.wasm.wasmPaths = ORT_WASM_PATHS;
if (window.crossOriginIsolated) {
ort.env.wasm.numThreads = 2;
}
ort.env.wasm.numThreads = 2;
let buf = await ModelStore.get(this._modelUrl);
if (!buf) {

View File

@ -58,7 +58,7 @@ export default new Vuex.Store({
optVidPreviewInterval: 700,
optSimpleLightbox: true,
optShowTagPickerFilter: true,
optMlRepositories: "https://raw.githubusercontent.com/sist2app/sist2-ner-models/main/repo.json",
optMlRepositories: "https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json",
optAutoAnalyze: false,
optMlDefaultModel: null,

View File

@ -81,7 +81,6 @@
<li><code>doc.artist</code></li>
<li><code>doc.title</code></li>
<li><code>doc.genre</code></li>
<li><code>doc.media_comment</code></li>
<li><code>doc.album_artist</code></li>
<li><code>doc.exif_make</code></li>
<li><code>doc.exif_model</code></li>

View File

@ -1,5 +1,3 @@
#!/usr/bin/env bash
export NODE_OPTIONS=--openssl-legacy-provider
./node_modules/@vue/cli-service/bin/vue-cli-service.js build --watch

View File

@ -25,7 +25,6 @@ const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
"/usr/share/tesseract-ocr/4.00/tessdata/",
"/usr/share/tesseract-ocr/5/tessdata/",
"./",
NULL
};

View File

@ -114,7 +114,7 @@ void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv)
char buf[PATH_MAX];
strcpy(buf, current_job);
SET_CURRENT_JOB(ipc_ctx, current_job);
strcpy(ipc_ctx->current_job[ProcData.thread_id], current_job);
sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC);
}
@ -478,7 +478,8 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) {
database_iterator_t *database_create_delete_list_iterator(database_t *db) {
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "SELECT id FROM delete_list", -1, &stmt, NULL);
sqlite3_prepare_v2(db->db, "SELECT doc.id FROM delete_list "
"INNER JOIN document doc ON doc.ROWID = delete_list.id;", -1, &stmt, NULL);
database_iterator_t *iter = malloc(sizeof(database_iterator_t));

View File

@ -64,8 +64,6 @@ typedef struct {
char current_job[MAX_THREADS][PATH_MAX * 2];
} database_ipc_ctx_t;
#define SET_CURRENT_JOB(ctx, job) (strcpy((ctx)->current_job[ProcData.thread_id], job))
typedef struct {
double date_min;
double date_max;

View File

@ -102,9 +102,7 @@ void database_fts_index(database_t *db) {
db->db, "DELETE FROM fts.mime_index;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "INSERT INTO fts.mime_index (index_id, mime, count) "
"SELECT index_id, mime, count(*) FROM fts.document_index "
"WHERE mime IS NOT NULL "
"GROUP BY index_id, mime",
"SELECT index_id, mime, count(*) FROM fts.document_index GROUP BY index_id, mime",
NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Generating path index");

View File

@ -206,7 +206,7 @@ response_t *web_put(const char *url, const char *data, int insecure) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_SHARE, 0);
curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);

View File

@ -30,8 +30,6 @@ char *get_meta_key_text(enum metakey meta_key) {
return "genre";
case MetaTitle:
return "title";
case MetaMediaComment:
return "media_comment";
case MetaFontName:
return "font_name";
case MetaExifMake:
@ -161,7 +159,6 @@ void write_document(document_t *doc) {
case MetaExifGpsLatitudeDec:
case MetaExifGpsLatitudeRef:
case MetaChecksum:
case MetaMediaComment:
case MetaTitle: {
cJSON_AddStringToObject(json, get_meta_key_text(meta->key), meta->str_val);
buffer_size_guess += (int) strlen(meta->str_val);

View File

@ -11,6 +11,7 @@
#include "web/serve.h"
#include "parsing/mime.h"
#include "parsing/parse.h"
#include "auth0/auth0_c_api.h"
#include <signal.h>
#include <pthread.h>
@ -424,8 +425,6 @@ int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct
fprintf(stderr, "error: option `--%s` Value must be >= 0\n", option->long_name);
exit(1);
}
return 0;
}
int main(int argc, const char *argv[]) {
@ -545,7 +544,7 @@ int main(int argc, const char *argv[]) {
OPT_END(),
};
struct argparse argparse = {};
struct argparse argparse;
argparse_init(&argparse, options, usage, 0);
argparse_describe(
&argparse,

View File

@ -142,10 +142,6 @@ void parse(parse_job_t *job) {
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
}
if (IS_SUB_JOB(job)) {
SET_CURRENT_JOB(ProcData.ipc_db->ipc_ctx, job->filepath);
}
document_t *doc = malloc(sizeof(document_t));
strcpy(doc->filepath, job->filepath);
@ -165,8 +161,7 @@ void parse(parse_job_t *job) {
return;
}
int document_exists = database_mark_document(ProcData.index_db, doc->filepath + ScanCtx.index.desc.root_len, doc->mtime);
if (document_exists) {
if (database_mark_document(ProcData.index_db, doc->filepath + ScanCtx.index.desc.root_len, doc->mtime)) {
CLOSE_FILE(job->vfile)
free(doc);
return;

View File

@ -51,17 +51,17 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "3.4.2"
#define VERSION "3.4.0"
static const char *const Version = VERSION;
static const int VersionMajor = 3;
static const int VersionMinor = 4;
static const int VersionPatch = 3;
static const int VersionPatch = 0;
#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown
#endif
#define EXPECTED_MONGOOSE_VERSION "7.16"
#define EXPECTED_MONGOOSE_VERSION "7.7"
#define Q(x) #x
#define QUOTE(x) Q(x)

View File

@ -50,13 +50,13 @@ void get_embedding(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid;
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.buf + 3)) {
LOG_DEBUGF("serve.c", "Invalid embedding path: %.*s", (int) hm->uri.len, hm->uri.buf);
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.ptr + 3)) {
LOG_DEBUGF("serve.c", "Invalid embedding path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND
return;
}
int model_id = (int) strtol(hm->uri.buf + SIST_SID_LEN + 3, NULL, 10);
int model_id = (int) strtol(hm->uri.ptr + SIST_SID_LEN + 3, NULL, 10);
database_t *db = web_get_database(sid.index_id);
if (db == NULL) {
@ -86,11 +86,11 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
char index_id_str[9];
char arg_stat_type[5];
memcpy(index_id_str, hm->uri.buf + 3, 8);
memcpy(index_id_str, hm->uri.ptr + 3, 8);
*(index_id_str + 8) = '\0';
int index_id = (int) strtol(index_id_str, NULL, 16);
memcpy(arg_stat_type, hm->uri.buf + 3 + 9, 4);
memcpy(arg_stat_type, hm->uri.ptr + 3 + 9, 4);
*(arg_stat_type + sizeof(arg_stat_type) - 1) = '\0';
database_stat_type_d stat_type = database_get_stat_type_by_mnemonic(arg_stat_type);
@ -108,6 +108,7 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
cJSON *json = database_get_stats(db, stat_type);
mg_send_json(nc, json);
cJSON_Delete(json);
}
@ -135,19 +136,19 @@ void serve_chunk_vendors_js(struct mg_connection *nc, struct mg_http_message *hm
}
}
void serve_favicon_ico(struct mg_connection *nc, UNUSED(struct mg_http_message *hm)) {
void serve_favicon_ico(struct mg_connection *nc, struct mg_http_message *hm) {
web_serve_asset_favicon_ico(nc);
}
void serve_style_css(struct mg_connection *nc, UNUSED(struct mg_http_message *hm)) {
void serve_style_css(struct mg_connection *nc, struct mg_http_message *hm) {
web_serve_asset_style_css(nc);
}
void serve_chunk_vendors_css(struct mg_connection *nc, UNUSED(struct mg_http_message *hm)) {
void serve_chunk_vendors_css(struct mg_connection *nc, struct mg_http_message *hm) {
web_serve_asset_chunk_vendors_css(nc);
}
void serve_thumbnail(struct mg_connection *nc, UNUSED(struct mg_http_message *hm), int index_id,
void serve_thumbnail(struct mg_connection *nc, struct mg_http_message *hm, int index_id,
int doc_id, int arg_num) {
database_t *db = web_get_database(index_id);
@ -168,7 +169,6 @@ void serve_thumbnail(struct mg_connection *nc, UNUSED(struct mg_http_message *hm
"Cache-Control: max-age=31536000"
);
mg_send(nc, data, data_len);
nc->is_resp = 0;
free(data);
} else {
HTTP_REPLY_NOT_FOUND
@ -179,13 +179,13 @@ void serve_thumbnail(struct mg_connection *nc, UNUSED(struct mg_http_message *hm
void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid;
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.buf + 3)) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.buf);
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.ptr + 3)) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND
return;
}
int num = (int) strtol(hm->uri.buf + SIST_SID_LEN + 3, NULL, 10);
int num = (int) strtol(hm->uri.ptr + SIST_SID_LEN + 3, NULL, 10);
serve_thumbnail(nc, hm, sid.index_id, sid.doc_id, num);
}
@ -193,8 +193,8 @@ void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) {
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid;
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.buf + 3)) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.buf);
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.ptr + 3)) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND
return;
}
@ -210,14 +210,13 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.buf, hm->body.len);
memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0';
char url[4096];
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
nc->fn_data = web_post_async(url, body, WebCtx.es_insecure_ssl);
nc->is_resp = 1;
}
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
@ -383,7 +382,11 @@ void index_info(struct mg_connection *nc) {
cJSON_AddStringToObject(json, "searchBackend", "elasticsearch");
}
mg_send_json(nc, json);
char *json_str = cJSON_PrintUnformatted(json);
web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, strlen(json_str));
free(json_str);
cJSON_Delete(json);
}
@ -416,8 +419,8 @@ cJSON *get_root_document_by_id(int index_id, int doc_id) {
void file(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid;
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.buf + 3)) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.buf);
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.ptr + 3)) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND
return;
}
@ -430,11 +433,6 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
cJSON *source = get_root_document_by_id(sid.index_id, sid.doc_id);
if (source == NULL) {
HTTP_REPLY_NOT_FOUND
return;
}
if (strlen(idx->desc.rewrite_url) == 0) {
serve_file_from_disk(source, idx, nc, hm);
} else {
@ -452,7 +450,6 @@ void status(struct mg_connection *nc) {
}
free(status);
nc->is_resp = 0;
}
typedef struct {
@ -528,14 +525,14 @@ subreq_ctx_t *elastic_write_tag(const char *sid, const tag_req_t *req) {
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid;
if (hm->uri.len != 22 || !parse_sid(&sid, hm->uri.buf + 5)) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.buf);
if (hm->uri.len != 22 || !parse_sid(&sid, hm->uri.ptr + 5)) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND
return;
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.buf, hm->body.len);
memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body);
free(body);
@ -612,7 +609,7 @@ int check_auth0(struct mg_http_message *hm) {
}
token_str = malloc(token.len + 1);
strncpy(token_str, token.buf, token.len);
strncpy(token_str, token.ptr, token.len);
*(token_str + token.len) = '\0';
int res = auth0_verify_jwt(
@ -630,7 +627,7 @@ int check_auth0(struct mg_http_message *hm) {
return TRUE;
}
static void ev_router(struct mg_connection *nc, int ev, void *ev_data) {
static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(void *fn_data)) {
if (ev == MG_EV_HTTP_MSG) {
struct mg_http_message *hm = (struct mg_http_message *) ev_data;
@ -642,15 +639,13 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data) {
}
char uri[256];
memcpy(uri, hm->uri.buf, hm->uri.len);
memcpy(uri, hm->uri.ptr, hm->uri.len);
*(uri + hm->uri.len) = '\0';
LOG_DEBUGF("serve.c", "<%s> GET %s",
web_address_to_string(&(nc->rem)),
uri
);
#define mg_http_match_uri(hm, pattern) mg_match((hm)->uri, mg_str(pattern), NULL)
if (mg_http_match_uri(hm, "/")) {
serve_index_html(nc, hm);
return;
@ -743,7 +738,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data) {
if (r->status_code == 200) {
web_send_headers(nc, 200, r->size, "Content-Type: application/json");
mg_send(nc, r->body, r->size);
nc->is_resp = 0;
} else if (r->status_code == 0) {
sist_log("serve.c", LOG_SIST_ERROR, "Could not connect to elasticsearch!");

View File

@ -420,8 +420,8 @@ void fts_get_document(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid;
if (hm->uri.len != 24 || !parse_sid(&sid, hm->uri.buf + 7)) {
LOG_DEBUGF("serve.c", "Invalid /fts/d/ path: %.*s", (int) hm->uri.len, hm->uri.buf);
if (hm->uri.len != 24 || !parse_sid(&sid, hm->uri.ptr + 7)) {
LOG_DEBUGF("serve.c", "Invalid /fts/d/ path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND
return;
}

View File

@ -5,37 +5,31 @@
void web_serve_asset_index_html(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(index_html), HTTP_CROSS_ORIGIN_HEADERS "Content-Type: text/html");
mg_send(nc, index_html, sizeof(index_html));
nc->is_resp = 0;
}
void web_serve_asset_index_js(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(index_js), "Content-Type: application/javascript");
mg_send(nc, index_js, sizeof(index_js));
nc->is_resp = 0;
}
void web_serve_asset_chunk_vendors_js(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(chunk_vendors_js), "Content-Type: application/javascript");
mg_send(nc, chunk_vendors_js, sizeof(chunk_vendors_js));
nc->is_resp = 0;
}
void web_serve_asset_favicon_ico(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(favicon_ico), "Content-Type: image/x-icon");
mg_send(nc, favicon_ico, sizeof(favicon_ico));
nc->is_resp = 0;
}
void web_serve_asset_style_css(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(index_css), "Content-Type: text/css");
mg_send(nc, index_css, sizeof(index_css));
nc->is_resp = 0;
}
void web_serve_asset_chunk_vendors_css(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(chunk_vendors_css), "Content-Type: text/css");
mg_send(nc, chunk_vendors_css, sizeof(chunk_vendors_css));
nc->is_resp = 0;
}
index_t *web_get_index_by_id(int index_id) {
@ -73,7 +67,7 @@ cJSON *web_get_json_body(struct mg_http_message *hm) {
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.buf, hm->body.len);
memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body);
free(body);
@ -87,7 +81,7 @@ char *web_get_string_body(struct mg_http_message *hm) {
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.buf, hm->body.len);
memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0';
return body;
@ -98,7 +92,6 @@ void mg_send_json(struct mg_connection *nc, const cJSON *json) {
web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, strlen(json_str));
nc->is_resp = 0;
free(json_str);
}

View File

@ -16,26 +16,9 @@ database_t *web_get_database(int index_id);
__always_inline
static char *web_address_to_string(struct mg_addr *addr) {
static char address_to_string_buf[64];
static char address_to_string_buf[INET6_ADDRSTRLEN];
if (addr->is_ip6) {
snprintf(address_to_string_buf, sizeof(address_to_string_buf),
"%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x",
addr->ip[0], addr->ip[1],
addr->ip[2], addr->ip[3],
addr->ip[4], addr->ip[5],
addr->ip[6], addr->ip[7],
addr->ip[8], addr->ip[9],
addr->ip[10], addr->ip[11],
addr->ip[12], addr->ip[13],
addr->ip[14], addr->ip[15]);
} else {
snprintf(address_to_string_buf, sizeof(address_to_string_buf),
"%d.%d.%d.%d",
addr->ip[0], addr->ip[1], addr->ip[2], addr->ip[3]);
}
return address_to_string_buf;
return mg_ntoa(addr, address_to_string_buf, sizeof(address_to_string_buf));
}
void web_send_headers(struct mg_connection *nc, int status_code, size_t length, char *extra_headers);

View File

@ -106,33 +106,12 @@ find_library(MUPDF_LIB NAMES liblibmupdf.a)
find_library(CMS_LIB NAMES lcms2)
find_library(JAS_LIB NAMES jasper)
find_library(GUMBO_LIB NAMES gumbo)
find_library(GOMP_LIB NAMES libgomp.a gomp
PATHS
/usr/lib/gcc/x86_64-linux-gnu/5/
/usr/lib/gcc/x86_64-linux-gnu/6/
/usr/lib/gcc/x86_64-linux-gnu/7/
/usr/lib/gcc/x86_64-linux-gnu/8/
/usr/lib/gcc/x86_64-linux-gnu/9/
/usr/lib/gcc/x86_64-linux-gnu/10/
/usr/lib/gcc/x86_64-linux-gnu/11/
/usr/lib/gcc/x86_64-linux-gnu/12/
/usr/lib/gcc/aarch64-linux-gnu/5/
/usr/lib/gcc/aarch64-linux-gnu/6/
/usr/lib/gcc/aarch64-linux-gnu/7/
/usr/lib/gcc/aarch64-linux-gnu/8/
/usr/lib/gcc/aarch64-linux-gnu/9/
/usr/lib/gcc/aarch64-linux-gnu/10/
/usr/lib/gcc/aarch64-linux-gnu/11/
/usr/lib/gcc/aarch64-linux-gnu/12/
)
find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/8/ /usr/lib/gcc/aarch64-linux-gnu/8/)
find_package(Leptonica CONFIG REQUIRED)
find_package(FFMPEG REQUIRED)
find_package(libraw CONFIG REQUIRED)
find_package(Freetype REQUIRED)
find_package(FFMPEG REQUIRED)
list(REMOVE_ITEM FFMPEG_LIBRARIES /usr/lib/x86_64-linux-gnu/libm.a)
list(REMOVE_ITEM FFMPEG_LIBRARIES /usr/lib/aarch64-linux-gnu/libm.a)
target_compile_options(
scan
@ -187,6 +166,7 @@ target_link_libraries(
${WPD_LIB_DIR}/libwpd-0.9.a
${WPD_LIB_DIR}/libwpd-stream-0.9.a
${FREETYPE_LIB}
${HARFBUZZ_LIB}
${JBIG2DEC_LIB}

View File

@ -175,19 +175,9 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
return TRUE;
}
#define IS_IGNORED_MESSAGE(message) \
( \
strstr(message, "invalid glyph index") \
|| strstr(message, "... repeated") \
) \
void fz_err_callback(void *user, const char *message) {
document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message);
}
@ -195,10 +185,6 @@ void fz_err_callback(void *user, const char *message) {
void fz_warn_callback(void *user, const char *message) {
document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message);
}

View File

@ -223,10 +223,14 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d
void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) {
if (meta_contains_key(doc->meta_head, key)) {
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s'",
key, tag->value);
return;
meta_line_t *meta = doc->meta_head;
while (meta != NULL) {
if (meta->key == key) {
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'",
key, meta->str_val, key, tag->value);
return;
}
meta = meta->next;
}
text_buffer_t tex = text_buffer_create(-1);
@ -268,7 +272,6 @@ static void append_audio_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx
APPEND_TAG_META(MetaAlbum);
} else if (strcmp(key, "comment") == 0) {
append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent);
APPEND_TAG_META(MetaMediaComment);
}
}
}
@ -441,7 +444,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
return SAVE_THUMBNAIL_FAILED;
}
if (ctx->tesseract_lang != NULL && thumbnail_index == 0 && !meta_contains_key(doc->meta_head, MetaContent)) {
if (ctx->tesseract_lang != NULL && thumbnail_index == 0) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
}
@ -565,9 +568,6 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
CTX_LOG_DEBUGF(doc->filepath,
"Will not generate thumbnail because image is too small: %dx%d",
stream->codecpar->width, stream->codecpar->width);
avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx);
return;

View File

@ -5,8 +5,8 @@
#include <tesseract/capi.h>
#define MIN_OCR_WIDTH 350
#define MIN_OCR_HEIGHT 33
#define MIN_OCR_LEN 3
#define MIN_OCR_HEIGHT 100
#define MIN_OCR_LEN 10
#define OCR_IS_VALID_BPP(d) \
((d) == 1 || (d) == 2 || (d) == 4 || (d) == 8 || (d) == 16 || (d) == 24 || \
@ -28,13 +28,7 @@ ocr_extract_text(const char *tesseract_path, const char *tesseract_lang,
TessBaseAPI *api = TessBaseAPICreate();
TessBaseAPIInit3(api, tesseract_path, tesseract_lang);
// https://github.com/simon987/sist2/issues/443
if (strstr(tesseract_lang, "chi") != NULL) {
TessBaseAPISetVariable(api, "preserve_interword_spaces", "1");
}
// TODO: add this as param?
// TessBaseAPISetPageSegMode(api, PSM_AUTO_OSD);
TessBaseAPISetPageSegMode(api, PSM_AUTO_OSD);
TessBaseAPISetImage(api, img_buf, img_w, img_h, img_bpp, img_stride);
TessBaseAPISetSourceResolution(api, img_xres);

View File

@ -63,7 +63,6 @@ enum metakey {
MetaAlbumArtist,
MetaGenre,
MetaTitle,
MetaMediaComment,
MetaFontName,
MetaExifMake,
MetaExifDescription,
@ -172,8 +171,6 @@ typedef struct {
char filepath[PATH_MAX * 2 + 1];
} parse_job_t;
#define IS_SUB_JOB(job) ((job)->parent[0] != '\0')
#include "util.h"

View File

@ -392,18 +392,4 @@ static parse_job_t *create_parse_job(const char *filepath, int mtime, size_t st_
return job;
}
static int meta_contains_key (meta_line_t *meta_head, enum metakey key) {
meta_line_t *meta = meta_head;
while (meta != NULL) {
if (meta->key == key) {
return TRUE;
}
meta = meta->next;
}
return FALSE;
}
#endif