Compare commits

..

No commits in common. "master" and "3.3.3" have entirely different histories.

60 changed files with 16104 additions and 7808 deletions

View File

@ -7,36 +7,11 @@ platform:
arch: amd64 arch: amd64
steps: steps:
- name: submodules
image: alpine/git
commands:
- git submodule update --init --recursive
- name: docker
image: plugins/docker
depends_on:
- submodules
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: sist2app/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
- name: build - name: build
image: sist2app/sist2-build image: simon987/sist2-build
depends_on:
- submodules
commands: commands:
- ./scripts/build.sh - ./scripts/build.sh
- name: scp files - name: scp files
depends_on:
- build
image: appleboy/drone-scp image: appleboy/drone-scp
settings: settings:
host: host:
@ -47,11 +22,26 @@ steps:
from_secret: SSH_USER from_secret: SSH_USER
key: key:
from_secret: SSH_KEY from_secret: SSH_KEY
target: ~/files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/ target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source: source:
- ./VERSION - ./VERSION
- ./sist2-x64-linux - ./sist2-x64-linux
- ./sist2-x64-linux-debug - ./sist2-x64-linux-debug
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
--- ---
kind: pipeline kind: pipeline
@ -62,36 +52,11 @@ platform:
arch: arm64 arch: arm64
steps: steps:
- name: submodules
image: alpine/git
commands:
- git submodule update --init --recursive
- name: docker
image: plugins/docker
depends_on:
- submodules
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: sist2app/sist2
context: ./
dockerfile: ./Dockerfile.arm64
auto_tag: true
auto_tag_suffix: arm64-linux
when:
event:
- tag
- name: build - name: build
image: sist2app/sist2-build-arm64 image: simon987/sist2-build-arm64
depends_on:
- submodules
commands: commands:
- ./scripts/build_arm64.sh - ./scripts/build_arm64.sh
- name: scp files - name: scp files
depends_on:
- build
image: appleboy/drone-scp image: appleboy/drone-scp
settings: settings:
host: host:
@ -102,7 +67,22 @@ steps:
from_secret: SSH_USER from_secret: SSH_USER
key: key:
from_secret: SSH_KEY from_secret: SSH_KEY
target: ~/files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/ target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source: source:
- ./sist2-arm64-linux - ./sist2-arm64-linux
- ./sist2-arm64-linux-debug - ./sist2-arm64-linux-debug
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile.arm64
auto_tag: true
auto_tag_suffix: arm64-linux
when:
event:
- tag

View File

@ -147,7 +147,6 @@ add_dependencies(
target_link_libraries( target_link_libraries(
sist2 sist2
m
z z
argparse argparse
unofficial::mongoose::mongoose unofficial::mongoose::mongoose

View File

@ -1,4 +1,5 @@
FROM sist2app/sist2-build as build FROM simon987/sist2-build as build
MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/ WORKDIR /build/

View File

@ -1,4 +1,5 @@
FROM sist2app/sist2-build-arm64 as build FROM simon987/sist2-build-arm64 as build
MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/ WORKDIR /build/

View File

@ -1,11 +1,9 @@
![GitHub](https://img.shields.io/github/license/sist2app/sist2.svg) ![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/sist2app/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/sist2app/sist2) [![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/) [![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/) **Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
**Community URL:** [Discord](https://discord.gg/2PEjDy3Rfs)
# sist2 # sist2
sist2 (Simple incremental search tool) sist2 (Simple incremental search tool)
@ -38,32 +36,26 @@ sist2 (Simple incremental search tool)
### Using Docker Compose *(Windows/Linux/Mac)* ### Using Docker Compose *(Windows/Linux/Mac)*
```yaml ```yaml
version: "3"
services: services:
elasticsearch: elasticsearch:
image: elasticsearch:7.17.9 image: elasticsearch:7.17.9
restart: unless-stopped restart: unless-stopped
volumes:
# This directory must have 1000:1000 permissions (or update PUID & PGID below)
- /data/sist2-es-data/:/usr/share/elasticsearch/data
environment: environment:
- "discovery.type=single-node" - "discovery.type=single-node"
- "ES_JAVA_OPTS=-Xms2g -Xmx2g" - "ES_JAVA_OPTS=-Xms2g -Xmx2g"
- "PUID=1000"
- "PGID=1000"
sist2-admin: sist2-admin:
image: sist2app/sist2:x64-linux image: simon987/sist2:3.3.3-x64-linux
restart: unless-stopped restart: unless-stopped
volumes: volumes:
- /data/sist2-admin-data/:/sist2-admin/ - ./sist2-admin-data/:/sist2-admin/
- /<path to index>/:/host - /:/host
ports: ports:
- 4090:4090 - 4090:4090 # sist2
# NOTE: Don't expose this port publicly! - 8080:8080 # sist2-admin
- 8080:8080
working_dir: /root/sist2-admin/ working_dir: /root/sist2-admin/
entrypoint: python3 entrypoint: python3 /root/sist2-admin/sist2_admin/app.py
command:
- /root/sist2-admin/sist2_admin/app.py
``` ```
Navigate to http://localhost:8080/ to configure sist2-admin. Navigate to http://localhost:8080/ to configure sist2-admin.
@ -79,7 +71,7 @@ Navigate to http://localhost:8080/ to configure sist2-admin.
``` ```
* **SQLite**: No installation required * **SQLite**: No installation required
2. Download the [latest sist2 release](https://github.com/sist2app/sist2/releases). 2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`. Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
3. See [usage guide](docs/USAGE.md) for command line usage. 3. See [usage guide](docs/USAGE.md) for command line usage.
@ -88,30 +80,28 @@ Example usage:
1. Scan a directory: `sist2 scan ~/Documents --output ./documents.sist2` 1. Scan a directory: `sist2 scan ~/Documents --output ./documents.sist2`
2. Prepare search index: 2. Prepare search index:
* **Elasticsearch**: `sist2 index --es-url http://localhost:9200 ./documents.sist2` * **Elasticsearch**: `sist2 index --es-url http://localhost:9200 ./documents.sist2`
* **SQLite**: `sist2 sqlite-index --search-index ./search.sist2 ./documents.sist2` * **SQLite**: `sist2 index --search-index ./search.sist2 ./documents.sist2`
3. Start web interface: 3. Start web interface: `sist2 web ./documents.sist2`
* **Elasticsearch**: `sist2 web ./documents.sist2`
* **SQLite**: `sist2 web --search-index ./search.sist2 ./documents.sist2`
## Format support ## Format support
| File type | Library | Content | Thumbnail | Metadata | | File type | Library | Content | Thumbnail | Metadata |
|:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------| |:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------|
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title | | pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
| cbz,cbr | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | - | yes | - | | cbz,cbr | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | - | yes | - |
| `audio/*` | ffmpeg | - | yes | ID3 tags | | `audio/*` | ffmpeg | - | yes | ID3 tags |
| `video/*` | ffmpeg | - | yes | title, comment, artist | | `video/*` | ffmpeg | - | yes | title, comment, artist |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/sist2app/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags | | `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags | | raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags |
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style | | ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
| `text/plain` | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - | | `text/plain` | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - | | html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no | | tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
| docx, xlsx, pptx | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title | | docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| doc (MS Word 97-2003) | antiword | yes | no | author, title | | doc (MS Word 97-2003) | antiword | yes | no | author, title |
| mobi, azw, azw3 | libmobi | yes | yes | author, title | | mobi, azw, azw3 | libmobi | yes | yes | author, title |
| wpd (WordPerfect) | libwpd | yes | no | *planned* | | wpd (WordPerfect) | libwpd | yes | no | *planned* |
| json, jsonl, ndjson | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | - | - | | json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
\* *See [Archive files](#archive-files)* \* *See [Archive files](#archive-files)*
@ -135,7 +125,7 @@ You can enable OCR support for ebook (pdf,xps,fb2,epub) or image file types with
Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files). directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `sist2app/sist2` image comes with common languages The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa, chi_sim, deu, pol) pre-installed. (hin, jpn, eng, fra, rus, spa, chi_sim, deu, pol) pre-installed.
You can use the `+` separator to specify multiple languages. The language You can use the `+` separator to specify multiple languages. The language
@ -163,6 +153,7 @@ indices, but it uses much less memory and is easier to set up.
| Query syntax | [fts5](https://www.sqlite.org/fts5.html) | [query_string](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax) | | Query syntax | [fts5](https://www.sqlite.org/fts5.html) | [query_string](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax) |
| Fuzzy search | | ✓ | | Fuzzy search | | ✓ |
| Media Types tree real-time updating | | ✓ | | Media Types tree real-time updating | | ✓ |
| Search in file `path` | [WIP](https://github.com/simon987/sist2/issues/402) | ✓ |
| Manual tagging | ✓ | ✓ | | Manual tagging | ✓ | ✓ |
| User scripts | ✓ | ✓ | | User scripts | ✓ | ✓ |
| Media Type breakdown for search results | | ✓ | | Media Type breakdown for search results | | ✓ |
@ -175,13 +166,13 @@ sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported re
to enable it. to enable it.
The text processing is done in your browser, no data is sent to any third-party services. The text processing is done in your browser, no data is sent to any third-party services.
See [sist2app/sist2-ner-models](https://github.com/sist2app/sist2-ner-models) for more details. See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details.
#### List of available repositories: #### List of available repositories:
| URL | Maintainer | Purpose | | URL | Maintainer | Purpose |
|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------| |---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
| [sist2app/sist2-ner-models](https://raw.githubusercontent.com/sist2app/sist2-ner-models/main/repo.json) | [sist2app](https://github.com/sist2app) | General | | [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General |
<details> <details>
<summary>Screenshot</summary> <summary>Screenshot</summary>
@ -197,7 +188,7 @@ You can compile **sist2** by yourself if you don't want to use the pre-compiled
### Using docker ### Using docker
```bash ```bash
git clone --recursive https://github.com/sist2app/sist2/ git clone --recursive https://github.com/simon987/sist2/
cd sist2 cd sist2
docker build . -t my-sist2-image docker build . -t my-sist2-image
# Copy sist2 executable from docker image # Copy sist2 executable from docker image
@ -212,16 +203,16 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
``` ```
2. Install vcpkg using my fork: https://github.com/sist2app/vcpkg 2. Install vcpkg using my fork: https://github.com/simon987/vcpkg
3. Install vcpkg dependencies 3. Install vcpkg dependencies
```bash ```bash
vcpkg install openblas curl[core,openssl] sqlite3[core,fts5,json1] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,zlib] vcpkg install openblas curl[core,openssl] sqlite3[core,fts5] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,zlib]
``` ```
4. Build 4. Build
```bash ```bash
git clone --recursive https://github.com/sist2app/sist2/ git clone --recursive https://github.com/simon987/sist2/
(cd sist2-vue; npm install; npm run build) (cd sist2-vue; npm install; npm run build)
(cd sist2-admin/frontend; npm install; npm run build) (cd sist2-admin/frontend; npm install; npm run build)
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake . cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .

View File

@ -4,21 +4,16 @@ services:
elasticsearch: elasticsearch:
image: elasticsearch:7.17.9 image: elasticsearch:7.17.9
container_name: sist2-es container_name: sist2-es
volumes:
# This directory must have 1000:1000 permissions (or update PUID & PGID below)
- /data/sist2-es-data/:/usr/share/elasticsearch/data
environment: environment:
- "discovery.type=single-node" - "discovery.type=single-node"
- "ES_JAVA_OPTS=-Xms2g -Xmx2g" - "ES_JAVA_OPTS=-Xms2g -Xmx2g"
- "PUID=1000"
- "PGID=1000"
sist2-admin: sist2-admin:
build: build:
context: . context: .
container_name: sist2-admin container_name: sist2-admin
volumes: volumes:
- /data/sist2-admin-data/:/sist2-admin/ - /mnt/array/sist2-admin-data/:/sist2-admin/
- /<path to index>/:/host - /:/host
ports: ports:
- 4090:4090 - 4090:4090
# NOTE: Don't expose this port publicly! # NOTE: Don't expose this port publicly!

View File

@ -172,10 +172,6 @@ Using a version >=7.14.0 is recommended to enable the following features:
- Bug fix for large documents (See #198) - Bug fix for large documents (See #198)
Using a version >=8.0.0 is recommended to enable the following features:
- Approximate KNN search for Embeddings search (faster queries).
When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI. When using a legacy version of ES, a notice will be displayed next to the sist2 version in the web UI.
If you don't care about the features above, you can ignore it or disable it in the configuration page. If you don't care about the features above, you can ignore it or disable it in the configuration page.
@ -189,7 +185,7 @@ Since v3.2.0, User scripts can be used to generate _embeddings_ (vector of float
In theory, embeddings can be created for any type of documents (image, text, audio etc.). In theory, embeddings can be created for any type of documents (image, text, audio etc.).
For example, the [clip](https://github.com/sist2app/sist2-script-clip) User Script, generates 512-d embeddings of images For example, the [clip](https://github.com/simon987/sist2-script-clip) User Script, generates 512-d embeddings of images
(videos are also supported using the thumbnails generated by sist2). When the user enters a query in the "Embeddings Search" (videos are also supported using the thumbnails generated by sist2). When the user enters a query in the "Embeddings Search"
textbox, the query's embedding is generated in their browser, leveraging the ONNX web runtime. textbox, the query's embedding is generated in their browser, leveraging the ONNX web runtime.

View File

@ -2,6 +2,8 @@
VCPKG_ROOT="/vcpkg" VCPKG_ROOT="/vcpkg"
git submodule update --init --recursive
( (
cd sist2-vue/ cd sist2-vue/
npm install npm install

View File

@ -1,16 +1,8 @@
MAGIC_PATHS = [ try:
"/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc", with open("/usr/lib/file/magic.mgc", "rb") as f:
"/work/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc", data = f.read()
"/usr/lib/file/magic.mgc" except:
] data = bytes([])
for path in MAGIC_PATHS:
try:
with open(path, "rb") as f:
data = f.read()
break
except:
continue
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data))) print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@
"watch": "vue-cli-service build --watch" "watch": "vue-cli-service build --watch"
}, },
"dependencies": { "dependencies": {
"axios": "^1.6.0", "axios": "^0.27.2",
"bootstrap-vue": "^2.21.2", "bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5", "core-js": "^3.6.5",
"moment": "^2.29.3", "moment": "^2.29.3",

View File

@ -4,7 +4,7 @@
<b-container class="pt-4"> <b-container class="pt-4">
<b-alert show dismissible variant="info"> <b-alert show dismissible variant="info">
This is a beta version of sist2-admin. Please submit bug reports, usability issues and feature requests This is a beta version of sist2-admin. Please submit bug reports, usability issues and feature requests
to the <a href="https://github.com/sist2app/sist2/issues/new/choose" target="_blank">issue tracker on to the <a href="https://github.com/simon987/sist2/issues/new/choose" target="_blank">issue tracker on
Github</a>. Thank you! Github</a>. Thank you!
</b-alert> </b-alert>
<router-view v-if="$store.state.sist2AdminInfo"/> <router-view v-if="$store.state.sist2AdminInfo"/>

View File

@ -89,12 +89,9 @@ class Sist2AdminApi {
/** /**
* @param {string} name * @param {string} name
* @param {bool} full
*/ */
runJob(name, full) { runJob(name) {
return axios.get(`${this.baseUrl}/api/job/${name}/run`, { return axios.get(`${this.baseUrl}/api/job/${name}/run`);
params: {full}
});
} }
/** /**

View File

@ -95,7 +95,6 @@ export default {
methods: { methods: {
onOcrLangChange() { onOcrLangChange() {
this.options.ocr_lang = this.selectedOcrLangs.join("+"); this.options.ocr_lang = this.selectedOcrLangs.join("+");
this.update();
}, },
update() { update() {
this.disableOcrLang = this.options.ocr_images === false && this.options.ocr_ebooks === false; this.disableOcrLang = this.options.ocr_images === false && this.options.ocr_ebooks === false;

View File

@ -1,70 +1,59 @@
<template> <template>
<div> <div>
<h4>{{ $t("webOptions.title") }}</h4> <h4>{{ $t("webOptions.title") }}</h4>
<b-card> <b-card>
<label>{{ $t("webOptions.lang") }}</label> <label>{{ $t("webOptions.lang") }}</label>
<b-form-select v-model="options.lang" :options="['en', 'fr', 'zh-CN', 'pl', 'de']" <b-form-select v-model="options.lang" :options="['en', 'fr', 'zh-CN', 'pl', 'de']"
@change="update()"></b-form-select> @change="update()"></b-form-select>
<label>{{ $t("webOptions.bind") }}</label> <label>{{ $t("webOptions.bind") }}</label>
<b-form-input v-model="options.bind" @change="update()"></b-form-input> <b-form-input v-model="options.bind" @change="update()"></b-form-input>
<label>{{ $t("webOptions.tagline") }}</label> <label>{{ $t("webOptions.tagline") }}</label>
<b-form-textarea v-model="options.tagline" @change="update()"></b-form-textarea> <b-form-textarea v-model="options.tagline" @change="update()"></b-form-textarea>
<label>{{ $t("webOptions.auth") }}</label> <label>{{ $t("webOptions.auth") }}</label>
<b-form-input v-model="options.auth" @change="update()"></b-form-input> <b-form-input v-model="options.auth" @change="update()"></b-form-input>
<label>{{ $t("webOptions.tagAuth") }}</label> <label>{{ $t("webOptions.tagAuth") }}</label>
<b-form-input v-model="options.tag_auth" @change="update()" :disabled="Boolean(options.auth)"></b-form-input> <b-form-input v-model="options.tag_auth" @change="update()"></b-form-input>
</b-card>
<b-form-checkbox v-model="options.verbose" @change="update()"> <br>
{{$t("webOptions.verbose")}} <h4>Auth0 options</h4>
</b-form-checkbox> <b-card>
</b-card> <label>{{ $t("webOptions.auth0Audience") }}</label>
<b-form-input v-model="options.auth0_audience" @change="update()"></b-form-input>
<br> <label>{{ $t("webOptions.auth0Domain") }}</label>
<h4>Auth0 options</h4> <b-form-input v-model="options.auth0_domain" @change="update()"></b-form-input>
<b-card>
<label>{{ $t("webOptions.auth0Audience") }}</label>
<b-form-input v-model="options.auth0_audience" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth0Domain") }}</label> <label>{{ $t("webOptions.auth0ClientId") }}</label>
<b-form-input v-model="options.auth0_domain" @change="update()"></b-form-input> <b-form-input v-model="options.auth0_client_id" @change="update()"></b-form-input>
<label>{{ $t("webOptions.auth0ClientId") }}</label> <label>{{ $t("webOptions.auth0PublicKey") }}</label>
<b-form-input v-model="options.auth0_client_id" @change="update()"></b-form-input> <b-textarea rows="10" v-model="options.auth0_public_key" @change="update()"></b-textarea>
</b-card>
<label>{{ $t("webOptions.auth0PublicKey") }}</label> </div>
<b-textarea rows="10" v-model="options.auth0_public_key" @change="update()"></b-textarea>
</b-card>
</div>
</template> </template>
<script> <script>
export default { export default {
name: "WebOptions", name: "WebOptions",
props: ["options", "frontendName"], props: ["options", "frontendName"],
data() { data() {
return { return {
showEsTestAlert: false, showEsTestAlert: false,
esTestOk: false, esTestOk: false,
esTestMessage: "" esTestMessage: "",
} }
},
methods: {
update() {
console.log(this.options)
if (this.options.auth && this.options.tag_auth) {
// If both are set, remove tagAuth
this.options.tag_auth = "";
}
this.$emit("change", this.options);
}, },
} methods: {
update() {
this.$emit("change", this.options);
},
}
} }
</script> </script>

View File

@ -8,7 +8,6 @@ export default {
view: "View", view: "View",
delete: "Delete", delete: "Delete",
runNow: "Index now", runNow: "Index now",
runNowFull: "Full re-index",
create: "Create", create: "Create",
cancel: "Cancel", cancel: "Cancel",
test: "Test", test: "Test",
@ -65,9 +64,6 @@ export default {
gitRepository: "Git repository URL", gitRepository: "Git repository URL",
extraArgs: "Extra command line arguments", extraArgs: "Extra command line arguments",
couldNotStartFrontend: "Could not start frontend",
couldNotStartFrontendBody: "Unable to start the frontend, check server logs for more details.",
selectJobs: "Available jobs", selectJobs: "Available jobs",
selectJob: "Select a job", selectJob: "Select a job",
webOptions: { webOptions: {
@ -81,7 +77,6 @@ export default {
auth0Domain: "Auth0 domain", auth0Domain: "Auth0 domain",
auth0ClientId: "Auth0 client ID", auth0ClientId: "Auth0 client ID",
auth0PublicKey: "Auth0 public key", auth0PublicKey: "Auth0 public key",
verbose: "Verbose logs"
}, },
backendOptions: { backendOptions: {
title: "Search backend options", title: "Search backend options",

View File

@ -1,63 +1,63 @@
<template> <template>
<b-card> <b-card>
<b-card-title> <b-card-title>
{{ name }} {{ name }}
<small style="vertical-align: top"> <small style="vertical-align: top">
<b-badge v-if="!loading && frontend.running" variant="success">{{ $t("online") }}</b-badge> <b-badge v-if="!loading && frontend.running" variant="success">{{ $t("online") }}</b-badge>
<b-badge v-else-if="!loading" variant="secondary">{{ $t("offline") }}</b-badge> <b-badge v-else-if="!loading" variant="secondary">{{ $t("offline") }}</b-badge>
</small> </small>
</b-card-title> </b-card-title>
<!-- Action buttons--> <!-- Action buttons-->
<div class="mb-3" v-if="!loading"> <div class="mb-3" v-if="!loading">
<b-button class="mr-1" :disabled="frontend.running || !valid" variant="success" @click="start()">{{ <b-button class="mr-1" :disabled="frontend.running || !valid" variant="success" @click="start()">{{
$t("start") $t("start")
}} }}
</b-button> </b-button>
<b-button class="mr-1" :disabled="!frontend.running" variant="danger" @click="stop()">{{ <b-button class="mr-1" :disabled="!frontend.running" variant="danger" @click="stop()">{{
$t("stop") $t("stop")
}} }}
</b-button> </b-button>
<b-button class="mr-1" :disabled="!frontend.running" variant="primary" :href="frontendUrl" target="_blank"> <b-button class="mr-1" :disabled="!frontend.running" variant="primary" :href="frontendUrl" target="_blank">
{{ $t("go") }} {{ $t("go") }}
</b-button> </b-button>
<b-button variant="danger" @click="deleteFrontend()">{{ $t("delete") }}</b-button> <b-button variant="danger" @click="deleteFrontend()">{{ $t("delete") }}</b-button>
</div> </div>
<b-progress v-if="loading" striped animated value="100"></b-progress> <b-progress v-if="loading" striped animated value="100"></b-progress>
<b-card-body v-else> <b-card-body v-else>
<h4>{{ $t("backendOptions.title") }}</h4> <h4>{{ $t("backendOptions.title") }}</h4>
<b-card> <b-card>
<b-alert v-if="!valid" variant="warning" show>{{ $t("frontendOptions.noJobSelectedWarning") }}</b-alert> <b-alert v-if="!valid" variant="warning" show>{{ $t("frontendOptions.noJobSelectedWarning") }}</b-alert>
<SearchBackendSelect :value="frontend.web_options.search_backend" <SearchBackendSelect :value="frontend.web_options.search_backend"
@change="onBackendSelect($event)"></SearchBackendSelect> @change="onBackendSelect($event)"></SearchBackendSelect>
<br> <br>
<JobCheckboxGroup :frontend="frontend" @input="update()"></JobCheckboxGroup> <JobCheckboxGroup :frontend="frontend" @input="update()"></JobCheckboxGroup>
</b-card> </b-card>
<br/> <br/>
<WebOptions :options="frontend.web_options" :frontend-name="$route.params.name" <WebOptions :options="frontend.web_options" :frontend-name="$route.params.name"
@change="update()"></WebOptions> @change="update()"></WebOptions>
<br/> <br/>
<h4>{{ $t("frontendOptions.title") }}</h4> <h4>{{ $t("frontendOptions.title") }}</h4>
<b-card> <b-card>
<b-form-checkbox v-model="frontend.auto_start" @change="update()"> <b-form-checkbox v-model="frontend.auto_start" @change="update()">
{{ $t("autoStart") }} {{ $t("autoStart") }}
</b-form-checkbox> </b-form-checkbox>
<label>{{ $t("extraQueryArgs") }}</label> <label>{{ $t("extraQueryArgs") }}</label>
<b-form-input v-model="frontend.extra_query_args" @change="update()"></b-form-input> <b-form-input v-model="frontend.extra_query_args" @change="update()"></b-form-input>
<label>{{ $t("customUrl") }}</label> <label>{{ $t("customUrl") }}</label>
<b-form-input v-model="frontend.custom_url" @change="update()" placeholder="http://"></b-form-input> <b-form-input v-model="frontend.custom_url" @change="update()" placeholder="http://"></b-form-input>
</b-card> </b-card>
</b-card-body> </b-card-body>
</b-card> </b-card>
</template> </template>
<script> <script>
@ -68,78 +68,71 @@ import WebOptions from "@/components/WebOptions";
import SearchBackendSelect from "@/components/SearchBackendSelect.vue"; import SearchBackendSelect from "@/components/SearchBackendSelect.vue";
export default { export default {
name: 'Frontend', name: 'Frontend',
components: {SearchBackendSelect, JobCheckboxGroup, WebOptions}, components: {SearchBackendSelect, JobCheckboxGroup, WebOptions},
data() { data() {
return { return {
loading: true, loading: true,
frontend: null, frontend: null,
} }
},
computed: {
valid() {
return !this.loading && this.frontend.jobs.length > 0;
}, },
frontendUrl() { computed: {
if (this.frontend.custom_url) { valid() {
return this.frontend.custom_url + this.args; return !this.loading && this.frontend.jobs.length > 0;
} },
frontendUrl() {
if (this.frontend.custom_url) {
return this.frontend.custom_url + this.args;
}
if (this.frontend.web_options.bind.startsWith("0.0.0.0")) { if (this.frontend.web_options.bind.startsWith("0.0.0.0")) {
return window.location.protocol + "//" + window.location.hostname + ":" + this.port + this.args; return window.location.protocol + "//" + window.location.hostname + ":" + this.port + this.args;
} }
return window.location.protocol + "//" + this.frontend.web_options.bind + this.args; return window.location.protocol + "//" + this.frontend.web_options.bind + this.args;
},
name() {
return this.$route.params.name;
},
port() {
return this.frontend.web_options.bind.split(":")[1]
},
args() {
const args = this.frontend.extra_query_args;
if (args !== "") {
return "#" + (args.startsWith("?") ? (args) : ("?" + args));
}
return "";
}
}, },
name() { mounted() {
return this.$route.params.name; Sist2AdminApi.getFrontend(this.name).then(resp => {
}, this.frontend = resp.data;
port() { this.loading = false;
return this.frontend.web_options.bind.split(":")[1]
},
args() {
const args = this.frontend.extra_query_args;
if (args !== "") {
return "#" + (args.startsWith("?") ? (args) : ("?" + args));
}
return "";
}
},
mounted() {
Sist2AdminApi.getFrontend(this.name).then(resp => {
this.frontend = resp.data;
this.loading = false;
});
},
methods: {
start() {
Sist2AdminApi.startFrontend(this.name).then(() => {
this.frontend.running = true;
}).catch(() => {
this.$bvToast.toast(this.$t("couldNotStartFrontendBody"), {
title: this.$t("couldNotStartFrontend"),
variant: "danger",
toaster: "b-toaster-bottom-right"
}); });
});
}, },
stop() { methods: {
this.frontend.running = false; start() {
Sist2AdminApi.stopFrontend(this.name) this.frontend.running = true;
}, Sist2AdminApi.startFrontend(this.name)
deleteFrontend() { },
Sist2AdminApi.deleteFrontend(this.name).then(() => { stop() {
this.$router.push("/"); this.frontend.running = false;
}); Sist2AdminApi.stopFrontend(this.name)
}, },
update() { deleteFrontend() {
Sist2AdminApi.updateFrontend(this.name, this.frontend); Sist2AdminApi.deleteFrontend(this.name).then(() => {
}, this.$router.push("/");
onBackendSelect(backend) { });
this.frontend.web_options.search_backend = backend; },
this.frontend.jobs = []; update() {
this.update(); Sist2AdminApi.updateFrontend(this.name, this.frontend);
},
onBackendSelect(backend) {
this.frontend.web_options.search_backend = backend;
this.frontend.jobs = [];
this.update();
}
} }
}
} }
</script> </script>

View File

@ -6,19 +6,7 @@
</b-card-title> </b-card-title>
<div class="mb-3"> <div class="mb-3">
<b-button class="mr-1" variant="primary" @click="runJob()" :disabled="!valid">{{ $t("runNow") }}</b-button>
<b-dropdown
split
split-variant="primary"
variant="primary"
:text="$t('runNow')"
class="mr-1"
:disabled="!valid"
@click="runJob()"
>
<b-dropdown-item href="#" @click="runJob(true)">{{ $t("runNowFull") }}</b-dropdown-item>
</b-dropdown>
<b-button variant="danger" @click="deleteJob()">{{ $t("delete") }}</b-button> <b-button variant="danger" @click="deleteJob()">{{ $t("delete") }}</b-button>
</div> </div>
@ -81,7 +69,6 @@ export default {
return { return {
loading: true, loading: true,
job: null, job: null,
console: console
} }
}, },
methods: { methods: {
@ -91,8 +78,8 @@ export default {
update() { update() {
Sist2AdminApi.updateJob(this.getName(), this.job); Sist2AdminApi.updateJob(this.getName(), this.job);
}, },
runJob(full = false) { runJob() {
Sist2AdminApi.runJob(this.getName(), full).then(() => { Sist2AdminApi.runJob(this.getName()).then(() => {
this.$bvToast.toast(this.$t("runJobConfirmation"), { this.$bvToast.toast(this.$t("runJobConfirmation"), {
title: this.$t("runJobConfirmationTitle"), title: this.$t("runJobConfirmationTitle"),
variant: "success", variant: "success",

View File

@ -170,6 +170,6 @@ span.ADMIN {
margin: 3px; margin: 3px;
white-space: pre; white-space: pre;
color: #000; color: #000;
overflow-y: hidden; overflow: hidden;
} }
</style> </style>

File diff suppressed because it is too large Load Diff

View File

@ -4,4 +4,4 @@ uvicorn
websockets websockets
pycron pycron
GitPython GitPython
git+https://github.com/sist2app/sist2-python.git@2.1 git+https://github.com/simon987/sist2-python.git

View File

@ -2,7 +2,6 @@ import asyncio
import os import os
import signal import signal
from datetime import datetime from datetime import datetime
from time import sleep
from urllib.parse import urlparse from urllib.parse import urlparse
import requests import requests
@ -26,7 +25,6 @@ from state import migrate_v1_to_v2, RUNNING_FRONTENDS, TESSERACT_LANGS, DB_SCHEM
get_log_files_to_remove, delete_log_file, create_default_search_backends get_log_files_to_remove, delete_log_file, create_default_search_backends
from web import Sist2Frontend from web import Sist2Frontend
from script import UserScript, SCRIPT_TEMPLATES from script import UserScript, SCRIPT_TEMPLATES
from util import tail_sync, pid_is_running
sist2 = Sist2(SIST2_BINARY, DATA_FOLDER) sist2 = Sist2(SIST2_BINARY, DATA_FOLDER)
db = PersistentState(dbfile=os.path.join(DATA_FOLDER, "state.db")) db = PersistentState(dbfile=os.path.join(DATA_FOLDER, "state.db"))
@ -171,14 +169,11 @@ def _run_job(job: Sist2Job):
@app.get("/api/job/{name:str}/run") @app.get("/api/job/{name:str}/run")
async def run_job(name: str, full: bool = False): async def run_job(name: str):
job: Sist2Job = db["jobs"][name] job = db["jobs"][name]
if not job: if not job:
raise HTTPException(status_code=404) raise HTTPException(status_code=404)
if full:
job.do_full_scan = True
_run_job(job) _run_job(job)
return "ok" return "ok"
@ -326,18 +321,7 @@ def start_frontend_(frontend: Sist2Frontend):
logger.debug(f"Fetched search backend options for {backend_name}") logger.debug(f"Fetched search backend options for {backend_name}")
pid = sist2.web(frontend.web_options, search_backend, frontend.name) pid = sist2.web(frontend.web_options, search_backend, frontend.name)
sleep(0.2)
if not pid_is_running(pid):
frontend_log = frontend.get_log_path(LOG_FOLDER)
logger.error(f"Frontend exited too quickly, check {frontend_log} for more details:")
for line in tail_sync(frontend.get_log_path(LOG_FOLDER), 3):
logger.error(line.strip())
return False
RUNNING_FRONTENDS[frontend.name] = pid RUNNING_FRONTENDS[frontend.name] = pid
return True
@app.post("/api/frontend/{name:str}/start") @app.post("/api/frontend/{name:str}/start")
@ -346,12 +330,7 @@ async def start_frontend(name: str):
if not frontend: if not frontend:
raise HTTPException(status_code=404) raise HTTPException(status_code=404)
ok = start_frontend_(frontend) start_frontend_(frontend)
if not ok:
raise HTTPException(status_code=500)
return "ok"
@app.post("/api/frontend/{name:str}/stop") @app.post("/api/frontend/{name:str}/stop")

View File

@ -204,7 +204,7 @@ class Sist2IndexTask(Sist2Task):
self.job.previous_index_path = self.job.index_path self.job.previous_index_path = self.job.index_path
db["jobs"][self.job.name] = self.job db["jobs"][self.job.name] = self.job
self._logger.info(json.dumps({"sist2-admin": f"Sist2Scan task finished {return_code=}, {duration=}, {ok=}"})) self._logger.info(json.dumps({"sist2-admin": f"Sist2Scan task finished {return_code=}, {duration=}"}))
logger.info(f"Completed {self.display_name} ({return_code=})") logger.info(f"Completed {self.display_name} ({return_code=})")

View File

@ -96,7 +96,7 @@ SCRIPT_TEMPLATES = {
"CLIP - Generate embeddings to predict the most relevant image based on the text prompt": lambda name: UserScript( "CLIP - Generate embeddings to predict the most relevant image based on the text prompt": lambda name: UserScript(
name=name, name=name,
type=ScriptType.GIT, type=ScriptType.GIT,
git_repository="https://github.com/sist2app/sist2-script-clip", git_repository="https://github.com/simon987/sist2-script-clip",
extra_args="--num-tags=1 --tags-file=general.txt --color=#dcd7ff" extra_args="--num-tags=1 --tags-file=general.txt --color=#dcd7ff"
), ),
"Whisper - Speech to text with OpenAI Whisper": lambda name: UserScript( "Whisper - Speech to text with OpenAI Whisper": lambda name: UserScript(

View File

@ -2,11 +2,10 @@ import datetime
import json import json
import logging import logging
import os.path import os.path
import sys
from datetime import datetime from datetime import datetime
from enum import Enum from enum import Enum
from io import TextIOWrapper from io import TextIOWrapper
from logging import FileHandler, StreamHandler from logging import FileHandler
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
from threading import Thread from threading import Thread
@ -201,7 +200,6 @@ class WebOptions(BaseModel):
auth0_client_id: str = None auth0_client_id: str = None
auth0_public_key: str = None auth0_public_key: str = None
auth0_public_key_file: str = None auth0_public_key_file: str = None
verbose: bool = False
def __init__(self, **kwargs): def __init__(self, **kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
@ -233,8 +231,6 @@ class WebOptions(BaseModel):
args.append(f"--tag-auth={self.tag_auth}") args.append(f"--tag-auth={self.tag_auth}")
if self.dev: if self.dev:
args.append(f"--dev") args.append(f"--dev")
if self.verbose:
args.append(f"--very-verbose")
args.extend(self.indices) args.extend(self.indices)
@ -261,7 +257,7 @@ class Sist2:
set_pid_cb(proc.pid) set_pid_cb(proc.pid)
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, None, proc)) t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
t_stderr.start() t_stderr.start()
self._consume_logs_stdout(logs_cb, proc) self._consume_logs_stdout(logs_cb, proc)
@ -288,7 +284,7 @@ class Sist2:
set_pid_cb(proc.pid) set_pid_cb(proc.pid)
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, None, proc)) t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
t_stderr.start() t_stderr.start()
self._consume_logs_stdout(logs_cb, proc) self._consume_logs_stdout(logs_cb, proc)
@ -298,7 +294,7 @@ class Sist2:
return proc.returncode return proc.returncode
@staticmethod @staticmethod
def _consume_logs_stderr(logs_cb, exit_cb, proc): def _consume_logs_stderr(logs_cb, proc):
pipe_wrapper = TextIOWrapper(proc.stderr, encoding="utf8", errors="ignore") pipe_wrapper = TextIOWrapper(proc.stderr, encoding="utf8", errors="ignore")
try: try:
for line in pipe_wrapper: for line in pipe_wrapper:
@ -306,9 +302,7 @@ class Sist2:
continue continue
logs_cb({"stderr": line}) logs_cb({"stderr": line})
finally: finally:
return_code = proc.wait() proc.wait()
if exit_cb:
exit_cb(return_code)
pipe_wrapper.close() pipe_wrapper.close()
@staticmethod @staticmethod
@ -342,19 +336,15 @@ class Sist2:
web_logger = logging.Logger(name=f"sist2-frontend-{name}") web_logger = logging.Logger(name=f"sist2-frontend-{name}")
web_logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"frontend-{name}.log"))) web_logger.addHandler(FileHandler(os.path.join(LOG_FOLDER, f"frontend-{name}.log")))
web_logger.addHandler(StreamHandler())
def logs_cb(message): def logs_cb(message):
web_logger.info(json.dumps(message)) web_logger.info(json.dumps(message))
def exit_cb(return_code):
logger.info(f"Web frontend exited with return code {return_code}")
logger.info(f"Starting frontend {' '.join(args)}") logger.info(f"Starting frontend {' '.join(args)}")
proc = Popen(args, stdout=PIPE, stderr=PIPE) proc = Popen(args, stdout=PIPE, stderr=PIPE)
t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, exit_cb, proc)) t_stderr = Thread(target=self._consume_logs_stderr, args=(logs_cb, proc))
t_stderr.start() t_stderr.start()
t_stdout = Thread(target=self._consume_logs_stdout, args=(logs_cb, proc)) t_stdout = Thread(target=self._consume_logs_stdout, args=(logs_cb, proc))

View File

@ -1,41 +0,0 @@
from glob import glob
import os
from config import DATA_FOLDER
def get_old_index_files(name):
files = glob(os.path.join(DATA_FOLDER, f"scan-{name.replace('/', '_')}-*.sist2"))
files = list(sorted(files, key=lambda f: os.stat(f).st_mtime))
files = files[-1:]
return files
def tail_sync(filename, lines=1, _buffer=4098):
with open(filename) as f:
lines_found = []
block_counter = -1
while len(lines_found) < lines:
try:
f.seek(block_counter * _buffer, os.SEEK_END)
except IOError:
f.seek(0)
lines_found = f.readlines()
break
lines_found = f.readlines()
block_counter -= 1
return lines_found[-lines:]
def pid_is_running(pid):
try:
os.kill(pid, 0)
except OSError:
return False
return True

Binary file not shown.

14732
sist2-vue/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,7 @@
"dependencies": { "dependencies": {
"@auth0/auth0-spa-js": "^2.0.2", "@auth0/auth0-spa-js": "^2.0.2",
"@egjs/vue-infinitegrid": "3.3.0", "@egjs/vue-infinitegrid": "3.3.0",
"axios": "^1.6.0", "axios": "^0.25.0",
"bootstrap-vue": "^2.21.2", "bootstrap-vue": "^2.21.2",
"core-js": "^3.6.5", "core-js": "^3.6.5",
"d3": "^5.6.1", "d3": "^5.6.1",
@ -17,7 +17,7 @@
"dom-to-image": "^2.6.0", "dom-to-image": "^2.6.0",
"fslightbox-vue": "fslightbox-vue.tgz", "fslightbox-vue": "fslightbox-vue.tgz",
"nouislider": "^15.2.0", "nouislider": "^15.2.0",
"onnxruntime-web": "1.15.1", "onnxruntime-web": "^1.15.1",
"underscore": "^1.13.1", "underscore": "^1.13.1",
"vue": "^2.6.12", "vue": "^2.6.12",
"vue-color": "^2.8.1", "vue-color": "^2.8.1",

View File

@ -33,6 +33,18 @@ class Sist2Api {
getSist2Info() { getSist2Info() {
return axios.get(`${this.baseUrl}i`).then(resp => { return axios.get(`${this.baseUrl}i`).then(resp => {
const indices = resp.data.indices;
resp.data.indices = indices.map(idx => {
return {
id: idx.id,
name: idx.name,
timestamp: idx.timestamp,
version: idx.version,
models: idx.models,
};
});
this.sist2Info = resp.data; this.sist2Info = resp.data;
return resp.data; return resp.data;
@ -143,10 +155,6 @@ class Sist2Api {
} }
} }
_getIndexRoot(indexId) {
return this.sist2Info.indices.find(idx => idx.id === indexId).root;
}
esQuery(query) { esQuery(query) {
return axios.post(`${this.baseUrl}es`, query).then(resp => { return axios.post(`${this.baseUrl}es`, query).then(resp => {
const res = resp.data; const res = resp.data;
@ -155,7 +163,6 @@ class Sist2Api {
res.hits.hits.forEach((hit) => { res.hits.hits.forEach((hit) => {
hit["_source"]["name"] = strUnescape(hit["_source"]["name"]); hit["_source"]["name"] = strUnescape(hit["_source"]["name"]);
hit["_source"]["path"] = strUnescape(hit["_source"]["path"]); hit["_source"]["path"] = strUnescape(hit["_source"]["path"]);
hit["_source"]["indexRoot"] = this._getIndexRoot(hit["_source"]["index"]);
this.setHitProps(hit); this.setHitProps(hit);
this.setHitTags(hit); this.setHitTags(hit);
@ -309,7 +316,7 @@ class Sist2Api {
} }
getTagsSqlite() { getTagsSqlite() {
return axios.get(`${this.baseUrl}fts/tags`) return axios.get(`${this.baseUrl}/fts/tags`)
.then(resp => { .then(resp => {
return resp.data.map(tag => this._createEsTag(tag.tag, tag.count)) return resp.data.map(tag => this._createEsTag(tag.tag, tag.count))
}); });
@ -566,7 +573,7 @@ class Sist2Api {
} }
getDocumentSqlite(sid) { getDocumentSqlite(sid) {
return axios.get(`${this.baseUrl}fts/d/${sid}`) return axios.get(`${this.baseUrl}/fts/d/${sid}`)
.then(resp => ({ .then(resp => ({
_source: resp.data _source: resp.data
})); }));
@ -589,7 +596,7 @@ class Sist2Api {
} }
getTagSuggestionsSqlite(prefix) { getTagSuggestionsSqlite(prefix) {
return axios.post(`${this.baseUrl}fts/suggestTags`, prefix) return axios.post(`${this.baseUrl}/fts/suggestTags`, prefix)
.then(resp => (resp.data)); .then(resp => (resp.data));
} }
@ -620,7 +627,7 @@ class Sist2Api {
} }
getEmbeddings(sid, modelId) { getEmbeddings(sid, modelId) {
return axios.post(`${this.baseUrl}e/${sid}/${modelId.toString().padStart(3, '0')}`) return axios.post(`${this.baseUrl}/e/${sid}/${modelId.toString().padStart(3, '0')}`)
.then(resp => (resp.data)); .then(resp => (resp.data));
} }
} }

View File

@ -117,11 +117,11 @@ class Sist2ElasticsearchQuery {
} }
if (dateMin && dateMax) { if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax, format: "epoch_second"}}}) filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
} else if (dateMin) { } else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin, format: "epoch_second"}}}) filters.push({range: {mtime: {gte: dateMin}}})
} else if (dateMax) { } else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax, format: "epoch_second"}}}) filters.push({range: {mtime: {lte: dateMax}}})
} }
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes

View File

@ -106,8 +106,6 @@ class Sist2ElasticsearchQuery {
q["sortAsc"] = true; q["sortAsc"] = true;
} }
q["searchInPath"] = getters.optSearchInPath;
return q; return q;
} }
} }

View File

@ -59,7 +59,7 @@ export default {
const fields = [ const fields = [
"title", "duration", "audioc", "videoc", "title", "duration", "audioc", "videoc",
"bitrate", "artist", "album", "album_artist", "genre", "font_name", "author", "media_comment", "bitrate", "artist", "album", "album_artist", "genre", "font_name", "author",
"modified_by", "pages", "tag", "modified_by", "pages", "tag",
"exif_make", "exif_software", "exif_exposure_time", "exif_fnumber", "exif_focal_length", "exif_make", "exif_software", "exif_exposure_time", "exif_fnumber", "exif_focal_length",
"exif_user_comment", "exif_iso_speed_ratings", "exif_model", "exif_datetime", "exif_user_comment", "exif_iso_speed_ratings", "exif_model", "exif_datetime",

View File

@ -77,7 +77,6 @@ export default {
return listener(e); return listener(e);
} }
}; };
}, },
methods: { methods: {
keyDownListener(e) { keyDownListener(e) {

View File

@ -9,7 +9,7 @@
<span class="badge badge-pill version" v-if="$store && $store.state.sist2Info"> <span class="badge badge-pill version" v-if="$store && $store.state.sist2Info">
v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span><span v-if="isLegacy() && !hideLegacy()">-<a v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span><span v-if="isLegacy() && !hideLegacy()">-<a
href="https://github.com/sist2app/sist2/blob/master/docs/USAGE.md#elasticsearch" href="https://github.com/simon987/sist2/blob/master/docs/USAGE.md#elasticsearch"
target="_blank">legacyES</a></span><span v-if="$store.state.uiSqliteMode">-SQLite</span> target="_blank">legacyES</a></span><span v-if="$store.state.uiSqliteMode">-SQLite</span>
</span> </span>

View File

@ -138,7 +138,7 @@ export default {
}, },
debug: "Debug information", debug: "Debug information",
debugDescription: "Information useful for debugging. If you encounter bugs or have suggestions for" + debugDescription: "Information useful for debugging. If you encounter bugs or have suggestions for" +
" new features, please submit a new issue <a href='https://github.com/sist2app/sist2/issues/new/choose'>here</a>.", " new features, please submit a new issue <a href='https://github.com/simon987/sist2/issues/new/choose'>here</a>.",
tagline: "Tagline", tagline: "Tagline",
toast: { toast: {
esConnErrTitle: "Elasticsearch connection error", esConnErrTitle: "Elasticsearch connection error",
@ -318,7 +318,7 @@ export default {
}, },
debug: "Debug Informationen", debug: "Debug Informationen",
debugDescription: "Informationen für das Debugging. Wenn du Bugs gefunden oder Anregungen für " + debugDescription: "Informationen für das Debugging. Wenn du Bugs gefunden oder Anregungen für " +
"neue Features hast, poste sie bitte <a href='https://github.com/sist2app/sist2/issues/new/choose'>hier</a>.", "neue Features hast, poste sie bitte <a href='https://github.com/simon987/sist2/issues/new/choose'>hier</a>.",
tagline: "Tagline", tagline: "Tagline",
toast: { toast: {
esConnErrTitle: "Elasticsearch Verbindungsfehler", esConnErrTitle: "Elasticsearch Verbindungsfehler",
@ -494,7 +494,7 @@ export default {
debug: "Information de débogage", debug: "Information de débogage",
debugDescription: "Informations utiles pour le débogage\n" + debugDescription: "Informations utiles pour le débogage\n" +
"Si vous rencontrez des bogues ou si vous avez des suggestions pour de nouvelles fonctionnalités," + "Si vous rencontrez des bogues ou si vous avez des suggestions pour de nouvelles fonctionnalités," +
" veuillez soumettre un nouvel Issue <a href='https://github.com/sist2app/sist2/issues/new/choose'>ici</a>.", " veuillez soumettre un nouvel Issue <a href='https://github.com/simon987/sist2/issues/new/choose'>ici</a>.",
tagline: "Tagline", tagline: "Tagline",
toast: { toast: {
esConnErrTitle: "Erreur de connexion Elasticsearch", esConnErrTitle: "Erreur de connexion Elasticsearch",
@ -668,7 +668,7 @@ export default {
}, },
debug: "调试信息", debug: "调试信息",
debugDescription: "对调试除错有用的信息。 若您遇到bug或者想建议新功能请提交新Issue到" + debugDescription: "对调试除错有用的信息。 若您遇到bug或者想建议新功能请提交新Issue到" +
"<a href='https://github.com/sist2app/sist2/issues/new/choose'>这里</a>.", "<a href='https://github.com/simon987/sist2/issues/new/choose'>这里</a>.",
tagline: "标签栏", tagline: "标签栏",
toast: { toast: {
esConnErrTitle: "Elasticsearch连接错误", esConnErrTitle: "Elasticsearch连接错误",
@ -846,7 +846,7 @@ export default {
}, },
debug: "Informacje dla programistów", debug: "Informacje dla programistów",
debugDescription: "Informacje przydatne do znajdowania błędów w oprogramowaniu. Jeśli napotkasz błąd lub masz" + debugDescription: "Informacje przydatne do znajdowania błędów w oprogramowaniu. Jeśli napotkasz błąd lub masz" +
" propozycje zmian, zgłoś to proszę <a href='https://github.com/sist2app/sist2/issues/new/choose'>tutaj</a>.", " propozycje zmian, zgłoś to proszę <a href='https://github.com/simon987/sist2/issues/new/choose'>tutaj</a>.",
tagline: "Slogan", tagline: "Slogan",
toast: { toast: {
esConnErrTitle: "Problem z połączeniem z Elasticsearch", esConnErrTitle: "Problem z połączeniem z Elasticsearch",

View File

@ -22,9 +22,7 @@ export class CLIPTransformerModel {
async loadModel(onProgress) { async loadModel(onProgress) {
ort.env.wasm.wasmPaths = ORT_WASM_PATHS; ort.env.wasm.wasmPaths = ORT_WASM_PATHS;
if (window.crossOriginIsolated) { ort.env.wasm.numThreads = 2;
ort.env.wasm.numThreads = 2;
}
let buf = await ModelStore.get(this._modelUrl); let buf = await ModelStore.get(this._modelUrl);
if (!buf) { if (!buf) {

View File

@ -58,7 +58,7 @@ export default new Vuex.Store({
optVidPreviewInterval: 700, optVidPreviewInterval: 700,
optSimpleLightbox: true, optSimpleLightbox: true,
optShowTagPickerFilter: true, optShowTagPickerFilter: true,
optMlRepositories: "https://raw.githubusercontent.com/sist2app/sist2-ner-models/main/repo.json", optMlRepositories: "https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json",
optAutoAnalyze: false, optAutoAnalyze: false,
optMlDefaultModel: null, optMlDefaultModel: null,

View File

@ -81,7 +81,6 @@
<li><code>doc.artist</code></li> <li><code>doc.artist</code></li>
<li><code>doc.title</code></li> <li><code>doc.title</code></li>
<li><code>doc.genre</code></li> <li><code>doc.genre</code></li>
<li><code>doc.media_comment</code></li>
<li><code>doc.album_artist</code></li> <li><code>doc.album_artist</code></li>
<li><code>doc.exif_make</code></li> <li><code>doc.exif_make</code></li>
<li><code>doc.exif_model</code></li> <li><code>doc.exif_model</code></li>
@ -137,7 +136,7 @@
{{ $t("opt.fuzzy") }} {{ $t("opt.fuzzy") }}
</b-form-checkbox> </b-form-checkbox>
<b-form-checkbox :checked="optSearchInPath" @input="setOptSearchInPath">{{ <b-form-checkbox :disabled="uiSqliteMode" :checked="optSearchInPath" @input="setOptSearchInPath">{{
$t("opt.searchInPath") $t("opt.searchInPath")
}} }}
</b-form-checkbox> </b-form-checkbox>

View File

@ -1,5 +1,3 @@
#!/usr/bin/env bash #!/usr/bin/env bash
export NODE_OPTIONS=--openssl-legacy-provider
./node_modules/@vue/cli-service/bin/vue-cli-service.js build --watch ./node_modules/@vue/cli-service/bin/vue-cli-service.js build --watch

View File

@ -25,7 +25,6 @@ const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/", "/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/", "/usr/share/tesseract-ocr/tessdata/",
"/usr/share/tesseract-ocr/4.00/tessdata/", "/usr/share/tesseract-ocr/4.00/tessdata/",
"/usr/share/tesseract-ocr/5/tessdata/",
"./", "./",
NULL NULL
}; };

View File

@ -114,7 +114,7 @@ void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv)
char buf[PATH_MAX]; char buf[PATH_MAX];
strcpy(buf, current_job); strcpy(buf, current_job);
SET_CURRENT_JOB(ipc_ctx, current_job); strcpy(ipc_ctx->current_job[ProcData.thread_id], current_job);
sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC); sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC);
} }
@ -478,7 +478,8 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) {
database_iterator_t *database_create_delete_list_iterator(database_t *db) { database_iterator_t *database_create_delete_list_iterator(database_t *db) {
sqlite3_stmt *stmt; sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "SELECT id FROM delete_list", -1, &stmt, NULL); sqlite3_prepare_v2(db->db, "SELECT doc.id FROM delete_list "
"INNER JOIN document doc ON doc.ROWID = delete_list.id;", -1, &stmt, NULL);
database_iterator_t *iter = malloc(sizeof(database_iterator_t)); database_iterator_t *iter = malloc(sizeof(database_iterator_t));

View File

@ -64,8 +64,6 @@ typedef struct {
char current_job[MAX_THREADS][PATH_MAX * 2]; char current_job[MAX_THREADS][PATH_MAX * 2];
} database_ipc_ctx_t; } database_ipc_ctx_t;
#define SET_CURRENT_JOB(ctx, job) (strcpy((ctx)->current_job[ProcData.thread_id], job))
typedef struct { typedef struct {
double date_min; double date_min;
double date_max; double date_max;

View File

@ -102,9 +102,7 @@ void database_fts_index(database_t *db) {
db->db, "DELETE FROM fts.mime_index;", NULL, NULL, NULL)); db->db, "DELETE FROM fts.mime_index;", NULL, NULL, NULL));
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec( CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, "INSERT INTO fts.mime_index (index_id, mime, count) " db->db, "INSERT INTO fts.mime_index (index_id, mime, count) "
"SELECT index_id, mime, count(*) FROM fts.document_index " "SELECT index_id, mime, count(*) FROM fts.document_index GROUP BY index_id, mime",
"WHERE mime IS NOT NULL "
"GROUP BY index_id, mime",
NULL, NULL, NULL)); NULL, NULL, NULL));
LOG_DEBUG("database_fts.c", "Generating path index"); LOG_DEBUG("database_fts.c", "Generating path index");
@ -162,8 +160,7 @@ void database_fts_index(database_t *db) {
CRASH_IF_NOT_SQLITE_OK(sqlite3_exec( CRASH_IF_NOT_SQLITE_OK(sqlite3_exec(
db->db, db->db,
"INSERT INTO search(rowid, name, content, title, path) " "INSERT INTO search(rowid, name, content, title) SELECT id, name, content, title from document_view",
"SELECT id, name, content, title, path from document_view",
NULL, NULL, NULL)); NULL, NULL, NULL));
} }

View File

@ -75,25 +75,23 @@ const char *FtsDatabaseSchema =
" WHERE id = OLD.id;" " WHERE id = OLD.id;"
" END;" " END;"
"" ""
"CREATE VIEW IF NOT EXISTS document_view (id, name, content, title, path)" "CREATE VIEW IF NOT EXISTS document_view (id, name, content, title)"
" AS" " AS"
" SELECT id," " SELECT id,"
" json_data->>'name'," " json_data->>'name',"
" json_data->>'content'," " json_data->>'content',"
" json_data->>'title'," " json_data->>'title'"
" json_data->>'path'"
" FROM document_index;" " FROM document_index;"
"" ""
"CREATE VIRTUAL TABLE IF NOT EXISTS search USING fts5 (" "CREATE VIRTUAL TABLE IF NOT EXISTS search USING fts5 ("
" name," " name,"
" content," " content,"
" title," " title,"
" path,"
" content='document_view'," " content='document_view',"
" content_rowid='id'" " content_rowid='id'"
");" ");"
// name^8, content^3, title^8, path^5 // name^8, content^3, title^8
"INSERT INTO search(search, rank) VALUES('rank', 'bm25(8, 3, 8, 5)');" "INSERT INTO search(search, rank) VALUES('rank', 'bm25(8, 3, 8)');"
""; "";
const char *IpcDatabaseSchema = const char *IpcDatabaseSchema =

View File

@ -90,7 +90,6 @@ subreq_ctx_t *web_post_async(const char *url, char *data, int insecure) {
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2"); curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) { if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
} }
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, req->curl_err_buffer); curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, req->curl_err_buffer);
@ -124,7 +123,6 @@ response_t *web_get(const char *url, int timeout, int insecure) {
curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout); curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
if (insecure) { if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
} }
struct curl_slist *headers = NULL; struct curl_slist *headers = NULL;
@ -164,7 +162,6 @@ response_t *web_post(const char *url, const char *data, int insecure) {
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2"); curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) { if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
} }
char err_buffer[CURL_ERROR_SIZE + 1] = {}; char err_buffer[CURL_ERROR_SIZE + 1] = {};
@ -206,11 +203,10 @@ response_t *web_put(const char *url, const char *data, int insecure) {
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT"); curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2"); curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
curl_easy_setopt(curl, CURLOPT_SHARE, 0); curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);
curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4); curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURLOPT_DNS_LOCAL_IP4);
if (insecure) { if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
} }
struct curl_slist *headers = NULL; struct curl_slist *headers = NULL;
@ -245,7 +241,6 @@ response_t *web_delete(const char *url, int insecure) {
curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2"); curl_easy_setopt(curl, CURLOPT_USERAGENT, "sist2");
if (insecure) { if (insecure) {
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
} }
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, ""); curl_easy_setopt(curl, CURLOPT_POSTFIELDS, "");

View File

@ -30,8 +30,6 @@ char *get_meta_key_text(enum metakey meta_key) {
return "genre"; return "genre";
case MetaTitle: case MetaTitle:
return "title"; return "title";
case MetaMediaComment:
return "media_comment";
case MetaFontName: case MetaFontName:
return "font_name"; return "font_name";
case MetaExifMake: case MetaExifMake:
@ -161,7 +159,6 @@ void write_document(document_t *doc) {
case MetaExifGpsLatitudeDec: case MetaExifGpsLatitudeDec:
case MetaExifGpsLatitudeRef: case MetaExifGpsLatitudeRef:
case MetaChecksum: case MetaChecksum:
case MetaMediaComment:
case MetaTitle: { case MetaTitle: {
cJSON_AddStringToObject(json, get_meta_key_text(meta->key), meta->str_val); cJSON_AddStringToObject(json, get_meta_key_text(meta->key), meta->str_val);
buffer_size_guess += (int) strlen(meta->str_val); buffer_size_guess += (int) strlen(meta->str_val);

View File

@ -11,6 +11,7 @@
#include "web/serve.h" #include "web/serve.h"
#include "parsing/mime.h" #include "parsing/mime.h"
#include "parsing/parse.h" #include "parsing/parse.h"
#include "auth0/auth0_c_api.h"
#include <signal.h> #include <signal.h>
#include <pthread.h> #include <pthread.h>
@ -424,8 +425,6 @@ int set_to_negative_if_value_is_zero(UNUSED(struct argparse *self), const struct
fprintf(stderr, "error: option `--%s` Value must be >= 0\n", option->long_name); fprintf(stderr, "error: option `--%s` Value must be >= 0\n", option->long_name);
exit(1); exit(1);
} }
return 0;
} }
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
@ -545,7 +544,7 @@ int main(int argc, const char *argv[]) {
OPT_END(), OPT_END(),
}; };
struct argparse argparse = {}; struct argparse argparse;
argparse_init(&argparse, options, usage, 0); argparse_init(&argparse, options, usage, 0);
argparse_describe( argparse_describe(
&argparse, &argparse,

View File

@ -142,10 +142,6 @@ void parse(parse_job_t *job) {
job->vfile.calculate_checksum = ScanCtx.calculate_checksums; job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
} }
if (IS_SUB_JOB(job)) {
SET_CURRENT_JOB(ProcData.ipc_db->ipc_ctx, job->filepath);
}
document_t *doc = malloc(sizeof(document_t)); document_t *doc = malloc(sizeof(document_t));
strcpy(doc->filepath, job->filepath); strcpy(doc->filepath, job->filepath);
@ -165,8 +161,7 @@ void parse(parse_job_t *job) {
return; return;
} }
int document_exists = database_mark_document(ProcData.index_db, doc->filepath + ScanCtx.index.desc.root_len, doc->mtime); if (database_mark_document(ProcData.index_db, doc->filepath + ScanCtx.index.desc.root_len, doc->mtime)) {
if (document_exists) {
CLOSE_FILE(job->vfile) CLOSE_FILE(job->vfile)
free(doc); free(doc);
return; return;

View File

@ -51,17 +51,17 @@
#include <ctype.h> #include <ctype.h>
#include "git_hash.h" #include "git_hash.h"
#define VERSION "3.4.2" #define VERSION "3.3.3"
static const char *const Version = VERSION; static const char *const Version = VERSION;
static const int VersionMajor = 3; static const int VersionMajor = 3;
static const int VersionMinor = 4; static const int VersionMinor = 3;
static const int VersionPatch = 3; static const int VersionPatch = 3;
#ifndef SIST_PLATFORM #ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown #define SIST_PLATFORM unknown
#endif #endif
#define EXPECTED_MONGOOSE_VERSION "7.16" #define EXPECTED_MONGOOSE_VERSION "7.7"
#define Q(x) #x #define Q(x) #x
#define QUOTE(x) Q(x) #define QUOTE(x) Q(x)

View File

@ -50,13 +50,13 @@ void get_embedding(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid; sist_id_t sid;
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.buf + 3)) { if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.ptr + 3)) {
LOG_DEBUGF("serve.c", "Invalid embedding path: %.*s", (int) hm->uri.len, hm->uri.buf); LOG_DEBUGF("serve.c", "Invalid embedding path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
return; return;
} }
int model_id = (int) strtol(hm->uri.buf + SIST_SID_LEN + 3, NULL, 10); int model_id = (int) strtol(hm->uri.ptr + SIST_SID_LEN + 3, NULL, 10);
database_t *db = web_get_database(sid.index_id); database_t *db = web_get_database(sid.index_id);
if (db == NULL) { if (db == NULL) {
@ -86,11 +86,11 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
char index_id_str[9]; char index_id_str[9];
char arg_stat_type[5]; char arg_stat_type[5];
memcpy(index_id_str, hm->uri.buf + 3, 8); memcpy(index_id_str, hm->uri.ptr + 3, 8);
*(index_id_str + 8) = '\0'; *(index_id_str + 8) = '\0';
int index_id = (int) strtol(index_id_str, NULL, 16); int index_id = (int)strtol(index_id_str, NULL, 16);
memcpy(arg_stat_type, hm->uri.buf + 3 + 9, 4); memcpy(arg_stat_type, hm->uri.ptr + 3 + 9, 4);
*(arg_stat_type + sizeof(arg_stat_type) - 1) = '\0'; *(arg_stat_type + sizeof(arg_stat_type) - 1) = '\0';
database_stat_type_d stat_type = database_get_stat_type_by_mnemonic(arg_stat_type); database_stat_type_d stat_type = database_get_stat_type_by_mnemonic(arg_stat_type);
@ -108,6 +108,7 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
cJSON *json = database_get_stats(db, stat_type); cJSON *json = database_get_stats(db, stat_type);
mg_send_json(nc, json); mg_send_json(nc, json);
cJSON_Delete(json); cJSON_Delete(json);
} }
@ -135,19 +136,19 @@ void serve_chunk_vendors_js(struct mg_connection *nc, struct mg_http_message *hm
} }
} }
void serve_favicon_ico(struct mg_connection *nc, UNUSED(struct mg_http_message *hm)) { void serve_favicon_ico(struct mg_connection *nc, struct mg_http_message *hm) {
web_serve_asset_favicon_ico(nc); web_serve_asset_favicon_ico(nc);
} }
void serve_style_css(struct mg_connection *nc, UNUSED(struct mg_http_message *hm)) { void serve_style_css(struct mg_connection *nc, struct mg_http_message *hm) {
web_serve_asset_style_css(nc); web_serve_asset_style_css(nc);
} }
void serve_chunk_vendors_css(struct mg_connection *nc, UNUSED(struct mg_http_message *hm)) { void serve_chunk_vendors_css(struct mg_connection *nc, struct mg_http_message *hm) {
web_serve_asset_chunk_vendors_css(nc); web_serve_asset_chunk_vendors_css(nc);
} }
void serve_thumbnail(struct mg_connection *nc, UNUSED(struct mg_http_message *hm), int index_id, void serve_thumbnail(struct mg_connection *nc, struct mg_http_message *hm, int index_id,
int doc_id, int arg_num) { int doc_id, int arg_num) {
database_t *db = web_get_database(index_id); database_t *db = web_get_database(index_id);
@ -168,7 +169,6 @@ void serve_thumbnail(struct mg_connection *nc, UNUSED(struct mg_http_message *hm
"Cache-Control: max-age=31536000" "Cache-Control: max-age=31536000"
); );
mg_send(nc, data, data_len); mg_send(nc, data, data_len);
nc->is_resp = 0;
free(data); free(data);
} else { } else {
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
@ -179,13 +179,13 @@ void serve_thumbnail(struct mg_connection *nc, UNUSED(struct mg_http_message *hm
void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) { void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid; sist_id_t sid;
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.buf + 3)) { if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.ptr + 3)) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.buf); LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
return; return;
} }
int num = (int) strtol(hm->uri.buf + SIST_SID_LEN + 3, NULL, 10); int num = (int) strtol(hm->uri.ptr + SIST_SID_LEN + 3, NULL, 10);
serve_thumbnail(nc, hm, sid.index_id, sid.doc_id, num); serve_thumbnail(nc, hm, sid.index_id, sid.doc_id, num);
} }
@ -193,8 +193,8 @@ void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) {
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) { void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid; sist_id_t sid;
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.buf + 3)) { if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.ptr + 3)) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.buf); LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
return; return;
} }
@ -210,14 +210,13 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
} }
char *body = malloc(hm->body.len + 1); char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.buf, hm->body.len); memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0'; *(body + hm->body.len) = '\0';
char url[4096]; char url[4096];
snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index); snprintf(url, 4096, "%s/%s/_search", WebCtx.es_url, WebCtx.es_index);
nc->fn_data = web_post_async(url, body, WebCtx.es_insecure_ssl); nc->fn_data = web_post_async(url, body, WebCtx.es_insecure_ssl);
nc->is_resp = 1;
} }
void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) { void serve_file_from_url(cJSON *json, index_t *idx, struct mg_connection *nc) {
@ -369,10 +368,6 @@ void index_info(struct mg_connection *nc) {
cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp); cJSON_AddNumberToObject(idx_json, "timestamp", (double) idx->desc.timestamp);
cJSON_AddItemToArray(arr, idx_json); cJSON_AddItemToArray(arr, idx_json);
#ifdef SIST_DEBUG_INFO
cJSON_AddStringToObject(idx_json, "root", idx->desc.root);
#endif
cJSON *models = database_get_models(idx->db); cJSON *models = database_get_models(idx->db);
cJSON_AddItemToObject(idx_json, "models", models); cJSON_AddItemToObject(idx_json, "models", models);
} }
@ -383,7 +378,11 @@ void index_info(struct mg_connection *nc) {
cJSON_AddStringToObject(json, "searchBackend", "elasticsearch"); cJSON_AddStringToObject(json, "searchBackend", "elasticsearch");
} }
mg_send_json(nc, json); char *json_str = cJSON_PrintUnformatted(json);
web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, strlen(json_str));
free(json_str);
cJSON_Delete(json); cJSON_Delete(json);
} }
@ -416,8 +415,8 @@ cJSON *get_root_document_by_id(int index_id, int doc_id) {
void file(struct mg_connection *nc, struct mg_http_message *hm) { void file(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid; sist_id_t sid;
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.buf + 3)) { if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.ptr + 3)) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.buf); LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
return; return;
} }
@ -430,11 +429,6 @@ void file(struct mg_connection *nc, struct mg_http_message *hm) {
cJSON *source = get_root_document_by_id(sid.index_id, sid.doc_id); cJSON *source = get_root_document_by_id(sid.index_id, sid.doc_id);
if (source == NULL) {
HTTP_REPLY_NOT_FOUND
return;
}
if (strlen(idx->desc.rewrite_url) == 0) { if (strlen(idx->desc.rewrite_url) == 0) {
serve_file_from_disk(source, idx, nc, hm); serve_file_from_disk(source, idx, nc, hm);
} else { } else {
@ -452,7 +446,6 @@ void status(struct mg_connection *nc) {
} }
free(status); free(status);
nc->is_resp = 0;
} }
typedef struct { typedef struct {
@ -487,7 +480,7 @@ tag_req_t *parse_tag_request(cJSON *json) {
return req; return req;
} }
subreq_ctx_t *elastic_delete_tag(const char *sid, const tag_req_t *req) { subreq_ctx_t *elastic_delete_tag(const char* sid, const tag_req_t *req) {
char *buf = malloc(sizeof(char) * 8192); char *buf = malloc(sizeof(char) * 8192);
snprintf(buf, 8192, snprintf(buf, 8192,
"{" "{"
@ -507,7 +500,7 @@ subreq_ctx_t *elastic_delete_tag(const char *sid, const tag_req_t *req) {
return web_post_async(url, buf, WebCtx.es_insecure_ssl); return web_post_async(url, buf, WebCtx.es_insecure_ssl);
} }
subreq_ctx_t *elastic_write_tag(const char *sid, const tag_req_t *req) { subreq_ctx_t *elastic_write_tag(const char* sid, const tag_req_t *req) {
char *buf = malloc(sizeof(char) * 8192); char *buf = malloc(sizeof(char) * 8192);
snprintf(buf, 8192, snprintf(buf, 8192,
"{" "{"
@ -528,14 +521,14 @@ subreq_ctx_t *elastic_write_tag(const char *sid, const tag_req_t *req) {
void tag(struct mg_connection *nc, struct mg_http_message *hm) { void tag(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid; sist_id_t sid;
if (hm->uri.len != 22 || !parse_sid(&sid, hm->uri.buf + 5)) { if (hm->uri.len != 22 || !parse_sid(&sid, hm->uri.ptr + 5)) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.buf); LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
return; return;
} }
char *body = malloc(hm->body.len + 1); char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.buf, hm->body.len); memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0'; *(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body); cJSON *json = cJSON_Parse(body);
free(body); free(body);
@ -612,7 +605,7 @@ int check_auth0(struct mg_http_message *hm) {
} }
token_str = malloc(token.len + 1); token_str = malloc(token.len + 1);
strncpy(token_str, token.buf, token.len); strncpy(token_str, token.ptr, token.len);
*(token_str + token.len) = '\0'; *(token_str + token.len) = '\0';
int res = auth0_verify_jwt( int res = auth0_verify_jwt(
@ -630,7 +623,7 @@ int check_auth0(struct mg_http_message *hm) {
return TRUE; return TRUE;
} }
static void ev_router(struct mg_connection *nc, int ev, void *ev_data) { static void ev_router(struct mg_connection *nc, int ev, void *ev_data, UNUSED(void *fn_data)) {
if (ev == MG_EV_HTTP_MSG) { if (ev == MG_EV_HTTP_MSG) {
struct mg_http_message *hm = (struct mg_http_message *) ev_data; struct mg_http_message *hm = (struct mg_http_message *) ev_data;
@ -642,15 +635,13 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data) {
} }
char uri[256]; char uri[256];
memcpy(uri, hm->uri.buf, hm->uri.len); memcpy(uri, hm->uri.ptr, hm->uri.len);
*(uri + hm->uri.len) = '\0'; *(uri + hm->uri.len) = '\0';
LOG_DEBUGF("serve.c", "<%s> GET %s", LOG_DEBUGF("serve.c", "<%s> GET %s",
web_address_to_string(&(nc->rem)), web_address_to_string(&(nc->rem)),
uri uri
); );
#define mg_http_match_uri(hm, pattern) mg_match((hm)->uri, mg_str(pattern), NULL)
if (mg_http_match_uri(hm, "/")) { if (mg_http_match_uri(hm, "/")) {
serve_index_html(nc, hm); serve_index_html(nc, hm);
return; return;
@ -743,7 +734,6 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data) {
if (r->status_code == 200) { if (r->status_code == 200) {
web_send_headers(nc, 200, r->size, "Content-Type: application/json"); web_send_headers(nc, 200, r->size, "Content-Type: application/json");
mg_send(nc, r->body, r->size); mg_send(nc, r->body, r->size);
nc->is_resp = 0;
} else if (r->status_code == 0) { } else if (r->status_code == 0) {
sist_log("serve.c", LOG_SIST_ERROR, "Could not connect to elasticsearch!"); sist_log("serve.c", LOG_SIST_ERROR, "Could not connect to elasticsearch!");

View File

@ -179,8 +179,7 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
json_value req_query, req_path, req_size_min, req_size_max, req_date_min, req_date_max, req_page_size, json_value req_query, req_path, req_size_min, req_size_max, req_date_min, req_date_max, req_page_size,
req_index_ids, req_mime_types, req_tags, req_sort_asc, req_sort, req_seed, req_after, req_index_ids, req_mime_types, req_tags, req_sort_asc, req_sort, req_seed, req_after,
req_fetch_aggregations, req_highlight, req_highlight_context_size, req_embedding, req_model, req_fetch_aggregations, req_highlight, req_highlight_context_size, req_embedding, req_model;
req_search_in_path;
if (!cJSON_IsObject(json) || if (!cJSON_IsObject(json) ||
(req_query = get_json_string(json, "query")).invalid || (req_query = get_json_string(json, "query")).invalid ||
@ -198,7 +197,6 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
(req_index_ids = get_json_number_array(json, "indexIds")).invalid || (req_index_ids = get_json_number_array(json, "indexIds")).invalid ||
(req_mime_types = get_json_array(json, "mimeTypes")).invalid || (req_mime_types = get_json_array(json, "mimeTypes")).invalid ||
(req_highlight = get_json_bool(json, "highlight")).invalid || (req_highlight = get_json_bool(json, "highlight")).invalid ||
(req_search_in_path = get_json_bool(json, "searchInPath")).invalid ||
(req_highlight_context_size = get_json_number(json, "highlightContextSize")).invalid || (req_highlight_context_size = get_json_number(json, "highlightContextSize")).invalid ||
(req_embedding = get_json_number_array(json, "embedding")).invalid || (req_embedding = get_json_number_array(json, "embedding")).invalid ||
(req_model = get_json_number(json, "model")).invalid || (req_model = get_json_number(json, "model")).invalid ||
@ -254,6 +252,7 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
fts_search_req_t *req = malloc(sizeof(fts_search_req_t)); fts_search_req_t *req = malloc(sizeof(fts_search_req_t));
req->sort = sort; req->sort = sort;
req->query = req_query.val ? strdup(req_query.val->valuestring) : NULL;
req->path = req_path.val ? strdup(req_path.val->valuestring) : NULL; req->path = req_path.val ? strdup(req_path.val->valuestring) : NULL;
req->size_min = req_size_min.val ? req_size_min.val->valuedouble : 0; req->size_min = req_size_min.val ? req_size_min.val->valuedouble : 0;
req->size_max = req_size_max.val ? req_size_max.val->valuedouble : 0; req->size_max = req_size_max.val ? req_size_max.val->valuedouble : 0;
@ -272,16 +271,6 @@ fts_search_req_t *get_search_req(struct mg_http_message *hm) {
? req_highlight_context_size.val->valueint ? req_highlight_context_size.val->valueint
: DEFAULT_HIGHLIGHT_CONTEXT_SIZE; : DEFAULT_HIGHLIGHT_CONTEXT_SIZE;
req->model = req_model.val ? req_model.val->valueint : 0; req->model = req_model.val ? req_model.val->valueint : 0;
if (req_search_in_path.val->valueint == FALSE && req_query.val) {
if (asprintf(&req->query, "- path : %s", req_query.val->valuestring) == -1) {
cJSON_Delete(json);
return NULL;
}
} else {
req->query = req_query.val ? strdup(req_query.val->valuestring) : NULL;
}
req->embedding = req_model.val req->embedding = req_model.val
? get_float_buffer(req_embedding.val, &req->embedding_size) ? get_float_buffer(req_embedding.val, &req->embedding_size)
: NULL; : NULL;
@ -420,8 +409,8 @@ void fts_get_document(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid; sist_id_t sid;
if (hm->uri.len != 24 || !parse_sid(&sid, hm->uri.buf + 7)) { if (hm->uri.len != 24 || !parse_sid(&sid, hm->uri.ptr + 7)) {
LOG_DEBUGF("serve.c", "Invalid /fts/d/ path: %.*s", (int) hm->uri.len, hm->uri.buf); LOG_DEBUGF("serve.c", "Invalid /fts/d/ path: %.*s", (int) hm->uri.len, hm->uri.ptr);
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
return; return;
} }

View File

@ -5,37 +5,31 @@
void web_serve_asset_index_html(struct mg_connection *nc) { void web_serve_asset_index_html(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(index_html), HTTP_CROSS_ORIGIN_HEADERS "Content-Type: text/html"); web_send_headers(nc, 200, sizeof(index_html), HTTP_CROSS_ORIGIN_HEADERS "Content-Type: text/html");
mg_send(nc, index_html, sizeof(index_html)); mg_send(nc, index_html, sizeof(index_html));
nc->is_resp = 0;
} }
void web_serve_asset_index_js(struct mg_connection *nc) { void web_serve_asset_index_js(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(index_js), "Content-Type: application/javascript"); web_send_headers(nc, 200, sizeof(index_js), "Content-Type: application/javascript");
mg_send(nc, index_js, sizeof(index_js)); mg_send(nc, index_js, sizeof(index_js));
nc->is_resp = 0;
} }
void web_serve_asset_chunk_vendors_js(struct mg_connection *nc) { void web_serve_asset_chunk_vendors_js(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(chunk_vendors_js), "Content-Type: application/javascript"); web_send_headers(nc, 200, sizeof(chunk_vendors_js), "Content-Type: application/javascript");
mg_send(nc, chunk_vendors_js, sizeof(chunk_vendors_js)); mg_send(nc, chunk_vendors_js, sizeof(chunk_vendors_js));
nc->is_resp = 0;
} }
void web_serve_asset_favicon_ico(struct mg_connection *nc) { void web_serve_asset_favicon_ico(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(favicon_ico), "Content-Type: image/x-icon"); web_send_headers(nc, 200, sizeof(favicon_ico), "Content-Type: image/x-icon");
mg_send(nc, favicon_ico, sizeof(favicon_ico)); mg_send(nc, favicon_ico, sizeof(favicon_ico));
nc->is_resp = 0;
} }
void web_serve_asset_style_css(struct mg_connection *nc) { void web_serve_asset_style_css(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(index_css), "Content-Type: text/css"); web_send_headers(nc, 200, sizeof(index_css), "Content-Type: text/css");
mg_send(nc, index_css, sizeof(index_css)); mg_send(nc, index_css, sizeof(index_css));
nc->is_resp = 0;
} }
void web_serve_asset_chunk_vendors_css(struct mg_connection *nc) { void web_serve_asset_chunk_vendors_css(struct mg_connection *nc) {
web_send_headers(nc, 200, sizeof(chunk_vendors_css), "Content-Type: text/css"); web_send_headers(nc, 200, sizeof(chunk_vendors_css), "Content-Type: text/css");
mg_send(nc, chunk_vendors_css, sizeof(chunk_vendors_css)); mg_send(nc, chunk_vendors_css, sizeof(chunk_vendors_css));
nc->is_resp = 0;
} }
index_t *web_get_index_by_id(int index_id) { index_t *web_get_index_by_id(int index_id) {
@ -73,7 +67,7 @@ cJSON *web_get_json_body(struct mg_http_message *hm) {
} }
char *body = malloc(hm->body.len + 1); char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.buf, hm->body.len); memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0'; *(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body); cJSON *json = cJSON_Parse(body);
free(body); free(body);
@ -87,7 +81,7 @@ char *web_get_string_body(struct mg_http_message *hm) {
} }
char *body = malloc(hm->body.len + 1); char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.buf, hm->body.len); memcpy(body, hm->body.ptr, hm->body.len);
*(body + hm->body.len) = '\0'; *(body + hm->body.len) = '\0';
return body; return body;
@ -98,7 +92,6 @@ void mg_send_json(struct mg_connection *nc, const cJSON *json) {
web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json"); web_send_headers(nc, 200, strlen(json_str), "Content-Type: application/json");
mg_send(nc, json_str, strlen(json_str)); mg_send(nc, json_str, strlen(json_str));
nc->is_resp = 0;
free(json_str); free(json_str);
} }

View File

@ -16,26 +16,9 @@ database_t *web_get_database(int index_id);
__always_inline __always_inline
static char *web_address_to_string(struct mg_addr *addr) { static char *web_address_to_string(struct mg_addr *addr) {
static char address_to_string_buf[64]; static char address_to_string_buf[INET6_ADDRSTRLEN];
if (addr->is_ip6) { return mg_ntoa(addr, address_to_string_buf, sizeof(address_to_string_buf));
snprintf(address_to_string_buf, sizeof(address_to_string_buf),
"%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x",
addr->ip[0], addr->ip[1],
addr->ip[2], addr->ip[3],
addr->ip[4], addr->ip[5],
addr->ip[6], addr->ip[7],
addr->ip[8], addr->ip[9],
addr->ip[10], addr->ip[11],
addr->ip[12], addr->ip[13],
addr->ip[14], addr->ip[15]);
} else {
snprintf(address_to_string_buf, sizeof(address_to_string_buf),
"%d.%d.%d.%d",
addr->ip[0], addr->ip[1], addr->ip[2], addr->ip[3]);
}
return address_to_string_buf;
} }
void web_send_headers(struct mg_connection *nc, int status_code, size_t length, char *extra_headers); void web_send_headers(struct mg_connection *nc, int status_code, size_t length, char *extra_headers);

View File

@ -106,33 +106,12 @@ find_library(MUPDF_LIB NAMES liblibmupdf.a)
find_library(CMS_LIB NAMES lcms2) find_library(CMS_LIB NAMES lcms2)
find_library(JAS_LIB NAMES jasper) find_library(JAS_LIB NAMES jasper)
find_library(GUMBO_LIB NAMES gumbo) find_library(GUMBO_LIB NAMES gumbo)
find_library(GOMP_LIB NAMES libgomp.a gomp find_library(GOMP_LIB NAMES libgomp.a gomp PATHS /usr/lib/gcc/x86_64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/5/ /usr/lib/gcc/x86_64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/10/ /usr/lib/gcc/aarch64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/9/ /usr/lib/gcc/x86_64-linux-gnu/7/ /usr/lib/gcc/aarch64-linux-gnu/11/ /usr/lib/gcc/x86_64-linux-gnu/8/ /usr/lib/gcc/aarch64-linux-gnu/8/)
PATHS
/usr/lib/gcc/x86_64-linux-gnu/5/
/usr/lib/gcc/x86_64-linux-gnu/6/
/usr/lib/gcc/x86_64-linux-gnu/7/
/usr/lib/gcc/x86_64-linux-gnu/8/
/usr/lib/gcc/x86_64-linux-gnu/9/
/usr/lib/gcc/x86_64-linux-gnu/10/
/usr/lib/gcc/x86_64-linux-gnu/11/
/usr/lib/gcc/x86_64-linux-gnu/12/
/usr/lib/gcc/aarch64-linux-gnu/5/
/usr/lib/gcc/aarch64-linux-gnu/6/
/usr/lib/gcc/aarch64-linux-gnu/7/
/usr/lib/gcc/aarch64-linux-gnu/8/
/usr/lib/gcc/aarch64-linux-gnu/9/
/usr/lib/gcc/aarch64-linux-gnu/10/
/usr/lib/gcc/aarch64-linux-gnu/11/
/usr/lib/gcc/aarch64-linux-gnu/12/
)
find_package(Leptonica CONFIG REQUIRED) find_package(Leptonica CONFIG REQUIRED)
find_package(FFMPEG REQUIRED)
find_package(libraw CONFIG REQUIRED) find_package(libraw CONFIG REQUIRED)
find_package(Freetype REQUIRED) find_package(Freetype REQUIRED)
find_package(FFMPEG REQUIRED)
list(REMOVE_ITEM FFMPEG_LIBRARIES /usr/lib/x86_64-linux-gnu/libm.a)
list(REMOVE_ITEM FFMPEG_LIBRARIES /usr/lib/aarch64-linux-gnu/libm.a)
target_compile_options( target_compile_options(
scan scan
@ -187,6 +166,7 @@ target_link_libraries(
${WPD_LIB_DIR}/libwpd-0.9.a ${WPD_LIB_DIR}/libwpd-0.9.a
${WPD_LIB_DIR}/libwpd-stream-0.9.a ${WPD_LIB_DIR}/libwpd-stream-0.9.a
${FREETYPE_LIB}
${HARFBUZZ_LIB} ${HARFBUZZ_LIB}
${JBIG2DEC_LIB} ${JBIG2DEC_LIB}

View File

@ -175,19 +175,9 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
return TRUE; return TRUE;
} }
#define IS_IGNORED_MESSAGE(message) \
( \
strstr(message, "invalid glyph index") \
|| strstr(message, "... repeated") \
) \
void fz_err_callback(void *user, const char *message) { void fz_err_callback(void *user, const char *message) {
document_t *doc = (document_t *) user; document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx; const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message); CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message);
} }
@ -195,10 +185,6 @@ void fz_err_callback(void *user, const char *message) {
void fz_warn_callback(void *user, const char *message) { void fz_warn_callback(void *user, const char *message) {
document_t *doc = (document_t *) user; document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx; const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message); CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message);
} }

View File

@ -223,10 +223,14 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d
void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) { void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) {
if (meta_contains_key(doc->meta_head, key)) { meta_line_t *meta = doc->meta_head;
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s'", while (meta != NULL) {
key, tag->value); if (meta->key == key) {
return; CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'",
key, meta->str_val, key, tag->value);
return;
}
meta = meta->next;
} }
text_buffer_t tex = text_buffer_create(-1); text_buffer_t tex = text_buffer_create(-1);
@ -268,7 +272,6 @@ static void append_audio_meta(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx
APPEND_TAG_META(MetaAlbum); APPEND_TAG_META(MetaAlbum);
} else if (strcmp(key, "comment") == 0) { } else if (strcmp(key, "comment") == 0) {
append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent); append_tag_meta_if_not_exists(ctx, doc, tag, MetaContent);
APPEND_TAG_META(MetaMediaComment);
} }
} }
} }
@ -441,7 +444,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
return SAVE_THUMBNAIL_FAILED; return SAVE_THUMBNAIL_FAILED;
} }
if (ctx->tesseract_lang != NULL && thumbnail_index == 0 && !meta_contains_key(doc->meta_head, MetaContent)) { if (ctx->tesseract_lang != NULL && thumbnail_index == 0) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame); ocr_image(ctx, doc, decoder, frame_and_packet->frame);
} }
@ -565,9 +568,6 @@ void parse_media_format_ctx(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx,
AVStream *stream = pFormatCtx->streams[video_stream]; AVStream *stream = pFormatCtx->streams[video_stream];
if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) { if (stream->codecpar->width <= MIN_SIZE || stream->codecpar->height <= MIN_SIZE) {
CTX_LOG_DEBUGF(doc->filepath,
"Will not generate thumbnail because image is too small: %dx%d",
stream->codecpar->width, stream->codecpar->width);
avformat_close_input(&pFormatCtx); avformat_close_input(&pFormatCtx);
avformat_free_context(pFormatCtx); avformat_free_context(pFormatCtx);
return; return;

View File

@ -5,8 +5,8 @@
#include <tesseract/capi.h> #include <tesseract/capi.h>
#define MIN_OCR_WIDTH 350 #define MIN_OCR_WIDTH 350
#define MIN_OCR_HEIGHT 33 #define MIN_OCR_HEIGHT 100
#define MIN_OCR_LEN 3 #define MIN_OCR_LEN 10
#define OCR_IS_VALID_BPP(d) \ #define OCR_IS_VALID_BPP(d) \
((d) == 1 || (d) == 2 || (d) == 4 || (d) == 8 || (d) == 16 || (d) == 24 || \ ((d) == 1 || (d) == 2 || (d) == 4 || (d) == 8 || (d) == 16 || (d) == 24 || \
@ -28,13 +28,7 @@ ocr_extract_text(const char *tesseract_path, const char *tesseract_lang,
TessBaseAPI *api = TessBaseAPICreate(); TessBaseAPI *api = TessBaseAPICreate();
TessBaseAPIInit3(api, tesseract_path, tesseract_lang); TessBaseAPIInit3(api, tesseract_path, tesseract_lang);
// https://github.com/simon987/sist2/issues/443 TessBaseAPISetPageSegMode(api, PSM_AUTO_OSD);
if (strstr(tesseract_lang, "chi") != NULL) {
TessBaseAPISetVariable(api, "preserve_interword_spaces", "1");
}
// TODO: add this as param?
// TessBaseAPISetPageSegMode(api, PSM_AUTO_OSD);
TessBaseAPISetImage(api, img_buf, img_w, img_h, img_bpp, img_stride); TessBaseAPISetImage(api, img_buf, img_w, img_h, img_bpp, img_stride);
TessBaseAPISetSourceResolution(api, img_xres); TessBaseAPISetSourceResolution(api, img_xres);

View File

@ -63,7 +63,6 @@ enum metakey {
MetaAlbumArtist, MetaAlbumArtist,
MetaGenre, MetaGenre,
MetaTitle, MetaTitle,
MetaMediaComment,
MetaFontName, MetaFontName,
MetaExifMake, MetaExifMake,
MetaExifDescription, MetaExifDescription,
@ -172,8 +171,6 @@ typedef struct {
char filepath[PATH_MAX * 2 + 1]; char filepath[PATH_MAX * 2 + 1];
} parse_job_t; } parse_job_t;
#define IS_SUB_JOB(job) ((job)->parent[0] != '\0')
#include "util.h" #include "util.h"

View File

@ -392,18 +392,4 @@ static parse_job_t *create_parse_job(const char *filepath, int mtime, size_t st_
return job; return job;
} }
static int meta_contains_key (meta_line_t *meta_head, enum metakey key) {
meta_line_t *meta = meta_head;
while (meta != NULL) {
if (meta->key == key) {
return TRUE;
}
meta = meta->next;
}
return FALSE;
}
#endif #endif