Compare commits

...

25 Commits

Author SHA1 Message Date
Shy
670dad185e Fix #521 2025-03-19 19:22:17 -04:00
Shy
bbbd727e6a Update sist2-python version 2025-03-19 18:38:21 -04:00
Shy
d800effad9 Merge pull request #511 from dpieski/patch-5
Update README.md
2025-02-06 17:58:36 -05:00
Shy
371e9c408e Merge pull request #512 from dpieski/patch-6
Update README.md
2025-02-06 17:58:07 -05:00
Andrew
ee1b1d8bb4 Update README.md
Moved README references from simon987 to sist2app
2025-02-03 15:09:11 -06:00
Andrew
63a097a463 Update README.md
Update to the docker-compose.yml example.
2025-02-03 15:00:03 -06:00
Shy
7a03a2202e Fix #481 2025-01-24 19:40:08 -05:00
Shy
050fc500ce Fix #462 2025-01-24 19:22:01 -05:00
Shy
d44679131b Update compose file to avoid confusion. Fixes #490 2025-01-23 21:45:01 -05:00
Shy
4dd5e70406 Fix #492 2025-01-23 21:40:37 -05:00
Shy
5a82581992 Fix magic database problem 2025-01-23 21:40:27 -05:00
Shy
0dc18a56c0 Fix #509 2025-01-23 19:10:17 -05:00
Shy
258b2e31e6 Version bump 2025-01-23 19:10:02 -05:00
Shy
c726074029 Update tessdata paths 2025-01-23 19:09:54 -05:00
Shy
7873ef003d Fix CI build attempt 6 2025-01-22 22:16:42 -05:00
Shy
d41266e136 Fix CI build attempt 5 2025-01-22 22:15:37 -05:00
Shy
0e946092eb Fix CI build attempt 4 2025-01-22 21:58:55 -05:00
Shy
95b19e2e67 Fix CI build attempt 3 2025-01-22 21:55:09 -05:00
Shy
bd98eb2522 Fix CI build attempt 2 2025-01-22 21:51:59 -05:00
Shy
3d99add79e Fix CI build 2025-01-22 21:43:23 -05:00
Shy
2d6553d5d2 Update magic gen script 2025-01-22 21:39:23 -05:00
Shy
7d67354b96 Update CI build config 2025-01-22 21:32:54 -05:00
Shy
1b77daef16 Update repository URLs 2025-01-22 21:27:27 -05:00
Shy
d7038be35b Fix #506 2025-01-16 18:32:33 -05:00
Shy
c1573a803e Update third-party dependencies 2025-01-12 11:55:14 -05:00
29 changed files with 182 additions and 124 deletions

View File

@@ -7,11 +7,36 @@ platform:
arch: amd64 arch: amd64
steps: steps:
- name: submodules
image: alpine/git
commands:
- git submodule update --init --recursive
- name: docker
image: plugins/docker
depends_on:
- submodules
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: sist2app/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
- name: build - name: build
image: simon987/sist2-build image: sist2app/sist2-build
depends_on:
- submodules
commands: commands:
- ./scripts/build.sh - ./scripts/build.sh
- name: scp files - name: scp files
depends_on:
- build
image: appleboy/drone-scp image: appleboy/drone-scp
settings: settings:
host: host:
@@ -22,26 +47,11 @@ steps:
from_secret: SSH_USER from_secret: SSH_USER
key: key:
from_secret: SSH_KEY from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/ target: ~/files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source: source:
- ./VERSION - ./VERSION
- ./sist2-x64-linux - ./sist2-x64-linux
- ./sist2-x64-linux-debug - ./sist2-x64-linux-debug
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
--- ---
kind: pipeline kind: pipeline
@@ -52,11 +62,36 @@ platform:
arch: arm64 arch: arm64
steps: steps:
- name: submodules
image: alpine/git
commands:
- git submodule update --init --recursive
- name: docker
image: plugins/docker
depends_on:
- submodules
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: sist2app/sist2
context: ./
dockerfile: ./Dockerfile.arm64
auto_tag: true
auto_tag_suffix: arm64-linux
when:
event:
- tag
- name: build - name: build
image: simon987/sist2-build-arm64 image: sist2app/sist2-build-arm64
depends_on:
- submodules
commands: commands:
- ./scripts/build_arm64.sh - ./scripts/build_arm64.sh
- name: scp files - name: scp files
depends_on:
- build
image: appleboy/drone-scp image: appleboy/drone-scp
settings: settings:
host: host:
@@ -67,22 +102,7 @@ steps:
from_secret: SSH_USER from_secret: SSH_USER
key: key:
from_secret: SSH_KEY from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/ target: ~/files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source: source:
- ./sist2-arm64-linux - ./sist2-arm64-linux
- ./sist2-arm64-linux-debug - ./sist2-arm64-linux-debug
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile.arm64
auto_tag: true
auto_tag_suffix: arm64-linux
when:
event:
- tag

View File

@@ -147,6 +147,7 @@ add_dependencies(
target_link_libraries( target_link_libraries(
sist2 sist2
m
z z
argparse argparse
unofficial::mongoose::mongoose unofficial::mongoose::mongoose

View File

@@ -1,5 +1,4 @@
FROM simon987/sist2-build as build FROM sist2app/sist2-build as build
MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/ WORKDIR /build/

View File

@@ -1,5 +1,4 @@
FROM simon987/sist2-build-arm64 as build FROM sist2app/sist2-build-arm64 as build
MAINTAINER simon987 <me@simon987.net>
WORKDIR /build/ WORKDIR /build/

View File

@@ -1,5 +1,5 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg) ![GitHub](https://img.shields.io/github/license/sist2app/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2) [![CodeFactor](https://www.codefactor.io/repository/github/sist2app/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/sist2app/sist2)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/) [![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/) **Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
@@ -38,8 +38,6 @@ sist2 (Simple incremental search tool)
### Using Docker Compose *(Windows/Linux/Mac)* ### Using Docker Compose *(Windows/Linux/Mac)*
```yaml ```yaml
version: "3"
services: services:
elasticsearch: elasticsearch:
image: elasticsearch:7.17.9 image: elasticsearch:7.17.9
@@ -53,11 +51,11 @@ services:
- "PUID=1000" - "PUID=1000"
- "PGID=1000" - "PGID=1000"
sist2-admin: sist2-admin:
image: simon987/sist2:3.4.2-x64-linux image: sist2app/sist2:x64-linux
restart: unless-stopped restart: unless-stopped
volumes: volumes:
- /data/sist2-admin-data/:/sist2-admin/ - /data/sist2-admin-data/:/sist2-admin/
- /:/host - /<path to index>/:/host
ports: ports:
- 4090:4090 - 4090:4090
# NOTE: Don't expose this port publicly! # NOTE: Don't expose this port publicly!
@@ -81,7 +79,7 @@ Navigate to http://localhost:8080/ to configure sist2-admin.
``` ```
* **SQLite**: No installation required * **SQLite**: No installation required
2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases). 2. Download the [latest sist2 release](https://github.com/sist2app/sist2/releases).
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`. Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
3. See [usage guide](docs/USAGE.md) for command line usage. 3. See [usage guide](docs/USAGE.md) for command line usage.
@@ -100,20 +98,20 @@ Example usage:
| File type | Library | Content | Thumbnail | Metadata | | File type | Library | Content | Thumbnail | Metadata |
|:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------| |:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------|
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title | | pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
| cbz,cbr | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | - | yes | - | | cbz,cbr | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | - | yes | - |
| `audio/*` | ffmpeg | - | yes | ID3 tags | | `audio/*` | ffmpeg | - | yes | ID3 tags |
| `video/*` | ffmpeg | - | yes | title, comment, artist | | `video/*` | ffmpeg | - | yes | title, comment, artist |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags | | `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/sist2app/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags | | raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags |
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style | | ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
| `text/plain` | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - | | `text/plain` | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - | | html, xml | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no | | tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
| docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title | | docx, xlsx, pptx | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| doc (MS Word 97-2003) | antiword | yes | no | author, title | | doc (MS Word 97-2003) | antiword | yes | no | author, title |
| mobi, azw, azw3 | libmobi | yes | yes | author, title | | mobi, azw, azw3 | libmobi | yes | yes | author, title |
| wpd (WordPerfect) | libwpd | yes | no | *planned* | | wpd (WordPerfect) | libwpd | yes | no | *planned* |
| json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - | | json, jsonl, ndjson | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | - | - |
\* *See [Archive files](#archive-files)* \* *See [Archive files](#archive-files)*
@@ -137,7 +135,7 @@ You can enable OCR support for ebook (pdf,xps,fb2,epub) or image file types with
Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files). directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` image comes with common languages The `sist2app/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa, chi_sim, deu, pol) pre-installed. (hin, jpn, eng, fra, rus, spa, chi_sim, deu, pol) pre-installed.
You can use the `+` separator to specify multiple languages. The language You can use the `+` separator to specify multiple languages. The language
@@ -177,13 +175,13 @@ sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported re
to enable it. to enable it.
The text processing is done in your browser, no data is sent to any third-party services. The text processing is done in your browser, no data is sent to any third-party services.
See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details. See [sist2app/sist2-ner-models](https://github.com/sist2app/sist2-ner-models) for more details.
#### List of available repositories: #### List of available repositories:
| URL | Maintainer | Purpose | | URL | Maintainer | Purpose |
|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------| |---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
| [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General | | [sist2app/sist2-ner-models](https://raw.githubusercontent.com/sist2app/sist2-ner-models/main/repo.json) | [sist2app](https://github.com/sist2app) | General |
<details> <details>
<summary>Screenshot</summary> <summary>Screenshot</summary>
@@ -199,7 +197,7 @@ You can compile **sist2** by yourself if you don't want to use the pre-compiled
### Using docker ### Using docker
```bash ```bash
git clone --recursive https://github.com/simon987/sist2/ git clone --recursive https://github.com/sist2app/sist2/
cd sist2 cd sist2
docker build . -t my-sist2-image docker build . -t my-sist2-image
# Copy sist2 executable from docker image # Copy sist2 executable from docker image
@@ -214,7 +212,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
``` ```
2. Install vcpkg using my fork: https://github.com/simon987/vcpkg 2. Install vcpkg using my fork: https://github.com/sist2app/vcpkg
3. Install vcpkg dependencies 3. Install vcpkg dependencies
```bash ```bash
@@ -223,7 +221,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
4. Build 4. Build
```bash ```bash
git clone --recursive https://github.com/simon987/sist2/ git clone --recursive https://github.com/sist2app/sist2/
(cd sist2-vue; npm install; npm run build) (cd sist2-vue; npm install; npm run build)
(cd sist2-admin/frontend; npm install; npm run build) (cd sist2-admin/frontend; npm install; npm run build)
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake . cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .

View File

@@ -18,7 +18,7 @@ services:
container_name: sist2-admin container_name: sist2-admin
volumes: volumes:
- /data/sist2-admin-data/:/sist2-admin/ - /data/sist2-admin-data/:/sist2-admin/
- /:/host - /<path to index>/:/host
ports: ports:
- 4090:4090 - 4090:4090
# NOTE: Don't export this port publicly! # NOTE: Don't export this port publicly!

View File

@@ -189,7 +189,7 @@ Since v3.2.0, User scripts can be used to generate _embeddings_ (vector of float
In theory, embeddings can be created for any type of documents (image, text, audio etc.). In theory, embeddings can be created for any type of documents (image, text, audio etc.).
For example, the [clip](https://github.com/simon987/sist2-script-clip) User Script, generates 512-d embeddings of images For example, the [clip](https://github.com/sist2app/sist2-script-clip) User Script, generates 512-d embeddings of images
(videos are also supported using the thumbnails generated by sist2). When the user enters a query in the "Embeddings Search" (videos are also supported using the thumbnails generated by sist2). When the user enters a query in the "Embeddings Search"
textbox, the query's embedding is generated in their browser, leveraging the ONNX web runtime. textbox, the query's embedding is generated in their browser, leveraging the ONNX web runtime.

View File

@@ -2,8 +2,6 @@
VCPKG_ROOT="/vcpkg" VCPKG_ROOT="/vcpkg"
git submodule update --init --recursive
( (
cd sist2-vue/ cd sist2-vue/
npm install npm install

View File

@@ -1,8 +1,16 @@
try: MAGIC_PATHS = [
with open("/usr/lib/file/magic.mgc", "rb") as f: "/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
data = f.read() "/work/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
except: "/usr/lib/file/magic.mgc"
data = bytes([]) ]
for path in MAGIC_PATHS:
try:
with open(path, "rb") as f:
data = f.read()
break
except:
continue
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data))) print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -4,7 +4,7 @@
<b-container class="pt-4"> <b-container class="pt-4">
<b-alert show dismissible variant="info"> <b-alert show dismissible variant="info">
This is a beta version of sist2-admin. Please submit bug reports, usability issues and feature requests This is a beta version of sist2-admin. Please submit bug reports, usability issues and feature requests
to the <a href="https://github.com/simon987/sist2/issues/new/choose" target="_blank">issue tracker on to the <a href="https://github.com/sist2app/sist2/issues/new/choose" target="_blank">issue tracker on
Github</a>. Thank you! Github</a>. Thank you!
</b-alert> </b-alert>
<router-view v-if="$store.state.sist2AdminInfo"/> <router-view v-if="$store.state.sist2AdminInfo"/>

View File

@@ -4,4 +4,4 @@ uvicorn
websockets websockets
pycron pycron
GitPython GitPython
git+https://github.com/simon987/sist2-python.git git+https://github.com/sist2app/sist2-python.git@2.1

View File

@@ -96,7 +96,7 @@ SCRIPT_TEMPLATES = {
"CLIP - Generate embeddings to predict the most relevant image based on the text prompt": lambda name: UserScript( "CLIP - Generate embeddings to predict the most relevant image based on the text prompt": lambda name: UserScript(
name=name, name=name,
type=ScriptType.GIT, type=ScriptType.GIT,
git_repository="https://github.com/simon987/sist2-script-clip", git_repository="https://github.com/sist2app/sist2-script-clip",
extra_args="--num-tags=1 --tags-file=general.txt --color=#dcd7ff" extra_args="--num-tags=1 --tags-file=general.txt --color=#dcd7ff"
), ),
"Whisper - Speech to text with OpenAI Whisper": lambda name: UserScript( "Whisper - Speech to text with OpenAI Whisper": lambda name: UserScript(

View File

@@ -309,7 +309,7 @@ class Sist2Api {
} }
getTagsSqlite() { getTagsSqlite() {
return axios.get(`${this.baseUrl}/fts/tags`) return axios.get(`${this.baseUrl}fts/tags`)
.then(resp => { .then(resp => {
return resp.data.map(tag => this._createEsTag(tag.tag, tag.count)) return resp.data.map(tag => this._createEsTag(tag.tag, tag.count))
}); });
@@ -566,7 +566,7 @@ class Sist2Api {
} }
getDocumentSqlite(sid) { getDocumentSqlite(sid) {
return axios.get(`${this.baseUrl}/fts/d/${sid}`) return axios.get(`${this.baseUrl}fts/d/${sid}`)
.then(resp => ({ .then(resp => ({
_source: resp.data _source: resp.data
})); }));
@@ -589,7 +589,7 @@ class Sist2Api {
} }
getTagSuggestionsSqlite(prefix) { getTagSuggestionsSqlite(prefix) {
return axios.post(`${this.baseUrl}/fts/suggestTags`, prefix) return axios.post(`${this.baseUrl}fts/suggestTags`, prefix)
.then(resp => (resp.data)); .then(resp => (resp.data));
} }
@@ -620,7 +620,7 @@ class Sist2Api {
} }
getEmbeddings(sid, modelId) { getEmbeddings(sid, modelId) {
return axios.post(`${this.baseUrl}/e/${sid}/${modelId.toString().padStart(3, '0')}`) return axios.post(`${this.baseUrl}e/${sid}/${modelId.toString().padStart(3, '0')}`)
.then(resp => (resp.data)); .then(resp => (resp.data));
} }
} }

View File

@@ -117,11 +117,11 @@ class Sist2ElasticsearchQuery {
} }
if (dateMin && dateMax) { if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}}) filters.push({range: {mtime: {gte: dateMin, lte: dateMax, format: "epoch_second"}}})
} else if (dateMin) { } else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}}) filters.push({range: {mtime: {gte: dateMin, format: "epoch_second"}}})
} else if (dateMax) { } else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}}) filters.push({range: {mtime: {lte: dateMax, format: "epoch_second"}}})
} }
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes

View File

@@ -9,7 +9,7 @@
<span class="badge badge-pill version" v-if="$store && $store.state.sist2Info"> <span class="badge badge-pill version" v-if="$store && $store.state.sist2Info">
v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span><span v-if="isLegacy() && !hideLegacy()">-<a v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span><span v-if="isLegacy() && !hideLegacy()">-<a
href="https://github.com/simon987/sist2/blob/master/docs/USAGE.md#elasticsearch" href="https://github.com/sist2app/sist2/blob/master/docs/USAGE.md#elasticsearch"
target="_blank">legacyES</a></span><span v-if="$store.state.uiSqliteMode">-SQLite</span> target="_blank">legacyES</a></span><span v-if="$store.state.uiSqliteMode">-SQLite</span>
</span> </span>

View File

@@ -138,7 +138,7 @@ export default {
}, },
debug: "Debug information", debug: "Debug information",
debugDescription: "Information useful for debugging. If you encounter bugs or have suggestions for" + debugDescription: "Information useful for debugging. If you encounter bugs or have suggestions for" +
" new features, please submit a new issue <a href='https://github.com/simon987/sist2/issues/new/choose'>here</a>.", " new features, please submit a new issue <a href='https://github.com/sist2app/sist2/issues/new/choose'>here</a>.",
tagline: "Tagline", tagline: "Tagline",
toast: { toast: {
esConnErrTitle: "Elasticsearch connection error", esConnErrTitle: "Elasticsearch connection error",
@@ -318,7 +318,7 @@ export default {
}, },
debug: "Debug Informationen", debug: "Debug Informationen",
debugDescription: "Informationen für das Debugging. Wenn du Bugs gefunden oder Anregungen für " + debugDescription: "Informationen für das Debugging. Wenn du Bugs gefunden oder Anregungen für " +
"neue Features hast, poste sie bitte <a href='https://github.com/simon987/sist2/issues/new/choose'>hier</a>.", "neue Features hast, poste sie bitte <a href='https://github.com/sist2app/sist2/issues/new/choose'>hier</a>.",
tagline: "Tagline", tagline: "Tagline",
toast: { toast: {
esConnErrTitle: "Elasticsearch Verbindungsfehler", esConnErrTitle: "Elasticsearch Verbindungsfehler",
@@ -494,7 +494,7 @@ export default {
debug: "Information de débogage", debug: "Information de débogage",
debugDescription: "Informations utiles pour le débogage\n" + debugDescription: "Informations utiles pour le débogage\n" +
"Si vous rencontrez des bogues ou si vous avez des suggestions pour de nouvelles fonctionnalités," + "Si vous rencontrez des bogues ou si vous avez des suggestions pour de nouvelles fonctionnalités," +
" veuillez soumettre un nouvel Issue <a href='https://github.com/simon987/sist2/issues/new/choose'>ici</a>.", " veuillez soumettre un nouvel Issue <a href='https://github.com/sist2app/sist2/issues/new/choose'>ici</a>.",
tagline: "Tagline", tagline: "Tagline",
toast: { toast: {
esConnErrTitle: "Erreur de connexion Elasticsearch", esConnErrTitle: "Erreur de connexion Elasticsearch",
@@ -668,7 +668,7 @@ export default {
}, },
debug: "调试信息", debug: "调试信息",
debugDescription: "对调试除错有用的信息。 若您遇到bug或者想建议新功能请提交新Issue到" + debugDescription: "对调试除错有用的信息。 若您遇到bug或者想建议新功能请提交新Issue到" +
"<a href='https://github.com/simon987/sist2/issues/new/choose'>这里</a>.", "<a href='https://github.com/sist2app/sist2/issues/new/choose'>这里</a>.",
tagline: "标签栏", tagline: "标签栏",
toast: { toast: {
esConnErrTitle: "Elasticsearch连接错误", esConnErrTitle: "Elasticsearch连接错误",
@@ -846,7 +846,7 @@ export default {
}, },
debug: "Informacje dla programistów", debug: "Informacje dla programistów",
debugDescription: "Informacje przydatne do znajdowania błędów w oprogramowaniu. Jeśli napotkasz błąd lub masz" + debugDescription: "Informacje przydatne do znajdowania błędów w oprogramowaniu. Jeśli napotkasz błąd lub masz" +
" propozycje zmian, zgłoś to proszę <a href='https://github.com/simon987/sist2/issues/new/choose'>tutaj</a>.", " propozycje zmian, zgłoś to proszę <a href='https://github.com/sist2app/sist2/issues/new/choose'>tutaj</a>.",
tagline: "Slogan", tagline: "Slogan",
toast: { toast: {
esConnErrTitle: "Problem z połączeniem z Elasticsearch", esConnErrTitle: "Problem z połączeniem z Elasticsearch",

View File

@@ -58,7 +58,7 @@ export default new Vuex.Store({
optVidPreviewInterval: 700, optVidPreviewInterval: 700,
optSimpleLightbox: true, optSimpleLightbox: true,
optShowTagPickerFilter: true, optShowTagPickerFilter: true,
optMlRepositories: "https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json", optMlRepositories: "https://raw.githubusercontent.com/sist2app/sist2-ner-models/main/repo.json",
optAutoAnalyze: false, optAutoAnalyze: false,
optMlDefaultModel: null, optMlDefaultModel: null,

View File

@@ -25,6 +25,7 @@ const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/", "/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/", "/usr/share/tesseract-ocr/tessdata/",
"/usr/share/tesseract-ocr/4.00/tessdata/", "/usr/share/tesseract-ocr/4.00/tessdata/",
"/usr/share/tesseract-ocr/5/tessdata/",
"./", "./",
NULL NULL
}; };

View File

@@ -114,7 +114,7 @@ void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv)
char buf[PATH_MAX]; char buf[PATH_MAX];
strcpy(buf, current_job); strcpy(buf, current_job);
strcpy(ipc_ctx->current_job[ProcData.thread_id], current_job); SET_CURRENT_JOB(ipc_ctx, current_job);
sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC); sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC);
} }
@@ -478,8 +478,7 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) {
database_iterator_t *database_create_delete_list_iterator(database_t *db) { database_iterator_t *database_create_delete_list_iterator(database_t *db) {
sqlite3_stmt *stmt; sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "SELECT doc.id FROM delete_list " sqlite3_prepare_v2(db->db, "SELECT id FROM delete_list", -1, &stmt, NULL);
"INNER JOIN document doc ON doc.ROWID = delete_list.id;", -1, &stmt, NULL);
database_iterator_t *iter = malloc(sizeof(database_iterator_t)); database_iterator_t *iter = malloc(sizeof(database_iterator_t));

View File

@@ -64,6 +64,8 @@ typedef struct {
char current_job[MAX_THREADS][PATH_MAX * 2]; char current_job[MAX_THREADS][PATH_MAX * 2];
} database_ipc_ctx_t; } database_ipc_ctx_t;
#define SET_CURRENT_JOB(ctx, job) (strcpy((ctx)->current_job[ProcData.thread_id], job))
typedef struct { typedef struct {
double date_min; double date_min;
double date_max; double date_max;

View File

@@ -142,6 +142,10 @@ void parse(parse_job_t *job) {
job->vfile.calculate_checksum = ScanCtx.calculate_checksums; job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
} }
if (IS_SUB_JOB(job)) {
SET_CURRENT_JOB(ProcData.ipc_db->ipc_ctx, job->filepath);
}
document_t *doc = malloc(sizeof(document_t)); document_t *doc = malloc(sizeof(document_t));
strcpy(doc->filepath, job->filepath); strcpy(doc->filepath, job->filepath);
@@ -161,7 +165,8 @@ void parse(parse_job_t *job) {
return; return;
} }
if (database_mark_document(ProcData.index_db, doc->filepath + ScanCtx.index.desc.root_len, doc->mtime)) { int document_exists = database_mark_document(ProcData.index_db, doc->filepath + ScanCtx.index.desc.root_len, doc->mtime);
if (document_exists) {
CLOSE_FILE(job->vfile) CLOSE_FILE(job->vfile)
free(doc); free(doc);
return; return;

View File

@@ -55,13 +55,13 @@
static const char *const Version = VERSION; static const char *const Version = VERSION;
static const int VersionMajor = 3; static const int VersionMajor = 3;
static const int VersionMinor = 4; static const int VersionMinor = 4;
static const int VersionPatch = 2; static const int VersionPatch = 3;
#ifndef SIST_PLATFORM #ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown #define SIST_PLATFORM unknown
#endif #endif
#define EXPECTED_MONGOOSE_VERSION "7.13" #define EXPECTED_MONGOOSE_VERSION "7.16"
#define Q(x) #x #define Q(x) #x
#define QUOTE(x) Q(x) #define QUOTE(x) Q(x)

View File

@@ -50,13 +50,13 @@ void get_embedding(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid; sist_id_t sid;
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.ptr + 3)) { if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.buf + 3)) {
LOG_DEBUGF("serve.c", "Invalid embedding path: %.*s", (int) hm->uri.len, hm->uri.ptr); LOG_DEBUGF("serve.c", "Invalid embedding path: %.*s", (int) hm->uri.len, hm->uri.buf);
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
return; return;
} }
int model_id = (int) strtol(hm->uri.ptr + SIST_SID_LEN + 3, NULL, 10); int model_id = (int) strtol(hm->uri.buf + SIST_SID_LEN + 3, NULL, 10);
database_t *db = web_get_database(sid.index_id); database_t *db = web_get_database(sid.index_id);
if (db == NULL) { if (db == NULL) {
@@ -86,11 +86,11 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
char index_id_str[9]; char index_id_str[9];
char arg_stat_type[5]; char arg_stat_type[5];
memcpy(index_id_str, hm->uri.ptr + 3, 8); memcpy(index_id_str, hm->uri.buf + 3, 8);
*(index_id_str + 8) = '\0'; *(index_id_str + 8) = '\0';
int index_id = (int) strtol(index_id_str, NULL, 16); int index_id = (int) strtol(index_id_str, NULL, 16);
memcpy(arg_stat_type, hm->uri.ptr + 3 + 9, 4); memcpy(arg_stat_type, hm->uri.buf + 3 + 9, 4);
*(arg_stat_type + sizeof(arg_stat_type) - 1) = '\0'; *(arg_stat_type + sizeof(arg_stat_type) - 1) = '\0';
database_stat_type_d stat_type = database_get_stat_type_by_mnemonic(arg_stat_type); database_stat_type_d stat_type = database_get_stat_type_by_mnemonic(arg_stat_type);
@@ -135,19 +135,19 @@ void serve_chunk_vendors_js(struct mg_connection *nc, struct mg_http_message *hm
} }
} }
void serve_favicon_ico(struct mg_connection *nc, struct mg_http_message *hm) { void serve_favicon_ico(struct mg_connection *nc, UNUSED(struct mg_http_message *hm)) {
web_serve_asset_favicon_ico(nc); web_serve_asset_favicon_ico(nc);
} }
void serve_style_css(struct mg_connection *nc, struct mg_http_message *hm) { void serve_style_css(struct mg_connection *nc, UNUSED(struct mg_http_message *hm)) {
web_serve_asset_style_css(nc); web_serve_asset_style_css(nc);
} }
void serve_chunk_vendors_css(struct mg_connection *nc, struct mg_http_message *hm) { void serve_chunk_vendors_css(struct mg_connection *nc, UNUSED(struct mg_http_message *hm)) {
web_serve_asset_chunk_vendors_css(nc); web_serve_asset_chunk_vendors_css(nc);
} }
void serve_thumbnail(struct mg_connection *nc, struct mg_http_message *hm, int index_id, void serve_thumbnail(struct mg_connection *nc, UNUSED(struct mg_http_message *hm), int index_id,
int doc_id, int arg_num) { int doc_id, int arg_num) {
database_t *db = web_get_database(index_id); database_t *db = web_get_database(index_id);
@@ -179,13 +179,13 @@ void serve_thumbnail(struct mg_connection *nc, struct mg_http_message *hm, int i
void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) { void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid; sist_id_t sid;
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.ptr + 3)) { if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.buf + 3)) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr); LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.buf);
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
return; return;
} }
int num = (int) strtol(hm->uri.ptr + SIST_SID_LEN + 3, NULL, 10); int num = (int) strtol(hm->uri.buf + SIST_SID_LEN + 3, NULL, 10);
serve_thumbnail(nc, hm, sid.index_id, sid.doc_id, num); serve_thumbnail(nc, hm, sid.index_id, sid.doc_id, num);
} }
@@ -193,8 +193,8 @@ void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) {
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) { void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid; sist_id_t sid;
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.ptr + 3)) { if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.buf + 3)) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr); LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.buf);
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
return; return;
} }
@@ -210,7 +210,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
} }
char *body = malloc(hm->body.len + 1); char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len); memcpy(body, hm->body.buf, hm->body.len);
*(body + hm->body.len) = '\0'; *(body + hm->body.len) = '\0';
char url[4096]; char url[4096];
@@ -416,8 +416,8 @@ cJSON *get_root_document_by_id(int index_id, int doc_id) {
void file(struct mg_connection *nc, struct mg_http_message *hm) { void file(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid; sist_id_t sid;
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.ptr + 3)) { if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.buf + 3)) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr); LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.buf);
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
return; return;
} }
@@ -528,14 +528,14 @@ subreq_ctx_t *elastic_write_tag(const char *sid, const tag_req_t *req) {
void tag(struct mg_connection *nc, struct mg_http_message *hm) { void tag(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid; sist_id_t sid;
if (hm->uri.len != 22 || !parse_sid(&sid, hm->uri.ptr + 5)) { if (hm->uri.len != 22 || !parse_sid(&sid, hm->uri.buf + 5)) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr); LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.buf);
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
return; return;
} }
char *body = malloc(hm->body.len + 1); char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len); memcpy(body, hm->body.buf, hm->body.len);
*(body + hm->body.len) = '\0'; *(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body); cJSON *json = cJSON_Parse(body);
free(body); free(body);
@@ -612,7 +612,7 @@ int check_auth0(struct mg_http_message *hm) {
} }
token_str = malloc(token.len + 1); token_str = malloc(token.len + 1);
strncpy(token_str, token.ptr, token.len); strncpy(token_str, token.buf, token.len);
*(token_str + token.len) = '\0'; *(token_str + token.len) = '\0';
int res = auth0_verify_jwt( int res = auth0_verify_jwt(
@@ -642,13 +642,15 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data) {
} }
char uri[256]; char uri[256];
memcpy(uri, hm->uri.ptr, hm->uri.len); memcpy(uri, hm->uri.buf, hm->uri.len);
*(uri + hm->uri.len) = '\0'; *(uri + hm->uri.len) = '\0';
LOG_DEBUGF("serve.c", "<%s> GET %s", LOG_DEBUGF("serve.c", "<%s> GET %s",
web_address_to_string(&(nc->rem)), web_address_to_string(&(nc->rem)),
uri uri
); );
#define mg_http_match_uri(hm, pattern) mg_match((hm)->uri, mg_str(pattern), NULL)
if (mg_http_match_uri(hm, "/")) { if (mg_http_match_uri(hm, "/")) {
serve_index_html(nc, hm); serve_index_html(nc, hm);
return; return;

View File

@@ -420,8 +420,8 @@ void fts_get_document(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid; sist_id_t sid;
if (hm->uri.len != 24 || !parse_sid(&sid, hm->uri.ptr + 7)) { if (hm->uri.len != 24 || !parse_sid(&sid, hm->uri.buf + 7)) {
LOG_DEBUGF("serve.c", "Invalid /fts/d/ path: %.*s", (int) hm->uri.len, hm->uri.ptr); LOG_DEBUGF("serve.c", "Invalid /fts/d/ path: %.*s", (int) hm->uri.len, hm->uri.buf);
HTTP_REPLY_NOT_FOUND HTTP_REPLY_NOT_FOUND
return; return;
} }

View File

@@ -73,7 +73,7 @@ cJSON *web_get_json_body(struct mg_http_message *hm) {
} }
char *body = malloc(hm->body.len + 1); char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len); memcpy(body, hm->body.buf, hm->body.len);
*(body + hm->body.len) = '\0'; *(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body); cJSON *json = cJSON_Parse(body);
free(body); free(body);
@@ -87,7 +87,7 @@ char *web_get_string_body(struct mg_http_message *hm) {
} }
char *body = malloc(hm->body.len + 1); char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len); memcpy(body, hm->body.buf, hm->body.len);
*(body + hm->body.len) = '\0'; *(body + hm->body.len) = '\0';
return body; return body;

View File

@@ -175,9 +175,19 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
return TRUE; return TRUE;
} }
#define IS_IGNORED_MESSAGE(message) \
( \
strstr(message, "invalid glyph index") \
|| strstr(message, "... repeated") \
) \
void fz_err_callback(void *user, const char *message) { void fz_err_callback(void *user, const char *message) {
document_t *doc = (document_t *) user; document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx; const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message); CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message);
} }
@@ -185,6 +195,10 @@ void fz_err_callback(void *user, const char *message) {
void fz_warn_callback(void *user, const char *message) { void fz_warn_callback(void *user, const char *message) {
document_t *doc = (document_t *) user; document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx; const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message); CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message);
} }

View File

@@ -223,14 +223,10 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d
void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) { void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) {
meta_line_t *meta = doc->meta_head; if (meta_contains_key(doc->meta_head, key)) {
while (meta != NULL) { CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s'",
if (meta->key == key) { key, tag->value);
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'", return;
key, meta->str_val, key, tag->value);
return;
}
meta = meta->next;
} }
text_buffer_t tex = text_buffer_create(-1); text_buffer_t tex = text_buffer_create(-1);
@@ -445,7 +441,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
return SAVE_THUMBNAIL_FAILED; return SAVE_THUMBNAIL_FAILED;
} }
if (ctx->tesseract_lang != NULL && thumbnail_index == 0) { if (ctx->tesseract_lang != NULL && thumbnail_index == 0 && !meta_contains_key(doc->meta_head, MetaContent)) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame); ocr_image(ctx, doc, decoder, frame_and_packet->frame);
} }

View File

@@ -172,6 +172,8 @@ typedef struct {
char filepath[PATH_MAX * 2 + 1]; char filepath[PATH_MAX * 2 + 1];
} parse_job_t; } parse_job_t;
#define IS_SUB_JOB(job) ((job)->parent[0] != '\0')
#include "util.h" #include "util.h"

View File

@@ -392,4 +392,18 @@ static parse_job_t *create_parse_job(const char *filepath, int mtime, size_t st_
return job; return job;
} }
static int meta_contains_key (meta_line_t *meta_head, enum metakey key) {
meta_line_t *meta = meta_head;
while (meta != NULL) {
if (meta->key == key) {
return TRUE;
}
meta = meta->next;
}
return FALSE;
}
#endif #endif