Compare commits

...

27 Commits

Author SHA1 Message Date
Shy
e57fdc1fe8 Version bump 2025-06-12 20:57:04 -04:00
Rick Console
8bb12f8ae2 fixed occasional hanging when indexing 2025-04-18 16:16:38 -04:00
Shy
670dad185e Fix #521 2025-03-19 19:22:17 -04:00
Shy
bbbd727e6a Update sist2-python version 2025-03-19 18:38:21 -04:00
Shy
d800effad9 Merge pull request #511 from dpieski/patch-5
Update README.md
2025-02-06 17:58:36 -05:00
Shy
371e9c408e Merge pull request #512 from dpieski/patch-6
Update README.md
2025-02-06 17:58:07 -05:00
Andrew
ee1b1d8bb4 Update README.md
Moved README references from simon987 to sist2app
2025-02-03 15:09:11 -06:00
Andrew
63a097a463 Update README.md
Update to the docker-compose.yml example.
2025-02-03 15:00:03 -06:00
Shy
7a03a2202e Fix #481 2025-01-24 19:40:08 -05:00
Shy
050fc500ce Fix #462 2025-01-24 19:22:01 -05:00
Shy
d44679131b Update compose file to avoid confusion. Fixes #490 2025-01-23 21:45:01 -05:00
Shy
4dd5e70406 Fix #492 2025-01-23 21:40:37 -05:00
Shy
5a82581992 Fix magic database problem 2025-01-23 21:40:27 -05:00
Shy
0dc18a56c0 Fix #509 2025-01-23 19:10:17 -05:00
Shy
258b2e31e6 Version bump 2025-01-23 19:10:02 -05:00
Shy
c726074029 Update tessdata paths 2025-01-23 19:09:54 -05:00
Shy
7873ef003d Fix CI build attempt 6 2025-01-22 22:16:42 -05:00
Shy
d41266e136 Fix CI build attempt 5 2025-01-22 22:15:37 -05:00
Shy
0e946092eb Fix CI build attempt 4 2025-01-22 21:58:55 -05:00
Shy
95b19e2e67 Fix CI build attempt 3 2025-01-22 21:55:09 -05:00
Shy
bd98eb2522 Fix CI build attempt 2 2025-01-22 21:51:59 -05:00
Shy
3d99add79e Fix CI build 2025-01-22 21:43:23 -05:00
Shy
2d6553d5d2 Update magic gen script 2025-01-22 21:39:23 -05:00
Shy
7d67354b96 Update CI build config 2025-01-22 21:32:54 -05:00
Shy
1b77daef16 Update repository URLs 2025-01-22 21:27:27 -05:00
Shy
d7038be35b Fix #506 2025-01-16 18:32:33 -05:00
Shy
c1573a803e Update third-party dependencies 2025-01-12 11:55:14 -05:00
30 changed files with 184 additions and 126 deletions

View File

@@ -7,11 +7,36 @@ platform:
arch: amd64
steps:
- name: submodules
image: alpine/git
commands:
- git submodule update --init --recursive
- name: docker
image: plugins/docker
depends_on:
- submodules
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: sist2app/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
- name: build
image: simon987/sist2-build
image: sist2app/sist2-build
depends_on:
- submodules
commands:
- ./scripts/build.sh
- name: scp files
depends_on:
- build
image: appleboy/drone-scp
settings:
host:
@@ -22,26 +47,11 @@ steps:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
target: ~/files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./VERSION
- ./sist2-x64-linux
- ./sist2-x64-linux-debug
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile
auto_tag: true
auto_tag_suffix: x64-linux
when:
event:
- tag
---
kind: pipeline
@@ -52,11 +62,36 @@ platform:
arch: arm64
steps:
- name: submodules
image: alpine/git
commands:
- git submodule update --init --recursive
- name: docker
image: plugins/docker
depends_on:
- submodules
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: sist2app/sist2
context: ./
dockerfile: ./Dockerfile.arm64
auto_tag: true
auto_tag_suffix: arm64-linux
when:
event:
- tag
- name: build
image: simon987/sist2-build-arm64
image: sist2app/sist2-build-arm64
depends_on:
- submodules
commands:
- ./scripts/build_arm64.sh
- name: scp files
depends_on:
- build
image: appleboy/drone-scp
settings:
host:
@@ -67,22 +102,7 @@ steps:
from_secret: SSH_USER
key:
from_secret: SSH_KEY
target: /files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
target: ~/files/sist2/${DRONE_REPO_OWNER}_${DRONE_REPO_NAME}/arm_${DRONE_BRANCH}_${DRONE_BUILD_NUMBER}_${DRONE_COMMIT}/
source:
- ./sist2-arm64-linux
- ./sist2-arm64-linux-debug
- name: docker
image: plugins/docker
settings:
username:
from_secret: DOCKER_USER
password:
from_secret: DOCKER_PASSWORD
repo: simon987/sist2
context: ./
dockerfile: ./Dockerfile.arm64
auto_tag: true
auto_tag_suffix: arm64-linux
when:
event:
- tag

View File

@@ -89,7 +89,7 @@ target_include_directories(
target_compile_options(
sist2
PRIVATE
-fPIC
# -fPIC
)
if (SIST_DEBUG)
@@ -147,6 +147,7 @@ add_dependencies(
target_link_libraries(
sist2
# m
z
argparse
unofficial::mongoose::mongoose

View File

@@ -1,5 +1,4 @@
FROM simon987/sist2-build as build
MAINTAINER simon987 <me@simon987.net>
FROM sist2app/sist2-build as build
WORKDIR /build/

View File

@@ -1,5 +1,4 @@
FROM simon987/sist2-build-arm64 as build
MAINTAINER simon987 <me@simon987.net>
FROM sist2app/sist2-build-arm64 as build
WORKDIR /build/

View File

@@ -1,5 +1,5 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
![GitHub](https://img.shields.io/github/license/sist2app/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/sist2app/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/sist2app/sist2)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
@@ -38,8 +38,6 @@ sist2 (Simple incremental search tool)
### Using Docker Compose *(Windows/Linux/Mac)*
```yaml
version: "3"
services:
elasticsearch:
image: elasticsearch:7.17.9
@@ -53,11 +51,11 @@ services:
- "PUID=1000"
- "PGID=1000"
sist2-admin:
image: simon987/sist2:3.4.2-x64-linux
image: sist2app/sist2:x64-linux
restart: unless-stopped
volumes:
- /data/sist2-admin-data/:/sist2-admin/
- /:/host
- /<path to index>/:/host
ports:
- 4090:4090
# NOTE: Don't expose this port publicly!
@@ -81,7 +79,7 @@ Navigate to http://localhost:8080/ to configure sist2-admin.
```
* **SQLite**: No installation required
2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
2. Download the [latest sist2 release](https://github.com/sist2app/sist2/releases).
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
3. See [usage guide](docs/USAGE.md) for command line usage.
@@ -100,20 +98,20 @@ Example usage:
| File type | Library | Content | Thumbnail | Metadata |
|:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------|
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
| cbz,cbr | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | - | yes | - |
| cbz,cbr | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | - | yes | - |
| `audio/*` | ffmpeg | - | yes | ID3 tags |
| `video/*` | ffmpeg | - | yes | title, comment, artist |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/sist2app/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags |
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
| `text/plain` | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| `text/plain` | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
| docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| docx, xlsx, pptx | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| doc (MS Word 97-2003) | antiword | yes | no | author, title |
| mobi, azw, azw3 | libmobi | yes | yes | author, title |
| wpd (WordPerfect) | libwpd | yes | no | *planned* |
| json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
| json, jsonl, ndjson | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | - | - |
\* *See [Archive files](#archive-files)*
@@ -137,7 +135,7 @@ You can enable OCR support for ebook (pdf,xps,fb2,epub) or image file types with
Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` image comes with common languages
The `sist2app/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa, chi_sim, deu, pol) pre-installed.
You can use the `+` separator to specify multiple languages. The language
@@ -177,13 +175,13 @@ sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported re
to enable it.
The text processing is done in your browser, no data is sent to any third-party services.
See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details.
See [sist2app/sist2-ner-models](https://github.com/sist2app/sist2-ner-models) for more details.
#### List of available repositories:
| URL | Maintainer | Purpose |
|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
| [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General |
| [sist2app/sist2-ner-models](https://raw.githubusercontent.com/sist2app/sist2-ner-models/main/repo.json) | [sist2app](https://github.com/sist2app) | General |
<details>
<summary>Screenshot</summary>
@@ -199,7 +197,7 @@ You can compile **sist2** by yourself if you don't want to use the pre-compiled
### Using docker
```bash
git clone --recursive https://github.com/simon987/sist2/
git clone --recursive https://github.com/sist2app/sist2/
cd sist2
docker build . -t my-sist2-image
# Copy sist2 executable from docker image
@@ -214,7 +212,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
```
2. Install vcpkg using my fork: https://github.com/simon987/vcpkg
2. Install vcpkg using my fork: https://github.com/sist2app/vcpkg
3. Install vcpkg dependencies
```bash
@@ -223,7 +221,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
4. Build
```bash
git clone --recursive https://github.com/simon987/sist2/
git clone --recursive https://github.com/sist2app/sist2/
(cd sist2-vue; npm install; npm run build)
(cd sist2-admin/frontend; npm install; npm run build)
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .

View File

@@ -18,7 +18,7 @@ services:
container_name: sist2-admin
volumes:
- /data/sist2-admin-data/:/sist2-admin/
- /:/host
- /<path to index>/:/host
ports:
- 4090:4090
# NOTE: Don't export this port publicly!

View File

@@ -189,7 +189,7 @@ Since v3.2.0, User scripts can be used to generate _embeddings_ (vector of float
In theory, embeddings can be created for any type of documents (image, text, audio etc.).
For example, the [clip](https://github.com/simon987/sist2-script-clip) User Script, generates 512-d embeddings of images
For example, the [clip](https://github.com/sist2app/sist2-script-clip) User Script, generates 512-d embeddings of images
(videos are also supported using the thumbnails generated by sist2). When the user enters a query in the "Embeddings Search"
textbox, the query's embedding is generated in their browser, leveraging the ONNX web runtime.

View File

@@ -2,8 +2,6 @@
VCPKG_ROOT="/vcpkg"
git submodule update --init --recursive
(
cd sist2-vue/
npm install

View File

@@ -1,8 +1,16 @@
try:
with open("/usr/lib/file/magic.mgc", "rb") as f:
data = f.read()
except:
data = bytes([])
MAGIC_PATHS = [
"/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
"/work/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
"/usr/lib/file/magic.mgc"
]
for path in MAGIC_PATHS:
try:
with open(path, "rb") as f:
data = f.read()
break
except:
continue
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -4,7 +4,7 @@
<b-container class="pt-4">
<b-alert show dismissible variant="info">
This is a beta version of sist2-admin. Please submit bug reports, usability issues and feature requests
to the <a href="https://github.com/simon987/sist2/issues/new/choose" target="_blank">issue tracker on
to the <a href="https://github.com/sist2app/sist2/issues/new/choose" target="_blank">issue tracker on
Github</a>. Thank you!
</b-alert>
<router-view v-if="$store.state.sist2AdminInfo"/>

View File

@@ -4,4 +4,4 @@ uvicorn
websockets
pycron
GitPython
git+https://github.com/simon987/sist2-python.git
git+https://github.com/sist2app/sist2-python.git@2.1

View File

@@ -220,7 +220,7 @@ class Sist2IndexTask(Sist2Task):
except ProcessLookupError:
pass
try:
os.wait()
os.waitpid(pid, 0)
except ChildProcessError:
pass

View File

@@ -96,7 +96,7 @@ SCRIPT_TEMPLATES = {
"CLIP - Generate embeddings to predict the most relevant image based on the text prompt": lambda name: UserScript(
name=name,
type=ScriptType.GIT,
git_repository="https://github.com/simon987/sist2-script-clip",
git_repository="https://github.com/sist2app/sist2-script-clip",
extra_args="--num-tags=1 --tags-file=general.txt --color=#dcd7ff"
),
"Whisper - Speech to text with OpenAI Whisper": lambda name: UserScript(

View File

@@ -309,7 +309,7 @@ class Sist2Api {
}
getTagsSqlite() {
return axios.get(`${this.baseUrl}/fts/tags`)
return axios.get(`${this.baseUrl}fts/tags`)
.then(resp => {
return resp.data.map(tag => this._createEsTag(tag.tag, tag.count))
});
@@ -566,7 +566,7 @@ class Sist2Api {
}
getDocumentSqlite(sid) {
return axios.get(`${this.baseUrl}/fts/d/${sid}`)
return axios.get(`${this.baseUrl}fts/d/${sid}`)
.then(resp => ({
_source: resp.data
}));
@@ -589,7 +589,7 @@ class Sist2Api {
}
getTagSuggestionsSqlite(prefix) {
return axios.post(`${this.baseUrl}/fts/suggestTags`, prefix)
return axios.post(`${this.baseUrl}fts/suggestTags`, prefix)
.then(resp => (resp.data));
}
@@ -620,7 +620,7 @@ class Sist2Api {
}
getEmbeddings(sid, modelId) {
return axios.post(`${this.baseUrl}/e/${sid}/${modelId.toString().padStart(3, '0')}`)
return axios.post(`${this.baseUrl}e/${sid}/${modelId.toString().padStart(3, '0')}`)
.then(resp => (resp.data));
}
}

View File

@@ -117,11 +117,11 @@ class Sist2ElasticsearchQuery {
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
filters.push({range: {mtime: {gte: dateMin, lte: dateMax, format: "epoch_second"}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}})
filters.push({range: {mtime: {gte: dateMin, format: "epoch_second"}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}})
filters.push({range: {mtime: {lte: dateMax, format: "epoch_second"}}})
}
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes

View File

@@ -9,7 +9,7 @@
<span class="badge badge-pill version" v-if="$store && $store.state.sist2Info">
v{{ sist2Version() }}<span v-if="isDebug()">-dbg</span><span v-if="isLegacy() && !hideLegacy()">-<a
href="https://github.com/simon987/sist2/blob/master/docs/USAGE.md#elasticsearch"
href="https://github.com/sist2app/sist2/blob/master/docs/USAGE.md#elasticsearch"
target="_blank">legacyES</a></span><span v-if="$store.state.uiSqliteMode">-SQLite</span>
</span>

View File

@@ -138,7 +138,7 @@ export default {
},
debug: "Debug information",
debugDescription: "Information useful for debugging. If you encounter bugs or have suggestions for" +
" new features, please submit a new issue <a href='https://github.com/simon987/sist2/issues/new/choose'>here</a>.",
" new features, please submit a new issue <a href='https://github.com/sist2app/sist2/issues/new/choose'>here</a>.",
tagline: "Tagline",
toast: {
esConnErrTitle: "Elasticsearch connection error",
@@ -318,7 +318,7 @@ export default {
},
debug: "Debug Informationen",
debugDescription: "Informationen für das Debugging. Wenn du Bugs gefunden oder Anregungen für " +
"neue Features hast, poste sie bitte <a href='https://github.com/simon987/sist2/issues/new/choose'>hier</a>.",
"neue Features hast, poste sie bitte <a href='https://github.com/sist2app/sist2/issues/new/choose'>hier</a>.",
tagline: "Tagline",
toast: {
esConnErrTitle: "Elasticsearch Verbindungsfehler",
@@ -494,7 +494,7 @@ export default {
debug: "Information de débogage",
debugDescription: "Informations utiles pour le débogage\n" +
"Si vous rencontrez des bogues ou si vous avez des suggestions pour de nouvelles fonctionnalités," +
" veuillez soumettre un nouvel Issue <a href='https://github.com/simon987/sist2/issues/new/choose'>ici</a>.",
" veuillez soumettre un nouvel Issue <a href='https://github.com/sist2app/sist2/issues/new/choose'>ici</a>.",
tagline: "Tagline",
toast: {
esConnErrTitle: "Erreur de connexion Elasticsearch",
@@ -668,7 +668,7 @@ export default {
},
debug: "调试信息",
debugDescription: "对调试除错有用的信息。 若您遇到bug或者想建议新功能请提交新Issue到" +
"<a href='https://github.com/simon987/sist2/issues/new/choose'>这里</a>.",
"<a href='https://github.com/sist2app/sist2/issues/new/choose'>这里</a>.",
tagline: "标签栏",
toast: {
esConnErrTitle: "Elasticsearch连接错误",
@@ -846,7 +846,7 @@ export default {
},
debug: "Informacje dla programistów",
debugDescription: "Informacje przydatne do znajdowania błędów w oprogramowaniu. Jeśli napotkasz błąd lub masz" +
" propozycje zmian, zgłoś to proszę <a href='https://github.com/simon987/sist2/issues/new/choose'>tutaj</a>.",
" propozycje zmian, zgłoś to proszę <a href='https://github.com/sist2app/sist2/issues/new/choose'>tutaj</a>.",
tagline: "Slogan",
toast: {
esConnErrTitle: "Problem z połączeniem z Elasticsearch",

View File

@@ -58,7 +58,7 @@ export default new Vuex.Store({
optVidPreviewInterval: 700,
optSimpleLightbox: true,
optShowTagPickerFilter: true,
optMlRepositories: "https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json",
optMlRepositories: "https://raw.githubusercontent.com/sist2app/sist2-ner-models/main/repo.json",
optAutoAnalyze: false,
optMlDefaultModel: null,

View File

@@ -25,6 +25,7 @@ const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
"/usr/share/tesseract-ocr/4.00/tessdata/",
"/usr/share/tesseract-ocr/5/tessdata/",
"./",
NULL
};

View File

@@ -114,7 +114,7 @@ void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv)
char buf[PATH_MAX];
strcpy(buf, current_job);
strcpy(ipc_ctx->current_job[ProcData.thread_id], current_job);
SET_CURRENT_JOB(ipc_ctx, current_job);
sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC);
}
@@ -478,8 +478,7 @@ index_descriptor_t *database_read_index_descriptor(database_t *db) {
database_iterator_t *database_create_delete_list_iterator(database_t *db) {
sqlite3_stmt *stmt;
sqlite3_prepare_v2(db->db, "SELECT doc.id FROM delete_list "
"INNER JOIN document doc ON doc.ROWID = delete_list.id;", -1, &stmt, NULL);
sqlite3_prepare_v2(db->db, "SELECT id FROM delete_list", -1, &stmt, NULL);
database_iterator_t *iter = malloc(sizeof(database_iterator_t));

View File

@@ -64,6 +64,8 @@ typedef struct {
char current_job[MAX_THREADS][PATH_MAX * 2];
} database_ipc_ctx_t;
#define SET_CURRENT_JOB(ctx, job) (strcpy((ctx)->current_job[ProcData.thread_id], job))
typedef struct {
double date_min;
double date_max;

View File

@@ -142,6 +142,10 @@ void parse(parse_job_t *job) {
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
}
if (IS_SUB_JOB(job)) {
SET_CURRENT_JOB(ProcData.ipc_db->ipc_ctx, job->filepath);
}
document_t *doc = malloc(sizeof(document_t));
strcpy(doc->filepath, job->filepath);
@@ -161,7 +165,8 @@ void parse(parse_job_t *job) {
return;
}
if (database_mark_document(ProcData.index_db, doc->filepath + ScanCtx.index.desc.root_len, doc->mtime)) {
int document_exists = database_mark_document(ProcData.index_db, doc->filepath + ScanCtx.index.desc.root_len, doc->mtime);
if (document_exists) {
CLOSE_FILE(job->vfile)
free(doc);
return;

View File

@@ -55,13 +55,13 @@
static const char *const Version = VERSION;
static const int VersionMajor = 3;
static const int VersionMinor = 4;
static const int VersionPatch = 2;
static const int VersionPatch = 4;
#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown
#endif
#define EXPECTED_MONGOOSE_VERSION "7.13"
#define EXPECTED_MONGOOSE_VERSION "7.16"
#define Q(x) #x
#define QUOTE(x) Q(x)

View File

@@ -50,13 +50,13 @@ void get_embedding(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid;
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.ptr + 3)) {
LOG_DEBUGF("serve.c", "Invalid embedding path: %.*s", (int) hm->uri.len, hm->uri.ptr);
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.buf + 3)) {
LOG_DEBUGF("serve.c", "Invalid embedding path: %.*s", (int) hm->uri.len, hm->uri.buf);
HTTP_REPLY_NOT_FOUND
return;
}
int model_id = (int) strtol(hm->uri.ptr + SIST_SID_LEN + 3, NULL, 10);
int model_id = (int) strtol(hm->uri.buf + SIST_SID_LEN + 3, NULL, 10);
database_t *db = web_get_database(sid.index_id);
if (db == NULL) {
@@ -86,11 +86,11 @@ void stats_files(struct mg_connection *nc, struct mg_http_message *hm) {
char index_id_str[9];
char arg_stat_type[5];
memcpy(index_id_str, hm->uri.ptr + 3, 8);
memcpy(index_id_str, hm->uri.buf + 3, 8);
*(index_id_str + 8) = '\0';
int index_id = (int) strtol(index_id_str, NULL, 16);
memcpy(arg_stat_type, hm->uri.ptr + 3 + 9, 4);
memcpy(arg_stat_type, hm->uri.buf + 3 + 9, 4);
*(arg_stat_type + sizeof(arg_stat_type) - 1) = '\0';
database_stat_type_d stat_type = database_get_stat_type_by_mnemonic(arg_stat_type);
@@ -135,19 +135,19 @@ void serve_chunk_vendors_js(struct mg_connection *nc, struct mg_http_message *hm
}
}
void serve_favicon_ico(struct mg_connection *nc, struct mg_http_message *hm) {
void serve_favicon_ico(struct mg_connection *nc, UNUSED(struct mg_http_message *hm)) {
web_serve_asset_favicon_ico(nc);
}
void serve_style_css(struct mg_connection *nc, struct mg_http_message *hm) {
void serve_style_css(struct mg_connection *nc, UNUSED(struct mg_http_message *hm)) {
web_serve_asset_style_css(nc);
}
void serve_chunk_vendors_css(struct mg_connection *nc, struct mg_http_message *hm) {
void serve_chunk_vendors_css(struct mg_connection *nc, UNUSED(struct mg_http_message *hm)) {
web_serve_asset_chunk_vendors_css(nc);
}
void serve_thumbnail(struct mg_connection *nc, struct mg_http_message *hm, int index_id,
void serve_thumbnail(struct mg_connection *nc, UNUSED(struct mg_http_message *hm), int index_id,
int doc_id, int arg_num) {
database_t *db = web_get_database(index_id);
@@ -179,13 +179,13 @@ void serve_thumbnail(struct mg_connection *nc, struct mg_http_message *hm, int i
void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid;
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.ptr + 3)) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
if (hm->uri.len != SIST_SID_LEN + 2 + 4 || !parse_sid(&sid, hm->uri.buf + 3)) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.buf);
HTTP_REPLY_NOT_FOUND
return;
}
int num = (int) strtol(hm->uri.ptr + SIST_SID_LEN + 3, NULL, 10);
int num = (int) strtol(hm->uri.buf + SIST_SID_LEN + 3, NULL, 10);
serve_thumbnail(nc, hm, sid.index_id, sid.doc_id, num);
}
@@ -193,8 +193,8 @@ void thumbnail_with_num(struct mg_connection *nc, struct mg_http_message *hm) {
void thumbnail(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid;
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.ptr + 3)) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.ptr);
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.buf + 3)) {
LOG_DEBUGF("serve.c", "Invalid thumbnail path: %.*s", (int) hm->uri.len, hm->uri.buf);
HTTP_REPLY_NOT_FOUND
return;
}
@@ -210,7 +210,7 @@ void search(struct mg_connection *nc, struct mg_http_message *hm) {
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len);
memcpy(body, hm->body.buf, hm->body.len);
*(body + hm->body.len) = '\0';
char url[4096];
@@ -416,8 +416,8 @@ cJSON *get_root_document_by_id(int index_id, int doc_id) {
void file(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid;
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.ptr + 3)) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.ptr);
if (hm->uri.len != 20 || !parse_sid(&sid, hm->uri.buf + 3)) {
LOG_DEBUGF("serve.c", "Invalid file path: %.*s", (int) hm->uri.len, hm->uri.buf);
HTTP_REPLY_NOT_FOUND
return;
}
@@ -528,14 +528,14 @@ subreq_ctx_t *elastic_write_tag(const char *sid, const tag_req_t *req) {
void tag(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid;
if (hm->uri.len != 22 || !parse_sid(&sid, hm->uri.ptr + 5)) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.ptr);
if (hm->uri.len != 22 || !parse_sid(&sid, hm->uri.buf + 5)) {
LOG_DEBUGF("serve.c", "Invalid tag path: %.*s", (int) hm->uri.len, hm->uri.buf);
HTTP_REPLY_NOT_FOUND
return;
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len);
memcpy(body, hm->body.buf, hm->body.len);
*(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body);
free(body);
@@ -612,7 +612,7 @@ int check_auth0(struct mg_http_message *hm) {
}
token_str = malloc(token.len + 1);
strncpy(token_str, token.ptr, token.len);
strncpy(token_str, token.buf, token.len);
*(token_str + token.len) = '\0';
int res = auth0_verify_jwt(
@@ -642,13 +642,15 @@ static void ev_router(struct mg_connection *nc, int ev, void *ev_data) {
}
char uri[256];
memcpy(uri, hm->uri.ptr, hm->uri.len);
memcpy(uri, hm->uri.buf, hm->uri.len);
*(uri + hm->uri.len) = '\0';
LOG_DEBUGF("serve.c", "<%s> GET %s",
web_address_to_string(&(nc->rem)),
uri
);
#define mg_http_match_uri(hm, pattern) mg_match((hm)->uri, mg_str(pattern), NULL)
if (mg_http_match_uri(hm, "/")) {
serve_index_html(nc, hm);
return;

View File

@@ -420,8 +420,8 @@ void fts_get_document(struct mg_connection *nc, struct mg_http_message *hm) {
sist_id_t sid;
if (hm->uri.len != 24 || !parse_sid(&sid, hm->uri.ptr + 7)) {
LOG_DEBUGF("serve.c", "Invalid /fts/d/ path: %.*s", (int) hm->uri.len, hm->uri.ptr);
if (hm->uri.len != 24 || !parse_sid(&sid, hm->uri.buf + 7)) {
LOG_DEBUGF("serve.c", "Invalid /fts/d/ path: %.*s", (int) hm->uri.len, hm->uri.buf);
HTTP_REPLY_NOT_FOUND
return;
}

View File

@@ -73,7 +73,7 @@ cJSON *web_get_json_body(struct mg_http_message *hm) {
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len);
memcpy(body, hm->body.buf, hm->body.len);
*(body + hm->body.len) = '\0';
cJSON *json = cJSON_Parse(body);
free(body);
@@ -87,7 +87,7 @@ char *web_get_string_body(struct mg_http_message *hm) {
}
char *body = malloc(hm->body.len + 1);
memcpy(body, hm->body.ptr, hm->body.len);
memcpy(body, hm->body.buf, hm->body.len);
*(body + hm->body.len) = '\0';
return body;

View File

@@ -175,9 +175,19 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
return TRUE;
}
#define IS_IGNORED_MESSAGE(message) \
( \
strstr(message, "invalid glyph index") \
|| strstr(message, "... repeated") \
) \
void fz_err_callback(void *user, const char *message) {
document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message);
}
@@ -185,6 +195,10 @@ void fz_err_callback(void *user, const char *message) {
void fz_warn_callback(void *user, const char *message) {
document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message);
}

View File

@@ -223,14 +223,10 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d
void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) {
meta_line_t *meta = doc->meta_head;
while (meta != NULL) {
if (meta->key == key) {
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'",
key, meta->str_val, key, tag->value);
return;
}
meta = meta->next;
if (meta_contains_key(doc->meta_head, key)) {
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s'",
key, tag->value);
return;
}
text_buffer_t tex = text_buffer_create(-1);
@@ -445,7 +441,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
return SAVE_THUMBNAIL_FAILED;
}
if (ctx->tesseract_lang != NULL && thumbnail_index == 0) {
if (ctx->tesseract_lang != NULL && thumbnail_index == 0 && !meta_contains_key(doc->meta_head, MetaContent)) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
}

View File

@@ -172,6 +172,8 @@ typedef struct {
char filepath[PATH_MAX * 2 + 1];
} parse_job_t;
#define IS_SUB_JOB(job) ((job)->parent[0] != '\0')
#include "util.h"

View File

@@ -392,4 +392,18 @@ static parse_job_t *create_parse_job(const char *filepath, int mtime, size_t st_
return job;
}
static int meta_contains_key (meta_line_t *meta_head, enum metakey key) {
meta_line_t *meta = meta_head;
while (meta != NULL) {
if (meta->key == key) {
return TRUE;
}
meta = meta->next;
}
return FALSE;
}
#endif