mirror of
https://github.com/simon987/sist2.git
synced 2025-04-03 15:33:00 +00:00
Compare commits
14 Commits
dbb62e6b17
...
fcd2a27c11
Author | SHA1 | Date | |
---|---|---|---|
|
fcd2a27c11 | ||
|
d800effad9 | ||
|
371e9c408e | ||
|
ee1b1d8bb4 | ||
|
63a097a463 | ||
|
7a03a2202e | ||
|
050fc500ce | ||
|
d44679131b | ||
|
4dd5e70406 | ||
|
5a82581992 | ||
|
0dc18a56c0 | ||
|
258b2e31e6 | ||
|
c726074029 | ||
|
847cdaf3e5 |
36
README.md
36
README.md
@ -1,5 +1,5 @@
|
||||

|
||||
[](https://www.codefactor.io/repository/github/simon987/sist2)
|
||||

|
||||
[](https://www.codefactor.io/repository/github/sist2app/sist2)
|
||||
[](https://files.simon987.net/.gate/sist2/simon987_sist2/)
|
||||
|
||||
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
|
||||
@ -38,8 +38,6 @@ sist2 (Simple incremental search tool)
|
||||
### Using Docker Compose *(Windows/Linux/Mac)*
|
||||
|
||||
```yaml
|
||||
version: "3"
|
||||
|
||||
services:
|
||||
elasticsearch:
|
||||
image: elasticsearch:7.17.9
|
||||
@ -53,11 +51,11 @@ services:
|
||||
- "PUID=1000"
|
||||
- "PGID=1000"
|
||||
sist2-admin:
|
||||
image: simon987/sist2:3.4.2-x64-linux
|
||||
image: sist2app/sist2:x64-linux
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- /data/sist2-admin-data/:/sist2-admin/
|
||||
- /:/host
|
||||
- /<path to index>/:/host
|
||||
ports:
|
||||
- 4090:4090
|
||||
# NOTE: Don't expose this port publicly!
|
||||
@ -81,7 +79,7 @@ Navigate to http://localhost:8080/ to configure sist2-admin.
|
||||
```
|
||||
* **SQLite**: No installation required
|
||||
|
||||
2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
|
||||
2. Download the [latest sist2 release](https://github.com/sist2app/sist2/releases).
|
||||
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
|
||||
3. See [usage guide](docs/USAGE.md) for command line usage.
|
||||
|
||||
@ -100,20 +98,20 @@ Example usage:
|
||||
| File type | Library | Content | Thumbnail | Metadata |
|
||||
|:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
|
||||
| cbz,cbr | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | - | yes | - |
|
||||
| cbz,cbr | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | - | yes | - |
|
||||
| `audio/*` | ffmpeg | - | yes | ID3 tags |
|
||||
| `video/*` | ffmpeg | - | yes | title, comment, artist |
|
||||
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
|
||||
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/sist2app/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
|
||||
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags |
|
||||
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
|
||||
| `text/plain` | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
|
||||
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
|
||||
| `text/plain` | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
|
||||
| html, xml | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
|
||||
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
|
||||
| docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
|
||||
| docx, xlsx, pptx | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
|
||||
| doc (MS Word 97-2003) | antiword | yes | no | author, title |
|
||||
| mobi, azw, azw3 | libmobi | yes | yes | author, title |
|
||||
| wpd (WordPerfect) | libwpd | yes | no | *planned* |
|
||||
| json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
|
||||
| json, jsonl, ndjson | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | - | - |
|
||||
|
||||
\* *See [Archive files](#archive-files)*
|
||||
|
||||
@ -137,7 +135,7 @@ You can enable OCR support for ebook (pdf,xps,fb2,epub) or image file types with
|
||||
Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
|
||||
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
|
||||
|
||||
The `simon987/sist2` image comes with common languages
|
||||
The `sist2app/sist2` image comes with common languages
|
||||
(hin, jpn, eng, fra, rus, spa, chi_sim, deu, pol) pre-installed.
|
||||
|
||||
You can use the `+` separator to specify multiple languages. The language
|
||||
@ -177,13 +175,13 @@ sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported re
|
||||
to enable it.
|
||||
|
||||
The text processing is done in your browser, no data is sent to any third-party services.
|
||||
See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details.
|
||||
See [sist2app/sist2-ner-models](https://github.com/sist2app/sist2-ner-models) for more details.
|
||||
|
||||
#### List of available repositories:
|
||||
|
||||
| URL | Maintainer | Purpose |
|
||||
|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
|
||||
| [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General |
|
||||
| [sist2app/sist2-ner-models](https://raw.githubusercontent.com/sist2app/sist2-ner-models/main/repo.json) | [sist2app](https://github.com/sist2app) | General |
|
||||
|
||||
<details>
|
||||
<summary>Screenshot</summary>
|
||||
@ -199,7 +197,7 @@ You can compile **sist2** by yourself if you don't want to use the pre-compiled
|
||||
### Using docker
|
||||
|
||||
```bash
|
||||
git clone --recursive https://github.com/simon987/sist2/
|
||||
git clone --recursive https://github.com/sist2app/sist2/
|
||||
cd sist2
|
||||
docker build . -t my-sist2-image
|
||||
# Copy sist2 executable from docker image
|
||||
@ -214,7 +212,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
|
||||
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
|
||||
```
|
||||
|
||||
2. Install vcpkg using my fork: https://github.com/simon987/vcpkg
|
||||
2. Install vcpkg using my fork: https://github.com/sist2app/vcpkg
|
||||
3. Install vcpkg dependencies
|
||||
|
||||
```bash
|
||||
@ -223,7 +221,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
|
||||
|
||||
4. Build
|
||||
```bash
|
||||
git clone --recursive https://github.com/simon987/sist2/
|
||||
git clone --recursive https://github.com/sist2app/sist2/
|
||||
(cd sist2-vue; npm install; npm run build)
|
||||
(cd sist2-admin/frontend; npm install; npm run build)
|
||||
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
|
||||
|
@ -18,7 +18,7 @@ services:
|
||||
container_name: sist2-admin
|
||||
volumes:
|
||||
- /data/sist2-admin-data/:/sist2-admin/
|
||||
- /:/host
|
||||
- /<path to index>/:/host
|
||||
ports:
|
||||
- 4090:4090
|
||||
# NOTE: Don't expose this port publicly!
|
||||
|
@ -1,5 +1,16 @@
|
||||
|
||||
with open("/usr/lib/file/magic.mgc", "rb") as f:
|
||||
data = f.read()
|
||||
# Candidate locations of the compiled libmagic database, tried in order.
# The first one that can be opened and read is used.
MAGIC_PATHS = [
    "/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
    "/work/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
    "/usr/lib/file/magic.mgc"
]

data = None

for path in MAGIC_PATHS:
    try:
        with open(path, "rb") as f:
            data = f.read()
        break
    except OSError:
        # Missing or unreadable at this location: fall through to the next
        # candidate. Anything else (e.g. KeyboardInterrupt) must propagate.
        continue

if data is None:
    # Fail with an explicit message instead of a NameError on `data` below.
    raise SystemExit("magic.mgc not found in any of: %s" % ", ".join(MAGIC_PATHS))

# Emit the database as a C array definition on stdout, one decimal per byte.
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))
|
||||
|
44
sist2-admin/frontend/package-lock.json
generated
44
sist2-admin/frontend/package-lock.json
generated
@ -3655,12 +3655,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/braces": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz",
|
||||
"integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==",
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
|
||||
"integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"fill-range": "^7.0.1"
|
||||
"fill-range": "^7.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
@ -5892,9 +5892,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/fill-range": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
|
||||
"integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==",
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
|
||||
"integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"to-regex-range": "^5.0.1"
|
||||
@ -7435,12 +7435,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/micromatch": {
|
||||
"version": "4.0.5",
|
||||
"resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz",
|
||||
"integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==",
|
||||
"version": "4.0.8",
|
||||
"resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz",
|
||||
"integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"braces": "^3.0.2",
|
||||
"braces": "^3.0.3",
|
||||
"picomatch": "^2.3.1"
|
||||
},
|
||||
"engines": {
|
||||
@ -13705,12 +13705,12 @@
|
||||
}
|
||||
},
|
||||
"braces": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz",
|
||||
"integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==",
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
|
||||
"integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"fill-range": "^7.0.1"
|
||||
"fill-range": "^7.1.1"
|
||||
}
|
||||
},
|
||||
"browserslist": {
|
||||
@ -15333,9 +15333,9 @@
|
||||
}
|
||||
},
|
||||
"fill-range": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
|
||||
"integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==",
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
|
||||
"integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"to-regex-range": "^5.0.1"
|
||||
@ -16472,12 +16472,12 @@
|
||||
"dev": true
|
||||
},
|
||||
"micromatch": {
|
||||
"version": "4.0.5",
|
||||
"resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz",
|
||||
"integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==",
|
||||
"version": "4.0.8",
|
||||
"resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz",
|
||||
"integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"braces": "^3.0.2",
|
||||
"braces": "^3.0.3",
|
||||
"picomatch": "^2.3.1"
|
||||
}
|
||||
},
|
||||
|
@ -1817,7 +1817,13 @@ brace-expansion@^1.1.7:
|
||||
balanced-match "^1.0.0"
|
||||
concat-map "0.0.1"
|
||||
|
||||
braces@^3.0.2, braces@~3.0.2:
|
||||
braces@^3.0.3:
|
||||
version "3.0.3"
|
||||
resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.3.tgz#490332f40919452272d55a8480adc0c441358789"
|
||||
dependencies:
|
||||
fill-range "^7.1.1"
|
||||
|
||||
braces@~3.0.2:
|
||||
version "3.0.2"
|
||||
resolved "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz"
|
||||
dependencies:
|
||||
@ -2635,6 +2641,12 @@ fill-range@^7.0.1:
|
||||
dependencies:
|
||||
to-regex-range "^5.0.1"
|
||||
|
||||
fill-range@^7.1.1:
|
||||
version "7.1.1"
|
||||
resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.1.1.tgz#44265d3cac07e3ea7dc247516380643754a05292"
|
||||
dependencies:
|
||||
to-regex-range "^5.0.1"
|
||||
|
||||
finalhandler@1.2.0:
|
||||
version "1.2.0"
|
||||
resolved "https://registry.npmjs.org/finalhandler/-/finalhandler-1.2.0.tgz"
|
||||
@ -3310,10 +3322,10 @@ methods@~1.1.2:
|
||||
resolved "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz"
|
||||
|
||||
micromatch@^4.0.2, micromatch@^4.0.4:
|
||||
version "4.0.5"
|
||||
resolved "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz"
|
||||
version "4.0.8"
|
||||
resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.8.tgz#d66fa18f3a47076789320b9b1af32bd86d9fa202"
|
||||
dependencies:
|
||||
braces "^3.0.2"
|
||||
braces "^3.0.3"
|
||||
picomatch "^2.3.1"
|
||||
|
||||
mime-db@1.52.0, "mime-db@>= 1.43.0 < 2":
|
||||
|
@ -309,7 +309,7 @@ class Sist2Api {
|
||||
}
|
||||
|
||||
getTagsSqlite() {
|
||||
return axios.get(`${this.baseUrl}/fts/tags`)
|
||||
return axios.get(`${this.baseUrl}fts/tags`)
|
||||
.then(resp => {
|
||||
return resp.data.map(tag => this._createEsTag(tag.tag, tag.count))
|
||||
});
|
||||
@ -566,7 +566,7 @@ class Sist2Api {
|
||||
}
|
||||
|
||||
getDocumentSqlite(sid) {
|
||||
return axios.get(`${this.baseUrl}/fts/d/${sid}`)
|
||||
return axios.get(`${this.baseUrl}fts/d/${sid}`)
|
||||
.then(resp => ({
|
||||
_source: resp.data
|
||||
}));
|
||||
@ -589,7 +589,7 @@ class Sist2Api {
|
||||
}
|
||||
|
||||
getTagSuggestionsSqlite(prefix) {
|
||||
return axios.post(`${this.baseUrl}/fts/suggestTags`, prefix)
|
||||
return axios.post(`${this.baseUrl}fts/suggestTags`, prefix)
|
||||
.then(resp => (resp.data));
|
||||
}
|
||||
|
||||
@ -620,7 +620,7 @@ class Sist2Api {
|
||||
}
|
||||
|
||||
getEmbeddings(sid, modelId) {
|
||||
return axios.post(`${this.baseUrl}/e/${sid}/${modelId.toString().padStart(3, '0')}`)
|
||||
return axios.post(`${this.baseUrl}e/${sid}/${modelId.toString().padStart(3, '0')}`)
|
||||
.then(resp => (resp.data));
|
||||
}
|
||||
}
|
||||
|
@ -117,11 +117,11 @@ class Sist2ElasticsearchQuery {
|
||||
}
|
||||
|
||||
if (dateMin && dateMax) {
|
||||
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
|
||||
filters.push({range: {mtime: {gte: dateMin, lte: dateMax, format: "epoch_second"}}})
|
||||
} else if (dateMin) {
|
||||
filters.push({range: {mtime: {gte: dateMin}}})
|
||||
filters.push({range: {mtime: {gte: dateMin, format: "epoch_second"}}})
|
||||
} else if (dateMax) {
|
||||
filters.push({range: {mtime: {lte: dateMax}}})
|
||||
filters.push({range: {mtime: {lte: dateMax, format: "epoch_second"}}})
|
||||
}
|
||||
|
||||
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
|
||||
|
@ -25,6 +25,7 @@ const char *TESS_DATAPATHS[] = {
|
||||
"/usr/share/tessdata/",
|
||||
"/usr/share/tesseract-ocr/tessdata/",
|
||||
"/usr/share/tesseract-ocr/4.00/tessdata/",
|
||||
"/usr/share/tesseract-ocr/5/tessdata/",
|
||||
"./",
|
||||
NULL
|
||||
};
|
||||
|
@ -55,7 +55,7 @@
|
||||
static const char *const Version = VERSION;
|
||||
static const int VersionMajor = 3;
|
||||
static const int VersionMinor = 4;
|
||||
static const int VersionPatch = 2;
|
||||
static const int VersionPatch = 3;
|
||||
|
||||
#ifndef SIST_PLATFORM
|
||||
#define SIST_PLATFORM unknown
|
||||
|
14
third-party/libscan/libscan/ebook/ebook.c
vendored
14
third-party/libscan/libscan/ebook/ebook.c
vendored
@ -175,9 +175,19 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/*
 * Evaluates to non-zero when `message` contains one of the substrings that
 * fz_err_callback / fz_warn_callback drop without logging:
 *   - "invalid glyph index"
 *   - "... repeated"
 * `message` must be a valid NUL-terminated string; it is evaluated twice.
 */
#define IS_IGNORED_MESSAGE(message) \
    (strstr((message), "invalid glyph index") != NULL \
     || strstr((message), "... repeated") != NULL)
|
||||
|
||||
void fz_err_callback(void *user, const char *message) {
|
||||
document_t *doc = (document_t *) user;
|
||||
|
||||
if (IS_IGNORED_MESSAGE(message)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const scan_ebook_ctx_t *ctx = &thread_ctx;
|
||||
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message);
|
||||
}
|
||||
@ -185,6 +195,10 @@ void fz_err_callback(void *user, const char *message) {
|
||||
void fz_warn_callback(void *user, const char *message) {
|
||||
document_t *doc = (document_t *) user;
|
||||
|
||||
if (IS_IGNORED_MESSAGE(message)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const scan_ebook_ctx_t *ctx = &thread_ctx;
|
||||
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message);
|
||||
}
|
||||
|
14
third-party/libscan/libscan/media/media.c
vendored
14
third-party/libscan/libscan/media/media.c
vendored
@ -223,14 +223,10 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d
|
||||
|
||||
void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) {
|
||||
|
||||
meta_line_t *meta = doc->meta_head;
|
||||
while (meta != NULL) {
|
||||
if (meta->key == key) {
|
||||
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'",
|
||||
key, meta->str_val, key, tag->value);
|
||||
return;
|
||||
}
|
||||
meta = meta->next;
|
||||
if (meta_contains_key(doc->meta_head, key)) {
|
||||
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s'",
|
||||
key, tag->value);
|
||||
return;
|
||||
}
|
||||
|
||||
text_buffer_t tex = text_buffer_create(-1);
|
||||
@ -445,7 +441,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
|
||||
return SAVE_THUMBNAIL_FAILED;
|
||||
}
|
||||
|
||||
if (ctx->tesseract_lang != NULL && thumbnail_index == 0) {
|
||||
if (ctx->tesseract_lang != NULL && thumbnail_index == 0 && !meta_contains_key(doc->meta_head, MetaContent)) {
|
||||
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
|
||||
}
|
||||
|
||||
|
14
third-party/libscan/libscan/util.h
vendored
14
third-party/libscan/libscan/util.h
vendored
@ -392,4 +392,18 @@ static parse_job_t *create_parse_job(const char *filepath, int mtime, size_t st_
|
||||
return job;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Tell whether a metadata list already holds an entry with the given key.
 *
 * @param meta_head first node of the list (may be NULL for an empty list)
 * @param key       key to look for
 * @return TRUE if some node's `key` equals `key`, FALSE otherwise
 */
static int meta_contains_key (meta_line_t *meta_head, enum metakey key) {
    for (meta_line_t *node = meta_head; node != NULL; node = node->next) {
        if (node->key == key) {
            return TRUE;
        }
    }

    return FALSE;
}
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user