Compare commits

..

2 Commits

Author SHA1 Message Date
dependabot[bot]
dbb62e6b17 Merge 847cdaf3e5 into 7873ef003d 2025-01-23 19:46:58 +00:00
dependabot[bot]
847cdaf3e5 Bump micromatch from 4.0.5 to 4.0.8 in /sist2-admin/frontend
Bumps [micromatch](https://github.com/micromatch/micromatch) from 4.0.5 to 4.0.8.
- [Release notes](https://github.com/micromatch/micromatch/releases)
- [Changelog](https://github.com/micromatch/micromatch/blob/master/CHANGELOG.md)
- [Commits](https://github.com/micromatch/micromatch/compare/4.0.5...4.0.8)

---
updated-dependencies:
- dependency-name: micromatch
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-01 02:05:31 +00:00
34 changed files with 258 additions and 509 deletions

View File

@@ -62,9 +62,7 @@ add_executable(
src/database/database_schema.c
src/database/database_fts.c
src/web/web_fts.c
src/database/database_embeddings.c
src/ignorelist.c
src/ignorelist.h)
src/database/database_embeddings.c)
set_target_properties(sist2 PROPERTIES LINKER_LANGUAGE C)
target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
@@ -78,7 +76,6 @@ find_package(CURL CONFIG REQUIRED)
find_library(MAGIC_LIB NAMES libmagic.a REQUIRED)
find_package(unofficial-sqlite3 CONFIG REQUIRED)
find_package(OpenBLAS CONFIG REQUIRED)
find_package(libgit2 CONFIG REQUIRED)
target_include_directories(
@@ -92,7 +89,7 @@ target_include_directories(
target_compile_options(
sist2
PRIVATE
# -fPIC
-fPIC
)
if (SIST_DEBUG)
@@ -150,9 +147,8 @@ add_dependencies(
target_link_libraries(
sist2
# m
m
z
libgit2::libgit2package
argparse
unofficial::mongoose::mongoose
CURL::libcurl

View File

@@ -1,5 +1,5 @@
![GitHub](https://img.shields.io/github/license/sist2app/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/sist2app/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/sist2app/sist2)
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
@@ -38,6 +38,8 @@ sist2 (Simple incremental search tool)
### Using Docker Compose *(Windows/Linux/Mac)*
```yaml
version: "3"
services:
elasticsearch:
image: elasticsearch:7.17.9
@@ -51,11 +53,11 @@ services:
- "PUID=1000"
- "PGID=1000"
sist2-admin:
image: sist2app/sist2:x64-linux
image: simon987/sist2:3.4.2-x64-linux
restart: unless-stopped
volumes:
- /data/sist2-admin-data/:/sist2-admin/
- /<path to index>/:/host
- /:/host
ports:
- 4090:4090
# NOTE: Don't expose this port publicly!
@@ -79,7 +81,7 @@ Navigate to http://localhost:8080/ to configure sist2-admin.
```
* **SQLite**: No installation required
2. Download the [latest sist2 release](https://github.com/sist2app/sist2/releases).
2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
3. See [usage guide](docs/USAGE.md) for command line usage.
@@ -98,20 +100,20 @@ Example usage:
| File type | Library | Content | Thumbnail | Metadata |
|:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------|
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
| cbz,cbr | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | - | yes | - |
| cbz,cbr | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | - | yes | - |
| `audio/*` | ffmpeg | - | yes | ID3 tags |
| `video/*` | ffmpeg | - | yes | title, comment, artist |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/sist2app/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags |
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
| `text/plain` | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
| `text/plain` | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
| docx, xlsx, pptx | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| doc (MS Word 97-2003) | antiword | yes | no | author, title |
| mobi, azw, azw3 | libmobi | yes | yes | author, title |
| wpd (WordPerfect) | libwpd | yes | no | *planned* |
| json, jsonl, ndjson | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | - | - |
| json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
\* *See [Archive files](#archive-files)*
@@ -135,7 +137,7 @@ You can enable OCR support for ebook (pdf,xps,fb2,epub) or image file types with
Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `sist2app/sist2` image comes with common languages
The `simon987/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa, chi_sim, deu, pol) pre-installed.
You can use the `+` separator to specify multiple languages. The language
@@ -175,13 +177,13 @@ sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported re
to enable it.
The text processing is done in your browser, no data is sent to any third-party services.
See [sist2app/sist2-ner-models](https://github.com/sist2app/sist2-ner-models) for more details.
See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details.
#### List of available repositories:
| URL | Maintainer | Purpose |
|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
| [sist2app/sist2-ner-models](https://raw.githubusercontent.com/sist2app/sist2-ner-models/main/repo.json) | [sist2app](https://github.com/sist2app) | General |
| [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General |
<details>
<summary>Screenshot</summary>
@@ -197,7 +199,7 @@ You can compile **sist2** by yourself if you don't want to use the pre-compiled
### Using docker
```bash
git clone --recursive https://github.com/sist2app/sist2/
git clone --recursive https://github.com/simon987/sist2/
cd sist2
docker build . -t my-sist2-image
# Copy sist2 executable from docker image
@@ -212,18 +214,18 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
```
2. Install vcpkg using my fork: https://github.com/sist2app/vcpkg
2. Install vcpkg using my fork: https://github.com/simon987/vcpkg
3. Install vcpkg dependencies
```bash
vcpkg install openblas curl[core,openssl] sqlite3[core,fts5,json1] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,zlib] libgit2[core,pcre]
vcpkg install openblas curl[core,openssl] sqlite3[core,fts5,json1] cpp-jwt pcre cjson brotli libarchive[core,bzip2,libxml2,lz4,lzma,lzo] pthread tesseract libxml2 libmupdf[ocr] gtest mongoose libmagic libraw gumbo ffmpeg[core,avcodec,avformat,swscale,swresample,webp,opus,mp3lame,vpx,zlib]
```
4. Build
```bash
git clone --recursive https://github.com/sist2app/sist2/
git clone --recursive https://github.com/simon987/sist2/
(cd sist2-vue; npm install; npm run build)
(cd sist2-admin/frontend; npm install; npm run build)
cmake -DSIST_DEBUG=off -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
make
```

View File

@@ -18,7 +18,7 @@ services:
container_name: sist2-admin
volumes:
- /data/sist2-admin-data/:/sist2-admin/
- /<path to index>/:/host
- /:/host
ports:
- 4090:4090
# NOTE: Don't export this port publicly!

View File

@@ -108,27 +108,6 @@ sist scan ~/Documents -o ./documents.sist2 --incremental
sist scan ~/Documents -o ./documents.sist2 --incremental
```
### Excluding files
You can use the `--exclude` option to specify exclude patterns. For more complex setups, you can create a
`.sist2ignore` file at the root of the scan path (For example, `~/Documents/.sist2ignore` for the example above).
The syntax for sist2ignore is the same as .gitignore for Git (reference [here](https://git-scm.com/docs/gitignore)).
Example:
**.sist2ignore**
```gitignore
# Ignore all PDF files
*.pdf
# But don't ignore them for the /important_files/ directory
!/important_files/*.pdf
# Ignore all files in _staging/ directories
_staging/
```
### Index documents to Elasticsearch search backend
```bash

View File

@@ -1,16 +1,5 @@
MAGIC_PATHS = [
"/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
"/work/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
"/usr/lib/file/magic.mgc"
]
for path in MAGIC_PATHS:
try:
with open(path, "rb") as f:
data = f.read()
break
except:
continue
with open("/usr/lib/file/magic.mgc", "rb") as f:
data = f.read()
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -449,5 +449,4 @@ image/x-sigma-x3f, xf3
image/x-sony-arw, arw
image/x-sony-sr2, sr2
image/x-sony-srf, srf
image/x-epson-erf, erf
image/jp2, jp2
image/x-epson-erf, erf
1 application/x-matlab-data mat
449 image/x-sony-arw arw
450 image/x-sony-sr2 sr2
451 image/x-sony-srf srf
452 image/x-epson-erf erf
image/jp2 jp2

View File

@@ -3655,12 +3655,12 @@
}
},
"node_modules/braces": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz",
"integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==",
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
"integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
"dev": true,
"dependencies": {
"fill-range": "^7.0.1"
"fill-range": "^7.1.1"
},
"engines": {
"node": ">=8"
@@ -5892,9 +5892,9 @@
}
},
"node_modules/fill-range": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
"integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==",
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
"integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
"dev": true,
"dependencies": {
"to-regex-range": "^5.0.1"
@@ -7435,12 +7435,12 @@
}
},
"node_modules/micromatch": {
"version": "4.0.5",
"resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz",
"integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==",
"version": "4.0.8",
"resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz",
"integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==",
"dev": true,
"dependencies": {
"braces": "^3.0.2",
"braces": "^3.0.3",
"picomatch": "^2.3.1"
},
"engines": {
@@ -13705,12 +13705,12 @@
}
},
"braces": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz",
"integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==",
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
"integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
"dev": true,
"requires": {
"fill-range": "^7.0.1"
"fill-range": "^7.1.1"
}
},
"browserslist": {
@@ -15333,9 +15333,9 @@
}
},
"fill-range": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
"integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==",
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
"integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
"dev": true,
"requires": {
"to-regex-range": "^5.0.1"
@@ -16472,12 +16472,12 @@
"dev": true
},
"micromatch": {
"version": "4.0.5",
"resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz",
"integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==",
"version": "4.0.8",
"resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz",
"integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==",
"dev": true,
"requires": {
"braces": "^3.0.2",
"braces": "^3.0.3",
"picomatch": "^2.3.1"
}
},

View File

@@ -93,9 +93,7 @@ export default {
threads: "Number of threads",
batchSize: "Index batch size",
script: "User script",
searchIndex: "Search index file location",
esMappings: "Elasticsearch mappings file override",
esSettings: "Elasticsearch settings file override"
searchIndex: "Search index file location"
},
scanOptions: {
title: "Scanning options",

View File

@@ -44,12 +44,6 @@
<label>{{ $t("backendOptions.batchSize") }}</label>
<b-form-input v-model="backend.batch_size" type="number" min="1" @change="update()"></b-form-input>
<label>{{ $t("backendOptions.esMappings") }}</label>
<b-form-textarea v-model="backend.es_mappings" rows="4" @change="update()"></b-form-textarea>
<label>{{ $t("backendOptions.esSettings") }}</label>
<b-form-textarea v-model="backend.es_settings" rows="4" @change="update()"></b-form-textarea>
</template>
<template v-else>
<label>{{ $t("backendOptions.searchIndex") }}</label>

View File

@@ -1817,7 +1817,13 @@ brace-expansion@^1.1.7:
balanced-match "^1.0.0"
concat-map "0.0.1"
braces@^3.0.2, braces@~3.0.2:
braces@^3.0.3:
version "3.0.3"
resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.3.tgz#490332f40919452272d55a8480adc0c441358789"
dependencies:
fill-range "^7.1.1"
braces@~3.0.2:
version "3.0.2"
resolved "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz"
dependencies:
@@ -2635,6 +2641,12 @@ fill-range@^7.0.1:
dependencies:
to-regex-range "^5.0.1"
fill-range@^7.1.1:
version "7.1.1"
resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.1.1.tgz#44265d3cac07e3ea7dc247516380643754a05292"
dependencies:
to-regex-range "^5.0.1"
finalhandler@1.2.0:
version "1.2.0"
resolved "https://registry.npmjs.org/finalhandler/-/finalhandler-1.2.0.tgz"
@@ -3310,10 +3322,10 @@ methods@~1.1.2:
resolved "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz"
micromatch@^4.0.2, micromatch@^4.0.4:
version "4.0.5"
resolved "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz"
version "4.0.8"
resolved "https://registry.yarnpkg.com/micromatch/-/micromatch-4.0.8.tgz#d66fa18f3a47076789320b9b1af32bd86d9fa202"
dependencies:
braces "^3.0.2"
braces "^3.0.3"
picomatch "^2.3.1"
mime-db@1.52.0, "mime-db@>= 1.43.0 < 2":

View File

@@ -4,4 +4,4 @@ uvicorn
websockets
pycron
GitPython
git+https://github.com/sist2app/sist2-python.git@2.1
git+https://github.com/sist2app/sist2-python.git

View File

@@ -220,7 +220,7 @@ class Sist2IndexTask(Sist2Task):
except ProcessLookupError:
pass
try:
os.waitpid(pid, 0)
os.wait()
except ChildProcessError:
pass

View File

@@ -10,7 +10,7 @@ from logging import FileHandler, StreamHandler
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile
from threading import Thread
from typing import List, Optional
from typing import List
from pydantic import BaseModel
@@ -40,8 +40,6 @@ class Sist2SearchBackend(BaseModel):
es_url: str = "http://elasticsearch:9200"
es_insecure_ssl: bool = False
es_mappings: Optional[str] = None
es_settings: Optional[str] = None
es_index: str = "sist2"
threads: int = 1
batch_size: int = 70
@@ -59,8 +57,6 @@ class IndexOptions(BaseModel):
path: str = None
incremental_index: bool = True
search_backend: str = None
es_mappings_file: Optional[str] = None
es_settings_file: Optional[str] = None
def __init__(self, **kwargs):
super().__init__(**kwargs)
@@ -79,12 +75,6 @@ class IndexOptions(BaseModel):
if search_backend.es_insecure_ssl:
args.append(f"--es-insecure-ssl")
if self.es_mappings_file:
args.append(f"--mappings-file={self.es_mappings_file}")
if self.es_settings_file:
args.append(f"--settings-file={self.es_settings_file}")
if self.incremental_index:
args.append(f"--incremental-index")
@@ -259,20 +249,6 @@ class Sist2:
def index(self, options: IndexOptions, search_backend: Sist2SearchBackend, logs_cb, set_pid_cb):
if search_backend.es_mappings:
with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".txt", delete=False) as f:
f.write(search_backend.es_mappings)
options.es_mappings_file = f.name
else:
options.es_mappings_file = None
if search_backend.es_settings:
with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".txt", delete=False) as f:
f.write(search_backend.es_settings)
options.es_settings_file = f.name
else:
options.es_settings_file = None
args = [
self.bin_path,
*options.args(search_backend),

View File

@@ -69,8 +69,7 @@ class Sist2Api {
hit._props.isImage = true;
}
if ("width" in hit._source && !hit._props.isSubDocument && hit._source.videoc !== "tiff"
&& hit._source.videoc !== "raw" && hit._source.videoc !== "ppm"
&& hit._source.mime !== "image/jp2") {
&& hit._source.videoc !== "raw" && hit._source.videoc !== "ppm") {
hit._props.isPlayableImage = true;
}
if ("width" in hit._source && "height" in hit._source) {
@@ -310,7 +309,7 @@ class Sist2Api {
}
getTagsSqlite() {
return axios.get(`${this.baseUrl}fts/tags`)
return axios.get(`${this.baseUrl}/fts/tags`)
.then(resp => {
return resp.data.map(tag => this._createEsTag(tag.tag, tag.count))
});
@@ -567,7 +566,7 @@ class Sist2Api {
}
getDocumentSqlite(sid) {
return axios.get(`${this.baseUrl}fts/d/${sid}`)
return axios.get(`${this.baseUrl}/fts/d/${sid}`)
.then(resp => ({
_source: resp.data
}));
@@ -590,7 +589,7 @@ class Sist2Api {
}
getTagSuggestionsSqlite(prefix) {
return axios.post(`${this.baseUrl}fts/suggestTags`, prefix)
return axios.post(`${this.baseUrl}/fts/suggestTags`, prefix)
.then(resp => (resp.data));
}
@@ -621,7 +620,7 @@ class Sist2Api {
}
getEmbeddings(sid, modelId) {
return axios.post(`${this.baseUrl}e/${sid}/${modelId.toString().padStart(3, '0')}`)
return axios.post(`${this.baseUrl}/e/${sid}/${modelId.toString().padStart(3, '0')}`)
.then(resp => (resp.data));
}
}

View File

@@ -117,11 +117,11 @@ class Sist2ElasticsearchQuery {
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax, format: "epoch_second"}}})
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin, format: "epoch_second"}}})
filters.push({range: {mtime: {gte: dateMin}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax, format: "epoch_second"}}})
filters.push({range: {mtime: {lte: dateMax}}})
}
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes
@@ -238,7 +238,7 @@ class Sist2ElasticsearchQuery {
pre_tags: ["<mark>"],
post_tags: ["</mark>"],
fragment_size: getters.optFragmentSize,
number_of_fragments: getters.optFragmentCount,
number_of_fragments: 1,
order: "score",
fields: {
content: {},

View File

@@ -3,8 +3,6 @@
</template>
<script>
const FRAGMENT_SEPARATOR = "<br /><i style='line-height: 2.4'>[…]</i><br/>";
export default {
name: "ContentDiv",
props: ["doc"],
@@ -15,10 +13,10 @@ export default {
}
if (this.doc.highlight["content.nGram"]) {
return this.doc.highlight["content.nGram"].join(FRAGMENT_SEPARATOR);
return this.doc.highlight["content.nGram"][0];
}
if (this.doc.highlight.content) {
return this.doc.highlight.content.join(FRAGMENT_SEPARATOR);
return this.doc.highlight.content[0];
}
}
}

View File

@@ -59,7 +59,6 @@ export default {
searchInPath: "Enable matching query against document path",
suggestPath: "Enable auto-complete in path filter bar",
fragmentSize: "Highlight context size",
fragmentCount: "Number of highlight snippets",
queryMode: "Search mode",
displayMode: "Display",
columns: "Column count",
@@ -243,7 +242,6 @@ export default {
searchInPath: "Abgleich der Abfrage mit dem Dokumentpfad aktivieren",
suggestPath: "Aktiviere Auto-Vervollständigung in Pfadfilter-Leiste",
fragmentSize: "Kontextgröße",
fragmentCount: "Anzahl der hervorgehobenen Snippets",
queryMode: "Such-Modus",
displayMode: "Ansicht",
columns: "Anzahl Spalten",
@@ -419,7 +417,6 @@ export default {
searchInPath: "Activer la recherche dans le chemin des documents",
suggestPath: "Activer l'autocomplétion dans la barre de filtre de chemin",
fragmentSize: "Longueur du contexte de surlignage",
fragmentCount: "Nombre d'extraits surlignés",
queryMode: "Mode de recherche",
displayMode: "Affichage",
columns: "Nombre de colonnes",
@@ -595,7 +592,6 @@ export default {
searchInPath: "匹配文档路径",
suggestPath: "搜索框启用自动补全",
fragmentSize: "高亮上下文大小",
fragmentCount: "突出显示的项目数",
queryMode: "搜索模式",
displayMode: "显示",
columns: "列数",
@@ -771,7 +767,6 @@ export default {
searchInPath: "Włącz szukanie również w ścieżce dokumentu",
suggestPath: "Włącz auto-uzupełnianie w filtrze ścieżek",
fragmentSize: "Podświetl wielkość kontekstu w znakach",
fragmentCount: "Liczba wyróżnionych fragmentów",
queryMode: "Tryb szukania",
displayMode: "Wyświetlanie",
columns: "Liczba kolumn",

View File

@@ -3,7 +3,7 @@ import Vuex from "vuex"
import {deserializeMimes, randomSeed, serializeMimes} from "@/util";
import {getInstance} from "@/plugins/auth0.js";
const CONF_VERSION = 4;
const CONF_VERSION = 3;
Vue.use(Vuex);
@@ -41,7 +41,6 @@ export default new Vuex.Store({
optTagOrOperator: false,
optFuzzy: true,
optFragmentSize: 200,
optFragmentCount: 1,
optQueryMode: "simple",
optSearchInPath: false,
optColumns: "auto",
@@ -171,7 +170,6 @@ export default new Vuex.Store({
setOptSearchInPath: (state, val) => state.optSearchInPath = val,
setOptSuggestPath: (state, val) => state.optSuggestPath = val,
setOptFragmentSize: (state, val) => state.optFragmentSize = val,
setOptFragmentCount: (state, val) => state.optFragmentCount = val,
setOptQueryMode: (state, val) => state.optQueryMode = val,
setOptResultSize: (state, val) => state.optSize = val,
setOptTagOrOperator: (state, val) => state.optTagOrOperator = val,
@@ -432,7 +430,6 @@ export default new Vuex.Store({
optSearchInPath: state => state.optSearchInPath,
optSuggestPath: state => state.optSuggestPath,
optFragmentSize: state => state.optFragmentSize,
optFragmentCount: state => state.optFragmentCount,
optQueryMode: state => state.optQueryMode,
optTreemapType: state => state.optTreemapType,
optTreemapTiling: state => state.optTreemapTiling,

View File

@@ -151,10 +151,6 @@
<b-form-input :value="optFragmentSize" step="10" type="number" min="0"
@input="setOptFragmentSize"></b-form-input>
<label :class="{'text-muted': uiSqliteMode}">{{ $t("opt.fragmentCount") }}</label>
<b-form-input :value="optFragmentCount" :disabled="uiSqliteMode" step="1" type="number" min="1"
@input="setOptFragmentCount"></b-form-input>
<label>{{ $t("opt.resultSize") }}</label>
<b-form-input :value="optResultSize" type="number" min="10"
@input="setOptResultSize"></b-form-input>
@@ -318,7 +314,6 @@ export default {
"optSearchInPath",
"optSuggestPath",
"optFragmentSize",
"optFragmentCount",
"optQueryMode",
"optTreemapType",
"optTreemapTiling",
@@ -365,7 +360,6 @@ export default {
"setOptSearchInPath",
"setOptSuggestPath",
"setOptFragmentSize",
"setOptFragmentCount",
"setOptQueryMode",
"setOptTreemapType",
"setOptTreemapTiling",

View File

@@ -25,7 +25,6 @@ const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
"/usr/share/tesseract-ocr/4.00/tessdata/",
"/usr/share/tesseract-ocr/5/tessdata/",
"./",
NULL
};

View File

@@ -19,7 +19,6 @@
#include "src/database/database.h"
#include "src/index/elastic.h"
#include "sqlite3.h"
#include "ignorelist.h"
#include <pcre.h>
@@ -35,7 +34,6 @@ typedef struct {
pcre *exclude;
pcre_extra *exclude_extra;
int fast;
ignorelist_t *ignorelist;
scan_arc_ctx_t arc_ctx;
scan_comic_ctx_t comic_ctx;

View File

@@ -114,7 +114,7 @@ void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv)
char buf[PATH_MAX];
strcpy(buf, current_job);
SET_CURRENT_JOB(ipc_ctx, current_job);
strcpy(ipc_ctx->current_job[ProcData.thread_id], current_job);
sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC);
}

View File

@@ -64,8 +64,6 @@ typedef struct {
char current_job[MAX_THREADS][PATH_MAX * 2];
} database_ipc_ctx_t;
#define SET_CURRENT_JOB(ctx, job) (strcpy((ctx)->current_job[ProcData.thread_id], job))
typedef struct {
double date_min;
double date_max;

View File

@@ -1,106 +0,0 @@
#include "ignorelist.h"
#include "ctx.h"
#include <git2.h>
typedef struct ignorelist {
git_repository *repo;
char repo_path[PATH_MAX];
int has_rules;
} ignorelist_t;
char *get_tempdir() {
char *tempdir_env = getenv("TMPDIR");
if (tempdir_env != NULL) {
return tempdir_env;
}
return "/tmp/";
}
void ignorelist_destroy(ignorelist_t* ignorelist) {
git_libgit2_shutdown();
if (ignorelist->repo != NULL) {
git_repository_free(ignorelist->repo);
}
free(ignorelist);
}
ignorelist_t *ignorelist_create() {
git_libgit2_init();
ignorelist_t *ignorelist = malloc(sizeof(ignorelist_t));
ignorelist->repo = NULL;
ignorelist->has_rules = FALSE;
char *tempdir = get_tempdir();
if (tempdir[strlen(tempdir) - 1] == '/') {
sprintf(ignorelist->repo_path, "%ssist2-ignorelist-%d", tempdir, getpid());
} else {
sprintf(ignorelist->repo_path, "%s/sist2-ignorelist-%d", tempdir, getpid());
}
return ignorelist;
}
void ignorelist_load_ignore_file(ignorelist_t *ignorelist, const char *filepath) {
FILE *file;
char line[PATH_MAX * 2];
file = fopen(filepath, "r");
if(file == NULL) {
// No ignore list
return;
}
LOG_DEBUGF("ignorelist.c", "Opening temporary git repository %s", ignorelist->repo_path);
int init_result = git_repository_init(&ignorelist->repo, ignorelist->repo_path, TRUE);
if (init_result != 0) {
LOG_FATALF("ignorelist.c", "Got error code from git_repository_init(): %d", init_result);
}
git_ignore_clear_internal_rules(ignorelist->repo);
while(fgets(line, PATH_MAX * 2, file)){
line[strlen(line) - 1] = '\0'; // Strip trailing newline
char *rules = {line,};
int result = git_ignore_add_rule(ignorelist->repo, rules);
if (result == 0) {
LOG_DEBUGF("ignorelist.c", "Load ignore rule: %s", line);
ignorelist->has_rules = TRUE;
} else {
LOG_FATALF("ignorelist.c", "Invalid ignore rule: %s", line);
}
}
fclose(file);
}
int ignorelist_is_ignored(ignorelist_t *ignorelist, const char *filepath) {
if (!ignorelist->has_rules) {
return FALSE;
}
const char *rel_path = filepath + ScanCtx.index.desc.root_len;
int ignored = -1;
int result = git_ignore_path_is_ignored(&ignored, ignorelist->repo, rel_path);
if (result != 0) {
LOG_FATALF("ignorelist.c", "git_ignore_path_is_ignored returned error code: %d", result);
}
return ignored;
}

View File

@@ -1,16 +0,0 @@
#ifndef SIST2_IGNORELIST_H
#define SIST2_IGNORELIST_H
#include "src/sist.h"
typedef struct ignorelist ignorelist_t;
ignorelist_t *ignorelist_create();
void ignorelist_destroy(ignorelist_t* ignorelist);
void ignorelist_load_ignore_file(ignorelist_t* ignorelist, const char* filepath);
int ignorelist_is_ignored(ignorelist_t* ignorelist, const char* filepath);
#endif //SIST2_IGNORELIST_H

View File

@@ -23,17 +23,8 @@ int handle_entry(const char *filepath, const struct stat *info, int typeflag, st
if (ScanCtx.exclude != NULL && EXCLUDED(filepath)) {
LOG_DEBUGF("walk.c", "Excluded: %s", filepath);
if (typeflag == FTW_D) {
return FTW_SKIP_SUBTREE;
}
return FTW_CONTINUE;
}
if (ignorelist_is_ignored(ScanCtx.ignorelist, filepath)) {
LOG_DEBUGF("walk.c", "Ignored: %s", filepath);
if (typeflag == FTW_D) {
if (typeflag == FTW_F && S_ISREG(info->st_mode)) {
} else if (typeflag == FTW_D) {
return FTW_SKIP_SUBTREE;
}

View File

@@ -11,7 +11,6 @@
#include "web/serve.h"
#include "parsing/mime.h"
#include "parsing/parse.h"
#include "ignorelist.h"
#include <signal.h>
#include <pthread.h>
@@ -240,13 +239,6 @@ void sist2_scan(scan_args_t *args) {
LOG_INFOF("main.c", "sist2 v%s", Version);
ScanCtx.ignorelist = ignorelist_create();
char ignore_filepath[PATH_MAX];
sprintf(ignore_filepath, "%s.sist2ignore", args->path);
ignorelist_load_ignore_file(ScanCtx.ignorelist, ignore_filepath);
ScanCtx.pool = tpool_create(ScanCtx.threads, TRUE);
tpool_start(ScanCtx.pool);
@@ -276,7 +268,6 @@ void sist2_scan(scan_args_t *args) {
database_generate_stats(db, args->treemap_threshold);
database_close(db, args->optimize_database);
ignorelist_destroy(ScanCtx.ignorelist);
}
void sist2_index(index_args_t *args) {

View File

@@ -295,167 +295,166 @@ image_g3fax=524575,
image_gif=524576,
image_heic=524577,
image_ief=524578,
image_jp2=524579,
image_jpeg=524580,
image_jutvision=524581,
image_naplps=524582,
image_pict=524583,
image_png=524584,
image_svg=524585 | 0x80000000,
image_svg_xml=524586 | 0x80000000,
image_tiff=524587,
image_vnd_adobe_photoshop=524588 | 0x80000000,
image_vnd_djvu=524589 | 0x80000000,
image_vnd_fpx=524590,
image_vnd_microsoft_icon=524591,
image_vnd_rn_realflash=524592,
image_vnd_rn_realpix=524593,
image_vnd_wap_wbmp=524594,
image_vnd_xiff=524595,
image_webp=524596,
image_wmf=524597,
image_x_3ds=524598,
image_x_adobe_dng=524599 | 0x00800000,
image_x_award_bioslogo=524600,
image_x_canon_cr2=524601 | 0x00800000,
image_x_canon_crw=524602 | 0x00800000,
image_x_cmu_raster=524603,
image_x_cur=524604,
image_x_dcraw=524605 | 0x00800000,
image_x_dwg=524606,
image_x_eps=524607,
image_x_epson_erf=524608 | 0x00800000,
image_x_exr=524609,
image_x_fuji_raf=524610 | 0x00800000,
image_x_gem=524611,
image_x_icns=524612,
image_x_icon=524613 | 0x80000000,
image_x_jg=524614,
image_x_jps=524615,
image_x_kodak_dcr=524616 | 0x00800000,
image_x_kodak_k25=524617 | 0x00800000,
image_x_kodak_kdc=524618 | 0x00800000,
image_x_minolta_mrw=524619 | 0x00800000,
image_x_ms_bmp=524620,
image_x_niff=524621,
image_x_nikon_nef=524622 | 0x00800000,
image_x_olympus_orf=524623 | 0x00800000,
image_x_panasonic_raw=524624 | 0x00800000,
image_x_pcx=524625,
image_x_pentax_pef=524626 | 0x00800000,
image_x_pict=524627,
image_x_portable_bitmap=524628,
image_x_portable_graymap=524629,
image_x_portable_pixmap=524630,
image_x_quicktime=524631,
image_x_rgb=524632,
image_x_sigma_x3f=524633 | 0x00800000,
image_x_sony_arw=524634 | 0x00800000,
image_x_sony_sr2=524635 | 0x00800000,
image_x_sony_srf=524636 | 0x00800000,
image_x_tga=524637,
image_x_tiff=524638,
image_x_win_bitmap=524639,
image_x_xcf=524640 | 0x80000000,
image_x_xpixmap=524641 | 0x80000000,
image_x_xwindowdump=524642,
message_news=196963,
message_rfc822=196964,
model_vnd_dwf=65893,
model_vnd_gdl=65894,
model_vnd_gs_gdl=65895,
model_vrml=65896,
model_x_pov=65897,
text_PGP=590186,
text_asp=590187,
text_css=590188,
text_csv=590189,
text_html=590190 | 0x01000000,
text_javascript=590191,
text_mcf=590192,
text_pascal=590193,
text_plain=590194,
text_richtext=590195,
text_rtf=590196,
text_scriplet=590197,
text_tab_separated_values=590198,
text_troff=590199,
text_uri_list=590200,
text_vnd_abc=590201,
text_vnd_fmi_flexstor=590202,
text_vnd_wap_wml=590203,
text_vnd_wap_wmlscript=590204,
text_webviewhtml=590205,
text_x_Algol68=590206,
text_x_asm=590207,
text_x_audiosoft_intra=590208,
text_x_awk=590209,
text_x_bcpl=590210,
text_x_c=590211,
text_x_c__=590212,
text_x_component=590213,
text_x_diff=590214,
text_x_fortran=590215,
text_x_java=590216,
text_x_la_asf=590217,
text_x_lisp=590218,
text_x_m=590219,
text_x_m4=590220,
text_x_makefile=590221,
text_x_ms_regedit=590222,
text_x_msdos_batch=590223,
text_x_objective_c=590224,
text_x_pascal=590225,
text_x_perl=590226,
text_x_php=590227,
text_x_po=590228,
text_x_python=590229,
text_x_ruby=590230,
text_x_sass=590231,
text_x_script_python=590232,
text_x_scss=590233,
text_x_server_parsed_html=590234,
text_x_setext=590235,
text_x_sgml=590236 | 0x01000000,
text_x_shellscript=590237,
text_x_speech=590238,
text_x_tcl=590239,
text_x_tex=590240,
text_x_uil=590241,
text_x_uuencode=590242,
text_x_vcalendar=590243,
text_x_vcard=590244,
text_xml=590245 | 0x01000000,
video_MP2T=393638,
video_animaflex=393639,
video_avi=393640,
video_avs_video=393641,
video_mp4=393642,
video_mpeg=393643,
video_quicktime=393644,
video_vdo=393645,
video_vivo=393646,
video_vnd_rn_realvideo=393647,
video_vosaic=393648,
video_webm=393649,
video_x_amt_demorun=393650,
video_x_amt_showrun=393651,
video_x_atomic3d_feature=393652,
video_x_dl=393653,
video_x_dv=393654,
video_x_fli=393655,
video_x_flv=393656,
video_x_isvideo=393657,
video_x_jng=393658 | 0x80000000,
video_x_m4v=393659,
video_x_matroska=393660,
video_x_mng=393661,
video_x_motion_jpeg=393662,
video_x_ms_asf=393663,
video_x_msvideo=393664,
video_x_qtc=393665,
video_x_sgi_movie=393666,
x_epoc_x_sisx_app=721347,
image_jpeg=524579,
image_jutvision=524580,
image_naplps=524581,
image_pict=524582,
image_png=524583,
image_svg=524584 | 0x80000000,
image_svg_xml=524585 | 0x80000000,
image_tiff=524586,
image_vnd_adobe_photoshop=524587 | 0x80000000,
image_vnd_djvu=524588 | 0x80000000,
image_vnd_fpx=524589,
image_vnd_microsoft_icon=524590,
image_vnd_rn_realflash=524591,
image_vnd_rn_realpix=524592,
image_vnd_wap_wbmp=524593,
image_vnd_xiff=524594,
image_webp=524595,
image_wmf=524596,
image_x_3ds=524597,
image_x_adobe_dng=524598 | 0x00800000,
image_x_award_bioslogo=524599,
image_x_canon_cr2=524600 | 0x00800000,
image_x_canon_crw=524601 | 0x00800000,
image_x_cmu_raster=524602,
image_x_cur=524603,
image_x_dcraw=524604 | 0x00800000,
image_x_dwg=524605,
image_x_eps=524606,
image_x_epson_erf=524607 | 0x00800000,
image_x_exr=524608,
image_x_fuji_raf=524609 | 0x00800000,
image_x_gem=524610,
image_x_icns=524611,
image_x_icon=524612 | 0x80000000,
image_x_jg=524613,
image_x_jps=524614,
image_x_kodak_dcr=524615 | 0x00800000,
image_x_kodak_k25=524616 | 0x00800000,
image_x_kodak_kdc=524617 | 0x00800000,
image_x_minolta_mrw=524618 | 0x00800000,
image_x_ms_bmp=524619,
image_x_niff=524620,
image_x_nikon_nef=524621 | 0x00800000,
image_x_olympus_orf=524622 | 0x00800000,
image_x_panasonic_raw=524623 | 0x00800000,
image_x_pcx=524624,
image_x_pentax_pef=524625 | 0x00800000,
image_x_pict=524626,
image_x_portable_bitmap=524627,
image_x_portable_graymap=524628,
image_x_portable_pixmap=524629,
image_x_quicktime=524630,
image_x_rgb=524631,
image_x_sigma_x3f=524632 | 0x00800000,
image_x_sony_arw=524633 | 0x00800000,
image_x_sony_sr2=524634 | 0x00800000,
image_x_sony_srf=524635 | 0x00800000,
image_x_tga=524636,
image_x_tiff=524637,
image_x_win_bitmap=524638,
image_x_xcf=524639 | 0x80000000,
image_x_xpixmap=524640 | 0x80000000,
image_x_xwindowdump=524641,
message_news=196962,
message_rfc822=196963,
model_vnd_dwf=65892,
model_vnd_gdl=65893,
model_vnd_gs_gdl=65894,
model_vrml=65895,
model_x_pov=65896,
text_PGP=590185,
text_asp=590186,
text_css=590187,
text_csv=590188,
text_html=590189 | 0x01000000,
text_javascript=590190,
text_mcf=590191,
text_pascal=590192,
text_plain=590193,
text_richtext=590194,
text_rtf=590195,
text_scriplet=590196,
text_tab_separated_values=590197,
text_troff=590198,
text_uri_list=590199,
text_vnd_abc=590200,
text_vnd_fmi_flexstor=590201,
text_vnd_wap_wml=590202,
text_vnd_wap_wmlscript=590203,
text_webviewhtml=590204,
text_x_Algol68=590205,
text_x_asm=590206,
text_x_audiosoft_intra=590207,
text_x_awk=590208,
text_x_bcpl=590209,
text_x_c=590210,
text_x_c__=590211,
text_x_component=590212,
text_x_diff=590213,
text_x_fortran=590214,
text_x_java=590215,
text_x_la_asf=590216,
text_x_lisp=590217,
text_x_m=590218,
text_x_m4=590219,
text_x_makefile=590220,
text_x_ms_regedit=590221,
text_x_msdos_batch=590222,
text_x_objective_c=590223,
text_x_pascal=590224,
text_x_perl=590225,
text_x_php=590226,
text_x_po=590227,
text_x_python=590228,
text_x_ruby=590229,
text_x_sass=590230,
text_x_script_python=590231,
text_x_scss=590232,
text_x_server_parsed_html=590233,
text_x_setext=590234,
text_x_sgml=590235 | 0x01000000,
text_x_shellscript=590236,
text_x_speech=590237,
text_x_tcl=590238,
text_x_tex=590239,
text_x_uil=590240,
text_x_uuencode=590241,
text_x_vcalendar=590242,
text_x_vcard=590243,
text_xml=590244 | 0x01000000,
video_MP2T=393637,
video_animaflex=393638,
video_avi=393639,
video_avs_video=393640,
video_mp4=393641,
video_mpeg=393642,
video_quicktime=393643,
video_vdo=393644,
video_vivo=393645,
video_vnd_rn_realvideo=393646,
video_vosaic=393647,
video_webm=393648,
video_x_amt_demorun=393649,
video_x_amt_showrun=393650,
video_x_atomic3d_feature=393651,
video_x_dl=393652,
video_x_dv=393653,
video_x_fli=393654,
video_x_flv=393655,
video_x_isvideo=393656,
video_x_jng=393657 | 0x80000000,
video_x_m4v=393658,
video_x_matroska=393659,
video_x_mng=393660,
video_x_motion_jpeg=393661,
video_x_ms_asf=393662,
video_x_msvideo=393663,
video_x_qtc=393664,
video_x_sgi_movie=393665,
x_epoc_x_sisx_app=721346,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_x_matlab_data: return "application/x-matlab-data";
@@ -909,7 +908,6 @@ case image_x_sony_arw: return "image/x-sony-arw";
case image_x_sony_sr2: return "image/x-sony-sr2";
case image_x_sony_srf: return "image/x-sony-srf";
case image_x_epson_erf: return "image/x-epson-erf";
case image_jp2: return "image/jp2";
default: return NULL;}}
unsigned int mime_extension_lookup(unsigned long extension_crc32) {switch (extension_crc32) {
case 2495639202:return application_x_matlab_data;
@@ -1293,7 +1291,6 @@ case 1698465774:return image_x_sony_arw;
case 2083014127:return image_x_sony_sr2;
case 271503362:return image_x_sony_srf;
case 142938048:return image_x_epson_erf;
case 1575600018:return image_jp2;
default: return 0;}}
unsigned int mime_name_lookup(unsigned long mime_crc32) {switch (mime_crc32) {
case 3272851765: return application_x_matlab_data;
@@ -1747,8 +1744,7 @@ case 3060720351: return image_x_sony_arw;
case 2944016606: return image_x_sony_sr2;
case 3279729971: return image_x_sony_srf;
case 1665206815: return image_x_epson_erf;
case 1849479005: return image_jp2;
default: return 0;}}
unsigned int mime_ids[] = {655530,655363,655364,655365,655366,655362,655361,655367,655368,655369,655370,655371,655372 | 0x40000000,655373,655374,655375,655376 | 0x08000000,655377,655378,655379,655380,655382,655381,655383,655384,655390,655385,655386,655387,655388,655389,655391,655392,655393,655394,655395 | 0x40000000,655396,655397,655398,655399,655400,655401,655402,655403,655404,655405,655406,655407,655408,655411,655412,655413,655414,655415,655416,655417,655418,655419 | 0x20000000,655421,655422,655423,655424,655425,655426,655427,655428,655429,655430,655431,655432 | 0x04000000,655433 | 0x04000000,655434 | 0x04000000,655435,655436,655437,655438,655439,655440,655441,655442,655443,655444,655445,655446 | 0x10000000,655447,655448,655449 | 0x10000000,655450,655451,655452,655453,655454,655455,655456,655457,655458,655459,655461 | 0x08000000,655460,655462,655463,655464,655465,655466,655467,655468,655469,655470,655471,655472,655473,655474,655475,655476,655477,655478,655479,655480,1,655481,655482,655483,655484,655485,655486,655487,655488,655489 | 0x20000000,655490,655491,655492,655493,655494,655495,655496,655497,655498,655499,655500,655501,655502,655503,655504,655505,655506,655507,655508,655509,655510,655511,655512,655513,655514,655515,655516,655517,655519,655518 | 0x08000000,655521,655520,655522 | 0x08000000,655523 | 0x08000000,655524 | 0x08000000,655525,655526,655527,655528,655529,655531,655532,655533,655534,655535,655599,655536 | 0x02000000,655409 | 0x02000000,655540,655537,655538,655539,655541,655542,655543,655544,655545,655546,655547,655548,655549,655550,655552,655551,655553,655554,655555,655556,655557,655558,655559,655560,655561,655562 | 0x10000000,655563,655564,655565,655566,655567,655569,655568,655570,655571,655572,655573,655574,655575,655576,655577,655578 | 0x10000000,655579,655580,655581,655583,655582,655584,655585,655586,655587,655588,655589,655590,655591,655592,655593,655594,655595 | 0x08000000,655596,655597 | 0x08000000,655600 | 0x10000000,655601,458994 | 0x80000000,458995,458996,458998,458997,458999,459000,459001,459002,459003,459004,459005,459006,459007,459008,459009,459010,459011,459012,459013,459014,459015,459016,459017,459018,459030,459019,459020,459021,459022,459023,459025,459024,459026,459027,459029 | 0x80000000,459028 | 0x80000000,327959 | 0x20000000,327960 | 0x20000000,327962 | 0x20000000,327961 | 0x20000000,524571,524572,524573,524574,524575,524576,524577,524578,524580,524581,524582,524583,524584,524585 | 0x80000000,524586 | 0x80000000,524587,524588 | 0x80000000,524589 | 0x80000000,524590,524591,524592,524593,524594,524595,524596,524597,524598,524600,524603,524604,524606,524607,524609,524611,524612,524613 | 0x80000000,524614,524615,524620,524621,524625,524627,524628,524629,524630,524631,524632,524637,524638,524639,524640 | 0x80000000,524641 | 0x80000000,524642,196963,196964,65893,65894,65895,65896,65897,590187,590188,590190 | 0x01000000,590191,590192,590193,590186,590194,590232,590189,655410,590195,590196,590197,590198,590199,590200,590201,590202,590204,590203,590205,590206,590207,590208,590209,590210,590211,590212,590213,590214,590215,590216,590217,590218,590220,590221,590245 | 0x01000000,590219,590223,590222,590224,590225,590226,590227,590228,590229,590230,590231,590233,590234,590235,590236 | 0x01000000,590237,590238,590239,590240,590241,590242,590243,590244,393639,393640,393641,393638,393642,393643,393644,393645,393646,393647,393648,393649,393650,393651,393652,393653,393654,393655,393656,393657,393658 | 0x80000000,393659,393660,393661,393662,393663,393664,393665,393666,721347,655598,655420,524623 | 0x00800000,524622 | 0x00800000,524610 | 0x00800000,524624 | 0x00800000,524599 | 0x00800000,524601 | 0x00800000,524602 | 0x00800000,524605 | 0x00800000,524616 | 0x00800000,524617 | 0x00800000,524618 | 0x00800000,524619 | 0x00800000,524626 | 0x00800000,524633 | 0x00800000,524634 | 0x00800000,524635 | 0x00800000,524636 | 0x00800000,524608 | 0x00800000,524579,0};
unsigned int mime_ids[] = {655530,655363,655364,655365,655366,655362,655361,655367,655368,655369,655370,655371,655372 | 0x40000000,655373,655374,655375,655376 | 0x08000000,655377,655378,655379,655380,655382,655381,655383,655384,655390,655385,655386,655387,655388,655389,655391,655392,655393,655394,655395 | 0x40000000,655396,655397,655398,655399,655400,655401,655402,655403,655404,655405,655406,655407,655408,655411,655412,655413,655414,655415,655416,655417,655418,655419 | 0x20000000,655421,655422,655423,655424,655425,655426,655427,655428,655429,655430,655431,655432 | 0x04000000,655433 | 0x04000000,655434 | 0x04000000,655435,655436,655437,655438,655439,655440,655441,655442,655443,655444,655445,655446 | 0x10000000,655447,655448,655449 | 0x10000000,655450,655451,655452,655453,655454,655455,655456,655457,655458,655459,655461 | 0x08000000,655460,655462,655463,655464,655465,655466,655467,655468,655469,655470,655471,655472,655473,655474,655475,655476,655477,655478,655479,655480,1,655481,655482,655483,655484,655485,655486,655487,655488,655489 | 0x20000000,655490,655491,655492,655493,655494,655495,655496,655497,655498,655499,655500,655501,655502,655503,655504,655505,655506,655507,655508,655509,655510,655511,655512,655513,655514,655515,655516,655517,655519,655518 | 0x08000000,655521,655520,655522 | 0x08000000,655523 | 0x08000000,655524 | 0x08000000,655525,655526,655527,655528,655529,655531,655532,655533,655534,655535,655599,655536 | 0x02000000,655409 | 0x02000000,655540,655537,655538,655539,655541,655542,655543,655544,655545,655546,655547,655548,655549,655550,655552,655551,655553,655554,655555,655556,655557,655558,655559,655560,655561,655562 | 0x10000000,655563,655564,655565,655566,655567,655569,655568,655570,655571,655572,655573,655574,655575,655576,655577,655578 | 0x10000000,655579,655580,655581,655583,655582,655584,655585,655586,655587,655588,655589,655590,655591,655592,655593,655594,655595 | 0x08000000,655596,655597 | 0x08000000,655600 | 0x10000000,655601,458994 | 0x80000000,458995,458996,458998,458997,458999,459000,459001,459002,459003,459004,459005,459006,459007,459008,459009,459010,459011,459012,459013,459014,459015,459016,459017,459018,459030,459019,459020,459021,459022,459023,459025,459024,459026,459027,459029 | 0x80000000,459028 | 0x80000000,327959 | 0x20000000,327960 | 0x20000000,327962 | 0x20000000,327961 | 0x20000000,524571,524572,524573,524574,524575,524576,524577,524578,524579,524580,524581,524582,524583,524584 | 0x80000000,524585 | 0x80000000,524586,524587 | 0x80000000,524588 | 0x80000000,524589,524590,524591,524592,524593,524594,524595,524596,524597,524599,524602,524603,524605,524606,524608,524610,524611,524612 | 0x80000000,524613,524614,524619,524620,524624,524626,524627,524628,524629,524630,524631,524636,524637,524638,524639 | 0x80000000,524640 | 0x80000000,524641,196962,196963,65892,65893,65894,65895,65896,590186,590187,590189 | 0x01000000,590190,590191,590192,590185,590193,590231,590188,655410,590194,590195,590196,590197,590198,590199,590200,590201,590203,590202,590204,590205,590206,590207,590208,590209,590210,590211,590212,590213,590214,590215,590216,590217,590219,590220,590244 | 0x01000000,590218,590222,590221,590223,590224,590225,590226,590227,590228,590229,590230,590232,590233,590234,590235 | 0x01000000,590236,590237,590238,590239,590240,590241,590242,590243,393638,393639,393640,393637,393641,393642,393643,393644,393645,393646,393647,393648,393649,393650,393651,393652,393653,393654,393655,393656,393657 | 0x80000000,393658,393659,393660,393661,393662,393663,393664,393665,721346,655598,655420,524622 | 0x00800000,524621 | 0x00800000,524609 | 0x00800000,524623 | 0x00800000,524598 | 0x00800000,524600 | 0x00800000,524601 | 0x00800000,524604 | 0x00800000,524615 | 0x00800000,524616 | 0x00800000,524617 | 0x00800000,524618 | 0x00800000,524625 | 0x00800000,524632 | 0x00800000,524633 | 0x00800000,524634 | 0x00800000,524635 | 0x00800000,524607 | 0x00800000,0};
unsigned int* get_mime_ids() { return mime_ids; }
#endif

View File

@@ -142,10 +142,6 @@ void parse(parse_job_t *job) {
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
}
if (IS_SUB_JOB(job)) {
SET_CURRENT_JOB(ProcData.ipc_db->ipc_ctx, job->filepath);
}
document_t *doc = malloc(sizeof(document_t));
strcpy(doc->filepath, job->filepath);

View File

@@ -51,11 +51,11 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "3.5.0"
#define VERSION "3.4.2"
static const char *const Version = VERSION;
static const int VersionMajor = 3;
static const int VersionMinor = 5;
static const int VersionPatch = 0;
static const int VersionMinor = 4;
static const int VersionPatch = 2;
#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown

View File

@@ -175,19 +175,9 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
return TRUE;
}
#define IS_IGNORED_MESSAGE(message) \
( \
strstr(message, "invalid glyph index") \
|| strstr(message, "... repeated") \
) \
void fz_err_callback(void *user, const char *message) {
document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message);
}
@@ -195,10 +185,6 @@ void fz_err_callback(void *user, const char *message) {
void fz_warn_callback(void *user, const char *message) {
document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message);
}

View File

@@ -223,10 +223,14 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d
void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) {
if (meta_contains_key(doc->meta_head, key)) {
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s'",
key, tag->value);
return;
meta_line_t *meta = doc->meta_head;
while (meta != NULL) {
if (meta->key == key) {
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'",
key, meta->str_val, key, tag->value);
return;
}
meta = meta->next;
}
text_buffer_t tex = text_buffer_create(-1);
@@ -441,7 +445,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
return SAVE_THUMBNAIL_FAILED;
}
if (ctx->tesseract_lang != NULL && thumbnail_index == 0 && !meta_contains_key(doc->meta_head, MetaContent)) {
if (ctx->tesseract_lang != NULL && thumbnail_index == 0) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
}

View File

@@ -172,8 +172,6 @@ typedef struct {
char filepath[PATH_MAX * 2 + 1];
} parse_job_t;
#define IS_SUB_JOB(job) ((job)->parent[0] != '\0')
#include "util.h"

View File

@@ -392,18 +392,4 @@ static parse_job_t *create_parse_job(const char *filepath, int mtime, size_t st_
return job;
}
static int meta_contains_key (meta_line_t *meta_head, enum metakey key) {
meta_line_t *meta = meta_head;
while (meta != NULL) {
if (meta->key == key) {
return TRUE;
}
meta = meta->next;
}
return FALSE;
}
#endif