Compare commits

..

22 Commits

Author SHA1 Message Date
Shy
bcab40783c Version bump 2025-07-05 08:51:28 -04:00
Shy
ea23bf01e3 Fixes #536 2025-07-05 08:51:15 -04:00
Shy
f5d070496f Closes #535 2025-07-05 08:17:32 -04:00
Shy
509770ee24 Version bump 2025-07-03 22:35:18 -04:00
Shy
c319547b1e Add image/jp2 mime type 2025-07-03 22:32:03 -04:00
Shy
04f993be75 Version bump 2025-06-12 21:04:35 -04:00
Shy
ab9eab3536 Merge pull request #527 from RickConsole/fix/index-hanging
fixed occasional hanging when indexing
2025-06-12 20:19:53 -04:00
Rick Console
8bb12f8ae2 fixed occasional hanging when indexing 2025-04-18 16:16:38 -04:00
Shy
670dad185e Fix #521 2025-03-19 19:22:17 -04:00
Shy
bbbd727e6a Update sist2-python version 2025-03-19 18:38:21 -04:00
Shy
d800effad9 Merge pull request #511 from dpieski/patch-5
Update README.md
2025-02-06 17:58:36 -05:00
Shy
371e9c408e Merge pull request #512 from dpieski/patch-6
Update README.md
2025-02-06 17:58:07 -05:00
Andrew
ee1b1d8bb4 Update README.md
Moved README references from simon987 to sist2app
2025-02-03 15:09:11 -06:00
Andrew
63a097a463 Update README.md
Update to the docker-compose.yml example.
2025-02-03 15:00:03 -06:00
Shy
7a03a2202e Fix #481 2025-01-24 19:40:08 -05:00
Shy
050fc500ce Fix #462 2025-01-24 19:22:01 -05:00
Shy
d44679131b Update compose file to avoid confusion. Fixes #490 2025-01-23 21:45:01 -05:00
Shy
4dd5e70406 Fix #492 2025-01-23 21:40:37 -05:00
Shy
5a82581992 Fix magic database problem 2025-01-23 21:40:27 -05:00
Shy
0dc18a56c0 Fix #509 2025-01-23 19:10:17 -05:00
Shy
258b2e31e6 Version bump 2025-01-23 19:10:02 -05:00
Shy
c726074029 Update tessdata paths 2025-01-23 19:09:54 -05:00
22 changed files with 291 additions and 211 deletions

View File

@@ -89,7 +89,7 @@ target_include_directories(
target_compile_options(
sist2
PRIVATE
-fPIC
# -fPIC
)
if (SIST_DEBUG)
@@ -147,7 +147,7 @@ add_dependencies(
target_link_libraries(
sist2
m
# m
z
argparse
unofficial::mongoose::mongoose

View File

@@ -1,5 +1,5 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
![GitHub](https://img.shields.io/github/license/sist2app/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/sist2app/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/sist2app/sist2)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
@@ -38,8 +38,6 @@ sist2 (Simple incremental search tool)
### Using Docker Compose *(Windows/Linux/Mac)*
```yaml
version: "3"
services:
elasticsearch:
image: elasticsearch:7.17.9
@@ -53,11 +51,11 @@ services:
- "PUID=1000"
- "PGID=1000"
sist2-admin:
image: simon987/sist2:3.4.2-x64-linux
image: sist2app/sist2:x64-linux
restart: unless-stopped
volumes:
- /data/sist2-admin-data/:/sist2-admin/
- /:/host
- /<path to index>/:/host
ports:
- 4090:4090
# NOTE: Don't expose this port publicly!
@@ -81,7 +79,7 @@ Navigate to http://localhost:8080/ to configure sist2-admin.
```
* **SQLite**: No installation required
2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
2. Download the [latest sist2 release](https://github.com/sist2app/sist2/releases).
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
3. See [usage guide](docs/USAGE.md) for command line usage.
@@ -100,20 +98,20 @@ Example usage:
| File type | Library | Content | Thumbnail | Metadata |
|:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------|
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
| cbz,cbr | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | - | yes | - |
| cbz,cbr | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | - | yes | - |
| `audio/*` | ffmpeg | - | yes | ID3 tags |
| `video/*` | ffmpeg | - | yes | title, comment, artist |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/sist2app/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags |
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
| `text/plain` | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| `text/plain` | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
| docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| docx, xlsx, pptx | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| doc (MS Word 97-2003) | antiword | yes | no | author, title |
| mobi, azw, azw3 | libmobi | yes | yes | author, title |
| wpd (WordPerfect) | libwpd | yes | no | *planned* |
| json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
| json, jsonl, ndjson | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | - | - |
\* *See [Archive files](#archive-files)*
@@ -137,7 +135,7 @@ You can enable OCR support for ebook (pdf,xps,fb2,epub) or image file types with
Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` image comes with common languages
The `sist2app/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa, chi_sim, deu, pol) pre-installed.
You can use the `+` separator to specify multiple languages. The language
@@ -177,13 +175,13 @@ sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported re
to enable it.
The text processing is done in your browser, no data is sent to any third-party services.
See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details.
See [sist2app/sist2-ner-models](https://github.com/sist2app/sist2-ner-models) for more details.
#### List of available repositories:
| URL | Maintainer | Purpose |
|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
| [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General |
| [sist2app/sist2-ner-models](https://raw.githubusercontent.com/sist2app/sist2-ner-models/main/repo.json) | [sist2app](https://github.com/sist2app) | General |
<details>
<summary>Screenshot</summary>
@@ -199,7 +197,7 @@ You can compile **sist2** by yourself if you don't want to use the pre-compiled
### Using docker
```bash
git clone --recursive https://github.com/simon987/sist2/
git clone --recursive https://github.com/sist2app/sist2/
cd sist2
docker build . -t my-sist2-image
# Copy sist2 executable from docker image
@@ -214,7 +212,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
```
2. Install vcpkg using my fork: https://github.com/simon987/vcpkg
2. Install vcpkg using my fork: https://github.com/sist2app/vcpkg
3. Install vcpkg dependencies
```bash
@@ -223,9 +221,9 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
4. Build
```bash
git clone --recursive https://github.com/simon987/sist2/
git clone --recursive https://github.com/sist2app/sist2/
(cd sist2-vue; npm install; npm run build)
(cd sist2-admin/frontend; npm install; npm run build)
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
cmake -DSIST_DEBUG=off -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .
make
```

View File

@@ -18,7 +18,7 @@ services:
container_name: sist2-admin
volumes:
- /data/sist2-admin-data/:/sist2-admin/
- /:/host
- /<path to index>/:/host
ports:
- 4090:4090
# NOTE: Don't export this port publicly!

View File

@@ -1,5 +1,16 @@
with open("/usr/lib/file/magic.mgc", "rb") as f:
data = f.read()
MAGIC_PATHS = [
"/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
"/work/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
"/usr/lib/file/magic.mgc"
]
for path in MAGIC_PATHS:
try:
with open(path, "rb") as f:
data = f.read()
break
except:
continue
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -449,4 +449,5 @@ image/x-sigma-x3f, xf3
image/x-sony-arw, arw
image/x-sony-sr2, sr2
image/x-sony-srf, srf
image/x-epson-erf, erf
image/x-epson-erf, erf
image/jp2, jp2
1 application/x-matlab-data mat
449 image/x-sony-arw arw
450 image/x-sony-sr2 sr2
451 image/x-sony-srf srf
452 image/x-epson-erf erf
453 image/jp2 jp2

View File

@@ -93,7 +93,9 @@ export default {
threads: "Number of threads",
batchSize: "Index batch size",
script: "User script",
searchIndex: "Search index file location"
searchIndex: "Search index file location",
esMappings: "Elasticsearch mappings file override",
esSettings: "Elasticsearch settings file override"
},
scanOptions: {
title: "Scanning options",

View File

@@ -44,6 +44,12 @@
<label>{{ $t("backendOptions.batchSize") }}</label>
<b-form-input v-model="backend.batch_size" type="number" min="1" @change="update()"></b-form-input>
<label>{{ $t("backendOptions.esMappings") }}</label>
<b-form-textarea v-model="backend.es_mappings" rows="4" @change="update()"></b-form-textarea>
<label>{{ $t("backendOptions.esSettings") }}</label>
<b-form-textarea v-model="backend.es_settings" rows="4" @change="update()"></b-form-textarea>
</template>
<template v-else>
<label>{{ $t("backendOptions.searchIndex") }}</label>

View File

@@ -4,4 +4,4 @@ uvicorn
websockets
pycron
GitPython
git+https://github.com/sist2app/sist2-python.git
git+https://github.com/sist2app/sist2-python.git@2.1

View File

@@ -220,7 +220,7 @@ class Sist2IndexTask(Sist2Task):
except ProcessLookupError:
pass
try:
os.wait()
os.waitpid(pid, 0)
except ChildProcessError:
pass

View File

@@ -10,7 +10,7 @@ from logging import FileHandler, StreamHandler
from subprocess import Popen, PIPE
from tempfile import NamedTemporaryFile
from threading import Thread
from typing import List
from typing import List, Optional
from pydantic import BaseModel
@@ -40,6 +40,8 @@ class Sist2SearchBackend(BaseModel):
es_url: str = "http://elasticsearch:9200"
es_insecure_ssl: bool = False
es_mappings: Optional[str] = None
es_settings: Optional[str] = None
es_index: str = "sist2"
threads: int = 1
batch_size: int = 70
@@ -57,6 +59,8 @@ class IndexOptions(BaseModel):
path: str = None
incremental_index: bool = True
search_backend: str = None
es_mappings_file: Optional[str] = None
es_settings_file: Optional[str] = None
def __init__(self, **kwargs):
super().__init__(**kwargs)
@@ -75,6 +79,12 @@ class IndexOptions(BaseModel):
if search_backend.es_insecure_ssl:
args.append(f"--es-insecure-ssl")
if self.es_mappings_file:
args.append(f"--mappings-file={self.es_mappings_file}")
if self.es_settings_file:
args.append(f"--settings-file={self.es_settings_file}")
if self.incremental_index:
args.append(f"--incremental-index")
@@ -249,6 +259,20 @@ class Sist2:
def index(self, options: IndexOptions, search_backend: Sist2SearchBackend, logs_cb, set_pid_cb):
if search_backend.es_mappings:
with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".txt", delete=False) as f:
f.write(search_backend.es_mappings)
options.es_mappings_file = f.name
else:
options.es_mappings_file = None
if search_backend.es_settings:
with NamedTemporaryFile("w", prefix="sist2-admin", suffix=".txt", delete=False) as f:
f.write(search_backend.es_settings)
options.es_settings_file = f.name
else:
options.es_settings_file = None
args = [
self.bin_path,
*options.args(search_backend),

View File

@@ -69,7 +69,8 @@ class Sist2Api {
hit._props.isImage = true;
}
if ("width" in hit._source && !hit._props.isSubDocument && hit._source.videoc !== "tiff"
&& hit._source.videoc !== "raw" && hit._source.videoc !== "ppm") {
&& hit._source.videoc !== "raw" && hit._source.videoc !== "ppm"
&& hit._source.mime !== "image/jp2") {
hit._props.isPlayableImage = true;
}
if ("width" in hit._source && "height" in hit._source) {
@@ -309,7 +310,7 @@ class Sist2Api {
}
getTagsSqlite() {
return axios.get(`${this.baseUrl}/fts/tags`)
return axios.get(`${this.baseUrl}fts/tags`)
.then(resp => {
return resp.data.map(tag => this._createEsTag(tag.tag, tag.count))
});
@@ -566,7 +567,7 @@ class Sist2Api {
}
getDocumentSqlite(sid) {
return axios.get(`${this.baseUrl}/fts/d/${sid}`)
return axios.get(`${this.baseUrl}fts/d/${sid}`)
.then(resp => ({
_source: resp.data
}));
@@ -589,7 +590,7 @@ class Sist2Api {
}
getTagSuggestionsSqlite(prefix) {
return axios.post(`${this.baseUrl}/fts/suggestTags`, prefix)
return axios.post(`${this.baseUrl}fts/suggestTags`, prefix)
.then(resp => (resp.data));
}
@@ -620,7 +621,7 @@ class Sist2Api {
}
getEmbeddings(sid, modelId) {
return axios.post(`${this.baseUrl}/e/${sid}/${modelId.toString().padStart(3, '0')}`)
return axios.post(`${this.baseUrl}e/${sid}/${modelId.toString().padStart(3, '0')}`)
.then(resp => (resp.data));
}
}

View File

@@ -117,11 +117,11 @@ class Sist2ElasticsearchQuery {
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
filters.push({range: {mtime: {gte: dateMin, lte: dateMax, format: "epoch_second"}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}})
filters.push({range: {mtime: {gte: dateMin, format: "epoch_second"}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}})
filters.push({range: {mtime: {lte: dateMax, format: "epoch_second"}}})
}
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes

View File

@@ -25,6 +25,7 @@ const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
"/usr/share/tesseract-ocr/4.00/tessdata/",
"/usr/share/tesseract-ocr/5/tessdata/",
"./",
NULL
};

View File

@@ -114,7 +114,7 @@ void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv)
char buf[PATH_MAX];
strcpy(buf, current_job);
strcpy(ipc_ctx->current_job[ProcData.thread_id], current_job);
SET_CURRENT_JOB(ipc_ctx, current_job);
sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC);
}

View File

@@ -64,6 +64,8 @@ typedef struct {
char current_job[MAX_THREADS][PATH_MAX * 2];
} database_ipc_ctx_t;
#define SET_CURRENT_JOB(ctx, job) (strcpy((ctx)->current_job[ProcData.thread_id], job))
typedef struct {
double date_min;
double date_max;

View File

@@ -295,166 +295,167 @@ image_g3fax=524575,
image_gif=524576,
image_heic=524577,
image_ief=524578,
image_jpeg=524579,
image_jutvision=524580,
image_naplps=524581,
image_pict=524582,
image_png=524583,
image_svg=524584 | 0x80000000,
image_svg_xml=524585 | 0x80000000,
image_tiff=524586,
image_vnd_adobe_photoshop=524587 | 0x80000000,
image_vnd_djvu=524588 | 0x80000000,
image_vnd_fpx=524589,
image_vnd_microsoft_icon=524590,
image_vnd_rn_realflash=524591,
image_vnd_rn_realpix=524592,
image_vnd_wap_wbmp=524593,
image_vnd_xiff=524594,
image_webp=524595,
image_wmf=524596,
image_x_3ds=524597,
image_x_adobe_dng=524598 | 0x00800000,
image_x_award_bioslogo=524599,
image_x_canon_cr2=524600 | 0x00800000,
image_x_canon_crw=524601 | 0x00800000,
image_x_cmu_raster=524602,
image_x_cur=524603,
image_x_dcraw=524604 | 0x00800000,
image_x_dwg=524605,
image_x_eps=524606,
image_x_epson_erf=524607 | 0x00800000,
image_x_exr=524608,
image_x_fuji_raf=524609 | 0x00800000,
image_x_gem=524610,
image_x_icns=524611,
image_x_icon=524612 | 0x80000000,
image_x_jg=524613,
image_x_jps=524614,
image_x_kodak_dcr=524615 | 0x00800000,
image_x_kodak_k25=524616 | 0x00800000,
image_x_kodak_kdc=524617 | 0x00800000,
image_x_minolta_mrw=524618 | 0x00800000,
image_x_ms_bmp=524619,
image_x_niff=524620,
image_x_nikon_nef=524621 | 0x00800000,
image_x_olympus_orf=524622 | 0x00800000,
image_x_panasonic_raw=524623 | 0x00800000,
image_x_pcx=524624,
image_x_pentax_pef=524625 | 0x00800000,
image_x_pict=524626,
image_x_portable_bitmap=524627,
image_x_portable_graymap=524628,
image_x_portable_pixmap=524629,
image_x_quicktime=524630,
image_x_rgb=524631,
image_x_sigma_x3f=524632 | 0x00800000,
image_x_sony_arw=524633 | 0x00800000,
image_x_sony_sr2=524634 | 0x00800000,
image_x_sony_srf=524635 | 0x00800000,
image_x_tga=524636,
image_x_tiff=524637,
image_x_win_bitmap=524638,
image_x_xcf=524639 | 0x80000000,
image_x_xpixmap=524640 | 0x80000000,
image_x_xwindowdump=524641,
message_news=196962,
message_rfc822=196963,
model_vnd_dwf=65892,
model_vnd_gdl=65893,
model_vnd_gs_gdl=65894,
model_vrml=65895,
model_x_pov=65896,
text_PGP=590185,
text_asp=590186,
text_css=590187,
text_csv=590188,
text_html=590189 | 0x01000000,
text_javascript=590190,
text_mcf=590191,
text_pascal=590192,
text_plain=590193,
text_richtext=590194,
text_rtf=590195,
text_scriplet=590196,
text_tab_separated_values=590197,
text_troff=590198,
text_uri_list=590199,
text_vnd_abc=590200,
text_vnd_fmi_flexstor=590201,
text_vnd_wap_wml=590202,
text_vnd_wap_wmlscript=590203,
text_webviewhtml=590204,
text_x_Algol68=590205,
text_x_asm=590206,
text_x_audiosoft_intra=590207,
text_x_awk=590208,
text_x_bcpl=590209,
text_x_c=590210,
text_x_c__=590211,
text_x_component=590212,
text_x_diff=590213,
text_x_fortran=590214,
text_x_java=590215,
text_x_la_asf=590216,
text_x_lisp=590217,
text_x_m=590218,
text_x_m4=590219,
text_x_makefile=590220,
text_x_ms_regedit=590221,
text_x_msdos_batch=590222,
text_x_objective_c=590223,
text_x_pascal=590224,
text_x_perl=590225,
text_x_php=590226,
text_x_po=590227,
text_x_python=590228,
text_x_ruby=590229,
text_x_sass=590230,
text_x_script_python=590231,
text_x_scss=590232,
text_x_server_parsed_html=590233,
text_x_setext=590234,
text_x_sgml=590235 | 0x01000000,
text_x_shellscript=590236,
text_x_speech=590237,
text_x_tcl=590238,
text_x_tex=590239,
text_x_uil=590240,
text_x_uuencode=590241,
text_x_vcalendar=590242,
text_x_vcard=590243,
text_xml=590244 | 0x01000000,
video_MP2T=393637,
video_animaflex=393638,
video_avi=393639,
video_avs_video=393640,
video_mp4=393641,
video_mpeg=393642,
video_quicktime=393643,
video_vdo=393644,
video_vivo=393645,
video_vnd_rn_realvideo=393646,
video_vosaic=393647,
video_webm=393648,
video_x_amt_demorun=393649,
video_x_amt_showrun=393650,
video_x_atomic3d_feature=393651,
video_x_dl=393652,
video_x_dv=393653,
video_x_fli=393654,
video_x_flv=393655,
video_x_isvideo=393656,
video_x_jng=393657 | 0x80000000,
video_x_m4v=393658,
video_x_matroska=393659,
video_x_mng=393660,
video_x_motion_jpeg=393661,
video_x_ms_asf=393662,
video_x_msvideo=393663,
video_x_qtc=393664,
video_x_sgi_movie=393665,
x_epoc_x_sisx_app=721346,
image_jp2=524579,
image_jpeg=524580,
image_jutvision=524581,
image_naplps=524582,
image_pict=524583,
image_png=524584,
image_svg=524585 | 0x80000000,
image_svg_xml=524586 | 0x80000000,
image_tiff=524587,
image_vnd_adobe_photoshop=524588 | 0x80000000,
image_vnd_djvu=524589 | 0x80000000,
image_vnd_fpx=524590,
image_vnd_microsoft_icon=524591,
image_vnd_rn_realflash=524592,
image_vnd_rn_realpix=524593,
image_vnd_wap_wbmp=524594,
image_vnd_xiff=524595,
image_webp=524596,
image_wmf=524597,
image_x_3ds=524598,
image_x_adobe_dng=524599 | 0x00800000,
image_x_award_bioslogo=524600,
image_x_canon_cr2=524601 | 0x00800000,
image_x_canon_crw=524602 | 0x00800000,
image_x_cmu_raster=524603,
image_x_cur=524604,
image_x_dcraw=524605 | 0x00800000,
image_x_dwg=524606,
image_x_eps=524607,
image_x_epson_erf=524608 | 0x00800000,
image_x_exr=524609,
image_x_fuji_raf=524610 | 0x00800000,
image_x_gem=524611,
image_x_icns=524612,
image_x_icon=524613 | 0x80000000,
image_x_jg=524614,
image_x_jps=524615,
image_x_kodak_dcr=524616 | 0x00800000,
image_x_kodak_k25=524617 | 0x00800000,
image_x_kodak_kdc=524618 | 0x00800000,
image_x_minolta_mrw=524619 | 0x00800000,
image_x_ms_bmp=524620,
image_x_niff=524621,
image_x_nikon_nef=524622 | 0x00800000,
image_x_olympus_orf=524623 | 0x00800000,
image_x_panasonic_raw=524624 | 0x00800000,
image_x_pcx=524625,
image_x_pentax_pef=524626 | 0x00800000,
image_x_pict=524627,
image_x_portable_bitmap=524628,
image_x_portable_graymap=524629,
image_x_portable_pixmap=524630,
image_x_quicktime=524631,
image_x_rgb=524632,
image_x_sigma_x3f=524633 | 0x00800000,
image_x_sony_arw=524634 | 0x00800000,
image_x_sony_sr2=524635 | 0x00800000,
image_x_sony_srf=524636 | 0x00800000,
image_x_tga=524637,
image_x_tiff=524638,
image_x_win_bitmap=524639,
image_x_xcf=524640 | 0x80000000,
image_x_xpixmap=524641 | 0x80000000,
image_x_xwindowdump=524642,
message_news=196963,
message_rfc822=196964,
model_vnd_dwf=65893,
model_vnd_gdl=65894,
model_vnd_gs_gdl=65895,
model_vrml=65896,
model_x_pov=65897,
text_PGP=590186,
text_asp=590187,
text_css=590188,
text_csv=590189,
text_html=590190 | 0x01000000,
text_javascript=590191,
text_mcf=590192,
text_pascal=590193,
text_plain=590194,
text_richtext=590195,
text_rtf=590196,
text_scriplet=590197,
text_tab_separated_values=590198,
text_troff=590199,
text_uri_list=590200,
text_vnd_abc=590201,
text_vnd_fmi_flexstor=590202,
text_vnd_wap_wml=590203,
text_vnd_wap_wmlscript=590204,
text_webviewhtml=590205,
text_x_Algol68=590206,
text_x_asm=590207,
text_x_audiosoft_intra=590208,
text_x_awk=590209,
text_x_bcpl=590210,
text_x_c=590211,
text_x_c__=590212,
text_x_component=590213,
text_x_diff=590214,
text_x_fortran=590215,
text_x_java=590216,
text_x_la_asf=590217,
text_x_lisp=590218,
text_x_m=590219,
text_x_m4=590220,
text_x_makefile=590221,
text_x_ms_regedit=590222,
text_x_msdos_batch=590223,
text_x_objective_c=590224,
text_x_pascal=590225,
text_x_perl=590226,
text_x_php=590227,
text_x_po=590228,
text_x_python=590229,
text_x_ruby=590230,
text_x_sass=590231,
text_x_script_python=590232,
text_x_scss=590233,
text_x_server_parsed_html=590234,
text_x_setext=590235,
text_x_sgml=590236 | 0x01000000,
text_x_shellscript=590237,
text_x_speech=590238,
text_x_tcl=590239,
text_x_tex=590240,
text_x_uil=590241,
text_x_uuencode=590242,
text_x_vcalendar=590243,
text_x_vcard=590244,
text_xml=590245 | 0x01000000,
video_MP2T=393638,
video_animaflex=393639,
video_avi=393640,
video_avs_video=393641,
video_mp4=393642,
video_mpeg=393643,
video_quicktime=393644,
video_vdo=393645,
video_vivo=393646,
video_vnd_rn_realvideo=393647,
video_vosaic=393648,
video_webm=393649,
video_x_amt_demorun=393650,
video_x_amt_showrun=393651,
video_x_atomic3d_feature=393652,
video_x_dl=393653,
video_x_dv=393654,
video_x_fli=393655,
video_x_flv=393656,
video_x_isvideo=393657,
video_x_jng=393658 | 0x80000000,
video_x_m4v=393659,
video_x_matroska=393660,
video_x_mng=393661,
video_x_motion_jpeg=393662,
video_x_ms_asf=393663,
video_x_msvideo=393664,
video_x_qtc=393665,
video_x_sgi_movie=393666,
x_epoc_x_sisx_app=721347,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_x_matlab_data: return "application/x-matlab-data";
@@ -908,6 +909,7 @@ case image_x_sony_arw: return "image/x-sony-arw";
case image_x_sony_sr2: return "image/x-sony-sr2";
case image_x_sony_srf: return "image/x-sony-srf";
case image_x_epson_erf: return "image/x-epson-erf";
case image_jp2: return "image/jp2";
default: return NULL;}}
unsigned int mime_extension_lookup(unsigned long extension_crc32) {switch (extension_crc32) {
case 2495639202:return application_x_matlab_data;
@@ -1291,6 +1293,7 @@ case 1698465774:return image_x_sony_arw;
case 2083014127:return image_x_sony_sr2;
case 271503362:return image_x_sony_srf;
case 142938048:return image_x_epson_erf;
case 1575600018:return image_jp2;
default: return 0;}}
unsigned int mime_name_lookup(unsigned long mime_crc32) {switch (mime_crc32) {
case 3272851765: return application_x_matlab_data;
@@ -1744,7 +1747,8 @@ case 3060720351: return image_x_sony_arw;
case 2944016606: return image_x_sony_sr2;
case 3279729971: return image_x_sony_srf;
case 1665206815: return image_x_epson_erf;
case 1849479005: return image_jp2;
default: return 0;}}
unsigned int mime_ids[] = {655530,655363,655364,655365,655366,655362,655361,655367,655368,655369,655370,655371,655372 | 0x40000000,655373,655374,655375,655376 | 0x08000000,655377,655378,655379,655380,655382,655381,655383,655384,655390,655385,655386,655387,655388,655389,655391,655392,655393,655394,655395 | 0x40000000,655396,655397,655398,655399,655400,655401,655402,655403,655404,655405,655406,655407,655408,655411,655412,655413,655414,655415,655416,655417,655418,655419 | 0x20000000,655421,655422,655423,655424,655425,655426,655427,655428,655429,655430,655431,655432 | 0x04000000,655433 | 0x04000000,655434 | 0x04000000,655435,655436,655437,655438,655439,655440,655441,655442,655443,655444,655445,655446 | 0x10000000,655447,655448,655449 | 0x10000000,655450,655451,655452,655453,655454,655455,655456,655457,655458,655459,655461 | 0x08000000,655460,655462,655463,655464,655465,655466,655467,655468,655469,655470,655471,655472,655473,655474,655475,655476,655477,655478,655479,655480,1,655481,655482,655483,655484,655485,655486,655487,655488,655489 | 0x20000000,655490,655491,655492,655493,655494,655495,655496,655497,655498,655499,655500,655501,655502,655503,655504,655505,655506,655507,655508,655509,655510,655511,655512,655513,655514,655515,655516,655517,655519,655518 | 0x08000000,655521,655520,655522 | 0x08000000,655523 | 0x08000000,655524 | 0x08000000,655525,655526,655527,655528,655529,655531,655532,655533,655534,655535,655599,655536 | 0x02000000,655409 | 0x02000000,655540,655537,655538,655539,655541,655542,655543,655544,655545,655546,655547,655548,655549,655550,655552,655551,655553,655554,655555,655556,655557,655558,655559,655560,655561,655562 | 0x10000000,655563,655564,655565,655566,655567,655569,655568,655570,655571,655572,655573,655574,655575,655576,655577,655578 | 0x10000000,655579,655580,655581,655583,655582,655584,655585,655586,655587,655588,655589,655590,655591,655592,655593,655594,655595 | 0x08000000,655596,655597 | 0x08000000,655600 | 0x10000000,655601,458994 | 0x80000000,458995,458996,458998,458997,458999,459000,459001,459002,459003,459004,459005,459006,459007,459008,459009,459010,459011,459012,459013,459014,459015,459016,459017,459018,459030,459019,459020,459021,459022,459023,459025,459024,459026,459027,459029 | 0x80000000,459028 | 0x80000000,327959 | 0x20000000,327960 | 0x20000000,327962 | 0x20000000,327961 | 0x20000000,524571,524572,524573,524574,524575,524576,524577,524578,524579,524580,524581,524582,524583,524584 | 0x80000000,524585 | 0x80000000,524586,524587 | 0x80000000,524588 | 0x80000000,524589,524590,524591,524592,524593,524594,524595,524596,524597,524599,524602,524603,524605,524606,524608,524610,524611,524612 | 0x80000000,524613,524614,524619,524620,524624,524626,524627,524628,524629,524630,524631,524636,524637,524638,524639 | 0x80000000,524640 | 0x80000000,524641,196962,196963,65892,65893,65894,65895,65896,590186,590187,590189 | 0x01000000,590190,590191,590192,590185,590193,590231,590188,655410,590194,590195,590196,590197,590198,590199,590200,590201,590203,590202,590204,590205,590206,590207,590208,590209,590210,590211,590212,590213,590214,590215,590216,590217,590219,590220,590244 | 0x01000000,590218,590222,590221,590223,590224,590225,590226,590227,590228,590229,590230,590232,590233,590234,590235 | 0x01000000,590236,590237,590238,590239,590240,590241,590242,590243,393638,393639,393640,393637,393641,393642,393643,393644,393645,393646,393647,393648,393649,393650,393651,393652,393653,393654,393655,393656,393657 | 0x80000000,393658,393659,393660,393661,393662,393663,393664,393665,721346,655598,655420,524622 | 0x00800000,524621 | 0x00800000,524609 | 0x00800000,524623 | 0x00800000,524598 | 0x00800000,524600 | 0x00800000,524601 | 0x00800000,524604 | 0x00800000,524615 | 0x00800000,524616 | 0x00800000,524617 | 0x00800000,524618 | 0x00800000,524625 | 0x00800000,524632 | 0x00800000,524633 | 0x00800000,524634 | 0x00800000,524635 | 0x00800000,524607 | 0x00800000,0};
unsigned int mime_ids[] = {655530,655363,655364,655365,655366,655362,655361,655367,655368,655369,655370,655371,655372 | 0x40000000,655373,655374,655375,655376 | 0x08000000,655377,655378,655379,655380,655382,655381,655383,655384,655390,655385,655386,655387,655388,655389,655391,655392,655393,655394,655395 | 0x40000000,655396,655397,655398,655399,655400,655401,655402,655403,655404,655405,655406,655407,655408,655411,655412,655413,655414,655415,655416,655417,655418,655419 | 0x20000000,655421,655422,655423,655424,655425,655426,655427,655428,655429,655430,655431,655432 | 0x04000000,655433 | 0x04000000,655434 | 0x04000000,655435,655436,655437,655438,655439,655440,655441,655442,655443,655444,655445,655446 | 0x10000000,655447,655448,655449 | 0x10000000,655450,655451,655452,655453,655454,655455,655456,655457,655458,655459,655461 | 0x08000000,655460,655462,655463,655464,655465,655466,655467,655468,655469,655470,655471,655472,655473,655474,655475,655476,655477,655478,655479,655480,1,655481,655482,655483,655484,655485,655486,655487,655488,655489 | 0x20000000,655490,655491,655492,655493,655494,655495,655496,655497,655498,655499,655500,655501,655502,655503,655504,655505,655506,655507,655508,655509,655510,655511,655512,655513,655514,655515,655516,655517,655519,655518 | 0x08000000,655521,655520,655522 | 0x08000000,655523 | 0x08000000,655524 | 0x08000000,655525,655526,655527,655528,655529,655531,655532,655533,655534,655535,655599,655536 | 0x02000000,655409 | 0x02000000,655540,655537,655538,655539,655541,655542,655543,655544,655545,655546,655547,655548,655549,655550,655552,655551,655553,655554,655555,655556,655557,655558,655559,655560,655561,655562 | 0x10000000,655563,655564,655565,655566,655567,655569,655568,655570,655571,655572,655573,655574,655575,655576,655577,655578 | 0x10000000,655579,655580,655581,655583,655582,655584,655585,655586,655587,655588,655589,655590,655591,655592,655593,655594,655595 | 0x08000000,655596,655597 | 0x08000000,655600 | 0x10000000,655601,458994 | 0x80000000,458995,458996,458998,458997,458999,459000,459001,459002,459003,459004,459005,459006,459007,459008,459009,459010,459011,459012,459013,459014,459015,459016,459017,459018,459030,459019,459020,459021,459022,459023,459025,459024,459026,459027,459029 | 0x80000000,459028 | 0x80000000,327959 | 0x20000000,327960 | 0x20000000,327962 | 0x20000000,327961 | 0x20000000,524571,524572,524573,524574,524575,524576,524577,524578,524580,524581,524582,524583,524584,524585 | 0x80000000,524586 | 0x80000000,524587,524588 | 0x80000000,524589 | 0x80000000,524590,524591,524592,524593,524594,524595,524596,524597,524598,524600,524603,524604,524606,524607,524609,524611,524612,524613 | 0x80000000,524614,524615,524620,524621,524625,524627,524628,524629,524630,524631,524632,524637,524638,524639,524640 | 0x80000000,524641 | 0x80000000,524642,196963,196964,65893,65894,65895,65896,65897,590187,590188,590190 | 0x01000000,590191,590192,590193,590186,590194,590232,590189,655410,590195,590196,590197,590198,590199,590200,590201,590202,590204,590203,590205,590206,590207,590208,590209,590210,590211,590212,590213,590214,590215,590216,590217,590218,590220,590221,590245 | 0x01000000,590219,590223,590222,590224,590225,590226,590227,590228,590229,590230,590231,590233,590234,590235,590236 | 0x01000000,590237,590238,590239,590240,590241,590242,590243,590244,393639,393640,393641,393638,393642,393643,393644,393645,393646,393647,393648,393649,393650,393651,393652,393653,393654,393655,393656,393657,393658 | 0x80000000,393659,393660,393661,393662,393663,393664,393665,393666,721347,655598,655420,524623 | 0x00800000,524622 | 0x00800000,524610 | 0x00800000,524624 | 0x00800000,524599 | 0x00800000,524601 | 0x00800000,524602 | 0x00800000,524605 | 0x00800000,524616 | 0x00800000,524617 | 0x00800000,524618 | 0x00800000,524619 | 0x00800000,524626 | 0x00800000,524633 | 0x00800000,524634 | 0x00800000,524635 | 0x00800000,524636 | 0x00800000,524608 | 0x00800000,524579,0};
unsigned int* get_mime_ids() { return mime_ids; }
#endif

View File

@@ -142,6 +142,10 @@ void parse(parse_job_t *job) {
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
}
if (IS_SUB_JOB(job)) {
SET_CURRENT_JOB(ProcData.ipc_db->ipc_ctx, job->filepath);
}
document_t *doc = malloc(sizeof(document_t));
strcpy(doc->filepath, job->filepath);

View File

@@ -51,11 +51,11 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "3.4.2"
#define VERSION "3.4.6"
static const char *const Version = VERSION;
static const int VersionMajor = 3;
static const int VersionMinor = 4;
static const int VersionPatch = 2;
static const int VersionPatch = 6;
#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown

View File

@@ -175,9 +175,19 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
return TRUE;
}
#define IS_IGNORED_MESSAGE(message) \
( \
strstr(message, "invalid glyph index") \
|| strstr(message, "... repeated") \
) \
void fz_err_callback(void *user, const char *message) {
document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message);
}
@@ -185,6 +195,10 @@ void fz_err_callback(void *user, const char *message) {
void fz_warn_callback(void *user, const char *message) {
document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message);
}

View File

@@ -223,14 +223,10 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d
void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) {
meta_line_t *meta = doc->meta_head;
while (meta != NULL) {
if (meta->key == key) {
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'",
key, meta->str_val, key, tag->value);
return;
}
meta = meta->next;
if (meta_contains_key(doc->meta_head, key)) {
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s'",
key, tag->value);
return;
}
text_buffer_t tex = text_buffer_create(-1);
@@ -445,7 +441,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
return SAVE_THUMBNAIL_FAILED;
}
if (ctx->tesseract_lang != NULL && thumbnail_index == 0) {
if (ctx->tesseract_lang != NULL && thumbnail_index == 0 && !meta_contains_key(doc->meta_head, MetaContent)) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
}

View File

@@ -172,6 +172,8 @@ typedef struct {
char filepath[PATH_MAX * 2 + 1];
} parse_job_t;
#define IS_SUB_JOB(job) ((job)->parent[0] != '\0')
#include "util.h"

View File

@@ -392,4 +392,18 @@ static parse_job_t *create_parse_job(const char *filepath, int mtime, size_t st_
return job;
}
static int meta_contains_key (meta_line_t *meta_head, enum metakey key) {
meta_line_t *meta = meta_head;
while (meta != NULL) {
if (meta->key == key) {
return TRUE;
}
meta = meta->next;
}
return FALSE;
}
#endif