Compare commits

...

19 Commits

Author SHA1 Message Date
Shy
509770ee24 Version bump 2025-07-03 22:35:18 -04:00
Shy
c319547b1e Add image/jp2 mime type 2025-07-03 22:32:03 -04:00
Shy
04f993be75 Version bump 2025-06-12 21:04:35 -04:00
Shy
ab9eab3536 Merge pull request #527 from RickConsole/fix/index-hanging
fixed occasional hanging when indexing
2025-06-12 20:19:53 -04:00
Rick Console
8bb12f8ae2 fixed occasional hanging when indexing 2025-04-18 16:16:38 -04:00
Shy
670dad185e Fix #521 2025-03-19 19:22:17 -04:00
Shy
bbbd727e6a Update sist2-python version 2025-03-19 18:38:21 -04:00
Shy
d800effad9 Merge pull request #511 from dpieski/patch-5
Update README.md
2025-02-06 17:58:36 -05:00
Shy
371e9c408e Merge pull request #512 from dpieski/patch-6
Update README.md
2025-02-06 17:58:07 -05:00
Andrew
ee1b1d8bb4 Update README.md
Moved README references from simon987 to sist2app
2025-02-03 15:09:11 -06:00
Andrew
63a097a463 Update README.md
Update to the docker-compose.yml example.
2025-02-03 15:00:03 -06:00
Shy
7a03a2202e Fix #481 2025-01-24 19:40:08 -05:00
Shy
050fc500ce Fix #462 2025-01-24 19:22:01 -05:00
Shy
d44679131b Update compose file to avoid confusion. Fixes #490 2025-01-23 21:45:01 -05:00
Shy
4dd5e70406 Fix #492 2025-01-23 21:40:37 -05:00
Shy
5a82581992 Fix magic database problem 2025-01-23 21:40:27 -05:00
Shy
0dc18a56c0 Fix #509 2025-01-23 19:10:17 -05:00
Shy
258b2e31e6 Version bump 2025-01-23 19:10:02 -05:00
Shy
c726074029 Update tessdata paths 2025-01-23 19:09:54 -05:00
19 changed files with 254 additions and 207 deletions

View File

@@ -89,7 +89,7 @@ target_include_directories(
target_compile_options(
sist2
PRIVATE
-fPIC
# -fPIC
)
if (SIST_DEBUG)
@@ -147,7 +147,7 @@ add_dependencies(
target_link_libraries(
sist2
m
# m
z
argparse
unofficial::mongoose::mongoose

View File

@@ -1,5 +1,5 @@
![GitHub](https://img.shields.io/github/license/simon987/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/simon987/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/simon987/sist2)
![GitHub](https://img.shields.io/github/license/sist2app/sist2.svg)
[![CodeFactor](https://www.codefactor.io/repository/github/sist2app/sist2/badge?s=05daa325188aac4eae32c786f3d9cf4e0593f822)](https://www.codefactor.io/repository/github/sist2app/sist2)
[![Development snapshots](https://ci.simon987.net/api/badges/simon987/sist2/status.svg)](https://files.simon987.net/.gate/sist2/simon987_sist2/)
**Demo**: [sist2.simon987.net](https://sist2.simon987.net/)
@@ -38,8 +38,6 @@ sist2 (Simple incremental search tool)
### Using Docker Compose *(Windows/Linux/Mac)*
```yaml
version: "3"
services:
elasticsearch:
image: elasticsearch:7.17.9
@@ -53,11 +51,11 @@ services:
- "PUID=1000"
- "PGID=1000"
sist2-admin:
image: simon987/sist2:3.4.2-x64-linux
image: sist2app/sist2:x64-linux
restart: unless-stopped
volumes:
- /data/sist2-admin-data/:/sist2-admin/
- /:/host
- /<path to index>/:/host
ports:
- 4090:4090
# NOTE: Don't expose this port publicly!
@@ -81,7 +79,7 @@ Navigate to http://localhost:8080/ to configure sist2-admin.
```
* **SQLite**: No installation required
2. Download the [latest sist2 release](https://github.com/simon987/sist2/releases).
2. Download the [latest sist2 release](https://github.com/sist2app/sist2/releases).
Select the file corresponding to your CPU architecture and mark the binary as executable with `chmod +x`.
3. See [usage guide](docs/USAGE.md) for command line usage.
@@ -100,20 +98,20 @@ Example usage:
| File type | Library | Content | Thumbnail | Metadata |
|:--------------------------------------------------------------------------|:-----------------------------------------------------------------------------|:---------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------|
| pdf,xps,fb2,epub | MuPDF | text+ocr | yes | author, title |
| cbz,cbr | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | - | yes | - |
| cbz,cbr | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | - | yes | - |
| `audio/*` | ffmpeg | - | yes | ID3 tags |
| `video/*` | ffmpeg | - | yes | title, comment, artist |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/simon987/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| `image/*` | ffmpeg | ocr | yes | [Common EXIF tags](https://github.com/sist2app/sist2/blob/efdde2734eca9b14a54f84568863b7ffd59bdba3/src/parsing/media.c#L190), GPS tags |
| raw, rw2, dng, cr2, crw, dcr, k25, kdc, mrw, pef, xf3, arw, sr2, srf, erf | LibRaw | no | yes | Common EXIF tags, GPS tags |
| ttf,ttc,cff,woff,fnt,otf | Freetype2 | - | yes, `bmp` | Name & style |
| `text/plain` | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | no | - |
| `text/plain` | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
| html, xml | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | no | - |
| tar, zip, rar, 7z, ar ... | Libarchive | yes\* | - | no |
| docx, xlsx, pptx | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| docx, xlsx, pptx | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | if embedded | creator, modified_by, title |
| doc (MS Word 97-2003) | antiword | yes | no | author, title |
| mobi, azw, azw3 | libmobi | yes | yes | author, title |
| wpd (WordPerfect) | libwpd | yes | no | *planned* |
| json, jsonl, ndjson | [libscan](https://github.com/simon987/sist2/tree/master/third-party/libscan) | yes | - | - |
| json, jsonl, ndjson | [libscan](https://github.com/sist2app/sist2/tree/master/third-party/libscan) | yes | - | - |
\* *See [Archive files](#archive-files)*
@@ -137,7 +135,7 @@ You can enable OCR support for ebook (pdf,xps,fb2,epub) or image file types with
Download the language data files with your package manager (`apt install tesseract-ocr-eng`) or
directly [from Github](https://github.com/tesseract-ocr/tesseract/wiki/Data-Files).
The `simon987/sist2` image comes with common languages
The `sist2app/sist2` image comes with common languages
(hin, jpn, eng, fra, rus, spa, chi_sim, deu, pol) pre-installed.
You can use the `+` separator to specify multiple languages. The language
@@ -177,13 +175,13 @@ sist2 v3.0.4+ supports named-entity recognition (NER). Simply add a supported re
to enable it.
The text processing is done in your browser, no data is sent to any third-party services.
See [simon987/sist2-ner-models](https://github.com/simon987/sist2-ner-models) for more details.
See [sist2app/sist2-ner-models](https://github.com/sist2app/sist2-ner-models) for more details.
#### List of available repositories:
| URL | Maintainer | Purpose |
|---------------------------------------------------------------------------------------------------------|-----------------------------------------|---------|
| [simon987/sist2-ner-models](https://raw.githubusercontent.com/simon987/sist2-ner-models/main/repo.json) | [simon987](https://github.com/simon987) | General |
| [sist2app/sist2-ner-models](https://raw.githubusercontent.com/sist2app/sist2-ner-models/main/repo.json) | [sist2app](https://github.com/sist2app) | General |
<details>
<summary>Screenshot</summary>
@@ -199,7 +197,7 @@ You can compile **sist2** by yourself if you don't want to use the pre-compiled
### Using docker
```bash
git clone --recursive https://github.com/simon987/sist2/
git clone --recursive https://github.com/sist2app/sist2/
cd sist2
docker build . -t my-sist2-image
# Copy sist2 executable from docker image
@@ -214,7 +212,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
apt install gcc g++ python3 yasm ragel automake autotools-dev wget libtool libssl-dev curl zip unzip tar xorg-dev libglu1-mesa-dev libxcursor-dev libxml2-dev libxinerama-dev gettext nasm git nodejs
```
2. Install vcpkg using my fork: https://github.com/simon987/vcpkg
2. Install vcpkg using my fork: https://github.com/sist2app/vcpkg
3. Install vcpkg dependencies
```bash
@@ -223,7 +221,7 @@ docker run --rm --entrypoint cat my-sist2-image /root/sist2 > sist2-x64-linux
4. Build
```bash
git clone --recursive https://github.com/simon987/sist2/
git clone --recursive https://github.com/sist2app/sist2/
(cd sist2-vue; npm install; npm run build)
(cd sist2-admin/frontend; npm install; npm run build)
cmake -DSIST_DEBUG=off -DCMAKE_TOOLCHAIN_FILE=<VCPKG_ROOT>/scripts/buildsystems/vcpkg.cmake .

View File

@@ -18,7 +18,7 @@ services:
container_name: sist2-admin
volumes:
- /data/sist2-admin-data/:/sist2-admin/
- /:/host
- /<path to index>/:/host
ports:
- 4090:4090
# NOTE: Don't export this port publicly!

View File

@@ -1,5 +1,16 @@
with open("/usr/lib/file/magic.mgc", "rb") as f:
data = f.read()
MAGIC_PATHS = [
"/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
"/work/vcpkg/installed/x64-linux/share/libmagic/misc/magic.mgc",
"/usr/lib/file/magic.mgc"
]
for path in MAGIC_PATHS:
try:
with open(path, "rb") as f:
data = f.read()
break
except:
continue
print("char magic_database_buffer[%d] = {%s};" % (len(data), ",".join(str(int(b)) for b in data)))

View File

@@ -449,4 +449,5 @@ image/x-sigma-x3f, xf3
image/x-sony-arw, arw
image/x-sony-sr2, sr2
image/x-sony-srf, srf
image/x-epson-erf, erf
image/x-epson-erf, erf
image/jp2, jp2
1 application/x-matlab-data mat
449 image/x-sony-arw arw
450 image/x-sony-sr2 sr2
451 image/x-sony-srf srf
452 image/x-epson-erf erf
453 image/jp2 jp2

View File

@@ -4,4 +4,4 @@ uvicorn
websockets
pycron
GitPython
git+https://github.com/sist2app/sist2-python.git
git+https://github.com/sist2app/sist2-python.git@2.1

View File

@@ -220,7 +220,7 @@ class Sist2IndexTask(Sist2Task):
except ProcessLookupError:
pass
try:
os.wait()
os.waitpid(pid, 0)
except ChildProcessError:
pass

View File

@@ -309,7 +309,7 @@ class Sist2Api {
}
getTagsSqlite() {
return axios.get(`${this.baseUrl}/fts/tags`)
return axios.get(`${this.baseUrl}fts/tags`)
.then(resp => {
return resp.data.map(tag => this._createEsTag(tag.tag, tag.count))
});
@@ -566,7 +566,7 @@ class Sist2Api {
}
getDocumentSqlite(sid) {
return axios.get(`${this.baseUrl}/fts/d/${sid}`)
return axios.get(`${this.baseUrl}fts/d/${sid}`)
.then(resp => ({
_source: resp.data
}));
@@ -589,7 +589,7 @@ class Sist2Api {
}
getTagSuggestionsSqlite(prefix) {
return axios.post(`${this.baseUrl}/fts/suggestTags`, prefix)
return axios.post(`${this.baseUrl}fts/suggestTags`, prefix)
.then(resp => (resp.data));
}
@@ -620,7 +620,7 @@ class Sist2Api {
}
getEmbeddings(sid, modelId) {
return axios.post(`${this.baseUrl}/e/${sid}/${modelId.toString().padStart(3, '0')}`)
return axios.post(`${this.baseUrl}e/${sid}/${modelId.toString().padStart(3, '0')}`)
.then(resp => (resp.data));
}
}

View File

@@ -117,11 +117,11 @@ class Sist2ElasticsearchQuery {
}
if (dateMin && dateMax) {
filters.push({range: {mtime: {gte: dateMin, lte: dateMax}}})
filters.push({range: {mtime: {gte: dateMin, lte: dateMax, format: "epoch_second"}}})
} else if (dateMin) {
filters.push({range: {mtime: {gte: dateMin}}})
filters.push({range: {mtime: {gte: dateMin, format: "epoch_second"}}})
} else if (dateMax) {
filters.push({range: {mtime: {lte: dateMax}}})
filters.push({range: {mtime: {lte: dateMax, format: "epoch_second"}}})
}
const path = pathText.replace(/\/$/, "").toLowerCase(); //remove trailing slashes

View File

@@ -25,6 +25,7 @@ const char *TESS_DATAPATHS[] = {
"/usr/share/tessdata/",
"/usr/share/tesseract-ocr/tessdata/",
"/usr/share/tesseract-ocr/4.00/tessdata/",
"/usr/share/tesseract-ocr/5/tessdata/",
"./",
NULL
};

View File

@@ -114,7 +114,7 @@ void save_current_job_info(sqlite3_context *ctx, int argc, sqlite3_value **argv)
char buf[PATH_MAX];
strcpy(buf, current_job);
strcpy(ipc_ctx->current_job[ProcData.thread_id], current_job);
SET_CURRENT_JOB(ipc_ctx, current_job);
sqlite3_result_text(ctx, "ok", -1, SQLITE_STATIC);
}

View File

@@ -64,6 +64,8 @@ typedef struct {
char current_job[MAX_THREADS][PATH_MAX * 2];
} database_ipc_ctx_t;
#define SET_CURRENT_JOB(ctx, job) (strcpy((ctx)->current_job[ProcData.thread_id], job))
typedef struct {
double date_min;
double date_max;

View File

@@ -295,166 +295,167 @@ image_g3fax=524575,
image_gif=524576,
image_heic=524577,
image_ief=524578,
image_jpeg=524579,
image_jutvision=524580,
image_naplps=524581,
image_pict=524582,
image_png=524583,
image_svg=524584 | 0x80000000,
image_svg_xml=524585 | 0x80000000,
image_tiff=524586,
image_vnd_adobe_photoshop=524587 | 0x80000000,
image_vnd_djvu=524588 | 0x80000000,
image_vnd_fpx=524589,
image_vnd_microsoft_icon=524590,
image_vnd_rn_realflash=524591,
image_vnd_rn_realpix=524592,
image_vnd_wap_wbmp=524593,
image_vnd_xiff=524594,
image_webp=524595,
image_wmf=524596,
image_x_3ds=524597,
image_x_adobe_dng=524598 | 0x00800000,
image_x_award_bioslogo=524599,
image_x_canon_cr2=524600 | 0x00800000,
image_x_canon_crw=524601 | 0x00800000,
image_x_cmu_raster=524602,
image_x_cur=524603,
image_x_dcraw=524604 | 0x00800000,
image_x_dwg=524605,
image_x_eps=524606,
image_x_epson_erf=524607 | 0x00800000,
image_x_exr=524608,
image_x_fuji_raf=524609 | 0x00800000,
image_x_gem=524610,
image_x_icns=524611,
image_x_icon=524612 | 0x80000000,
image_x_jg=524613,
image_x_jps=524614,
image_x_kodak_dcr=524615 | 0x00800000,
image_x_kodak_k25=524616 | 0x00800000,
image_x_kodak_kdc=524617 | 0x00800000,
image_x_minolta_mrw=524618 | 0x00800000,
image_x_ms_bmp=524619,
image_x_niff=524620,
image_x_nikon_nef=524621 | 0x00800000,
image_x_olympus_orf=524622 | 0x00800000,
image_x_panasonic_raw=524623 | 0x00800000,
image_x_pcx=524624,
image_x_pentax_pef=524625 | 0x00800000,
image_x_pict=524626,
image_x_portable_bitmap=524627,
image_x_portable_graymap=524628,
image_x_portable_pixmap=524629,
image_x_quicktime=524630,
image_x_rgb=524631,
image_x_sigma_x3f=524632 | 0x00800000,
image_x_sony_arw=524633 | 0x00800000,
image_x_sony_sr2=524634 | 0x00800000,
image_x_sony_srf=524635 | 0x00800000,
image_x_tga=524636,
image_x_tiff=524637,
image_x_win_bitmap=524638,
image_x_xcf=524639 | 0x80000000,
image_x_xpixmap=524640 | 0x80000000,
image_x_xwindowdump=524641,
message_news=196962,
message_rfc822=196963,
model_vnd_dwf=65892,
model_vnd_gdl=65893,
model_vnd_gs_gdl=65894,
model_vrml=65895,
model_x_pov=65896,
text_PGP=590185,
text_asp=590186,
text_css=590187,
text_csv=590188,
text_html=590189 | 0x01000000,
text_javascript=590190,
text_mcf=590191,
text_pascal=590192,
text_plain=590193,
text_richtext=590194,
text_rtf=590195,
text_scriplet=590196,
text_tab_separated_values=590197,
text_troff=590198,
text_uri_list=590199,
text_vnd_abc=590200,
text_vnd_fmi_flexstor=590201,
text_vnd_wap_wml=590202,
text_vnd_wap_wmlscript=590203,
text_webviewhtml=590204,
text_x_Algol68=590205,
text_x_asm=590206,
text_x_audiosoft_intra=590207,
text_x_awk=590208,
text_x_bcpl=590209,
text_x_c=590210,
text_x_c__=590211,
text_x_component=590212,
text_x_diff=590213,
text_x_fortran=590214,
text_x_java=590215,
text_x_la_asf=590216,
text_x_lisp=590217,
text_x_m=590218,
text_x_m4=590219,
text_x_makefile=590220,
text_x_ms_regedit=590221,
text_x_msdos_batch=590222,
text_x_objective_c=590223,
text_x_pascal=590224,
text_x_perl=590225,
text_x_php=590226,
text_x_po=590227,
text_x_python=590228,
text_x_ruby=590229,
text_x_sass=590230,
text_x_script_python=590231,
text_x_scss=590232,
text_x_server_parsed_html=590233,
text_x_setext=590234,
text_x_sgml=590235 | 0x01000000,
text_x_shellscript=590236,
text_x_speech=590237,
text_x_tcl=590238,
text_x_tex=590239,
text_x_uil=590240,
text_x_uuencode=590241,
text_x_vcalendar=590242,
text_x_vcard=590243,
text_xml=590244 | 0x01000000,
video_MP2T=393637,
video_animaflex=393638,
video_avi=393639,
video_avs_video=393640,
video_mp4=393641,
video_mpeg=393642,
video_quicktime=393643,
video_vdo=393644,
video_vivo=393645,
video_vnd_rn_realvideo=393646,
video_vosaic=393647,
video_webm=393648,
video_x_amt_demorun=393649,
video_x_amt_showrun=393650,
video_x_atomic3d_feature=393651,
video_x_dl=393652,
video_x_dv=393653,
video_x_fli=393654,
video_x_flv=393655,
video_x_isvideo=393656,
video_x_jng=393657 | 0x80000000,
video_x_m4v=393658,
video_x_matroska=393659,
video_x_mng=393660,
video_x_motion_jpeg=393661,
video_x_ms_asf=393662,
video_x_msvideo=393663,
video_x_qtc=393664,
video_x_sgi_movie=393665,
x_epoc_x_sisx_app=721346,
image_jp2=524579,
image_jpeg=524580,
image_jutvision=524581,
image_naplps=524582,
image_pict=524583,
image_png=524584,
image_svg=524585 | 0x80000000,
image_svg_xml=524586 | 0x80000000,
image_tiff=524587,
image_vnd_adobe_photoshop=524588 | 0x80000000,
image_vnd_djvu=524589 | 0x80000000,
image_vnd_fpx=524590,
image_vnd_microsoft_icon=524591,
image_vnd_rn_realflash=524592,
image_vnd_rn_realpix=524593,
image_vnd_wap_wbmp=524594,
image_vnd_xiff=524595,
image_webp=524596,
image_wmf=524597,
image_x_3ds=524598,
image_x_adobe_dng=524599 | 0x00800000,
image_x_award_bioslogo=524600,
image_x_canon_cr2=524601 | 0x00800000,
image_x_canon_crw=524602 | 0x00800000,
image_x_cmu_raster=524603,
image_x_cur=524604,
image_x_dcraw=524605 | 0x00800000,
image_x_dwg=524606,
image_x_eps=524607,
image_x_epson_erf=524608 | 0x00800000,
image_x_exr=524609,
image_x_fuji_raf=524610 | 0x00800000,
image_x_gem=524611,
image_x_icns=524612,
image_x_icon=524613 | 0x80000000,
image_x_jg=524614,
image_x_jps=524615,
image_x_kodak_dcr=524616 | 0x00800000,
image_x_kodak_k25=524617 | 0x00800000,
image_x_kodak_kdc=524618 | 0x00800000,
image_x_minolta_mrw=524619 | 0x00800000,
image_x_ms_bmp=524620,
image_x_niff=524621,
image_x_nikon_nef=524622 | 0x00800000,
image_x_olympus_orf=524623 | 0x00800000,
image_x_panasonic_raw=524624 | 0x00800000,
image_x_pcx=524625,
image_x_pentax_pef=524626 | 0x00800000,
image_x_pict=524627,
image_x_portable_bitmap=524628,
image_x_portable_graymap=524629,
image_x_portable_pixmap=524630,
image_x_quicktime=524631,
image_x_rgb=524632,
image_x_sigma_x3f=524633 | 0x00800000,
image_x_sony_arw=524634 | 0x00800000,
image_x_sony_sr2=524635 | 0x00800000,
image_x_sony_srf=524636 | 0x00800000,
image_x_tga=524637,
image_x_tiff=524638,
image_x_win_bitmap=524639,
image_x_xcf=524640 | 0x80000000,
image_x_xpixmap=524641 | 0x80000000,
image_x_xwindowdump=524642,
message_news=196963,
message_rfc822=196964,
model_vnd_dwf=65893,
model_vnd_gdl=65894,
model_vnd_gs_gdl=65895,
model_vrml=65896,
model_x_pov=65897,
text_PGP=590186,
text_asp=590187,
text_css=590188,
text_csv=590189,
text_html=590190 | 0x01000000,
text_javascript=590191,
text_mcf=590192,
text_pascal=590193,
text_plain=590194,
text_richtext=590195,
text_rtf=590196,
text_scriplet=590197,
text_tab_separated_values=590198,
text_troff=590199,
text_uri_list=590200,
text_vnd_abc=590201,
text_vnd_fmi_flexstor=590202,
text_vnd_wap_wml=590203,
text_vnd_wap_wmlscript=590204,
text_webviewhtml=590205,
text_x_Algol68=590206,
text_x_asm=590207,
text_x_audiosoft_intra=590208,
text_x_awk=590209,
text_x_bcpl=590210,
text_x_c=590211,
text_x_c__=590212,
text_x_component=590213,
text_x_diff=590214,
text_x_fortran=590215,
text_x_java=590216,
text_x_la_asf=590217,
text_x_lisp=590218,
text_x_m=590219,
text_x_m4=590220,
text_x_makefile=590221,
text_x_ms_regedit=590222,
text_x_msdos_batch=590223,
text_x_objective_c=590224,
text_x_pascal=590225,
text_x_perl=590226,
text_x_php=590227,
text_x_po=590228,
text_x_python=590229,
text_x_ruby=590230,
text_x_sass=590231,
text_x_script_python=590232,
text_x_scss=590233,
text_x_server_parsed_html=590234,
text_x_setext=590235,
text_x_sgml=590236 | 0x01000000,
text_x_shellscript=590237,
text_x_speech=590238,
text_x_tcl=590239,
text_x_tex=590240,
text_x_uil=590241,
text_x_uuencode=590242,
text_x_vcalendar=590243,
text_x_vcard=590244,
text_xml=590245 | 0x01000000,
video_MP2T=393638,
video_animaflex=393639,
video_avi=393640,
video_avs_video=393641,
video_mp4=393642,
video_mpeg=393643,
video_quicktime=393644,
video_vdo=393645,
video_vivo=393646,
video_vnd_rn_realvideo=393647,
video_vosaic=393648,
video_webm=393649,
video_x_amt_demorun=393650,
video_x_amt_showrun=393651,
video_x_atomic3d_feature=393652,
video_x_dl=393653,
video_x_dv=393654,
video_x_fli=393655,
video_x_flv=393656,
video_x_isvideo=393657,
video_x_jng=393658 | 0x80000000,
video_x_m4v=393659,
video_x_matroska=393660,
video_x_mng=393661,
video_x_motion_jpeg=393662,
video_x_ms_asf=393663,
video_x_msvideo=393664,
video_x_qtc=393665,
video_x_sgi_movie=393666,
x_epoc_x_sisx_app=721347,
};
char *mime_get_mime_text(unsigned int mime_id) {switch (mime_id) {
case application_x_matlab_data: return "application/x-matlab-data";
@@ -908,6 +909,7 @@ case image_x_sony_arw: return "image/x-sony-arw";
case image_x_sony_sr2: return "image/x-sony-sr2";
case image_x_sony_srf: return "image/x-sony-srf";
case image_x_epson_erf: return "image/x-epson-erf";
case image_jp2: return "image/jp2";
default: return NULL;}}
unsigned int mime_extension_lookup(unsigned long extension_crc32) {switch (extension_crc32) {
case 2495639202:return application_x_matlab_data;
@@ -1291,6 +1293,7 @@ case 1698465774:return image_x_sony_arw;
case 2083014127:return image_x_sony_sr2;
case 271503362:return image_x_sony_srf;
case 142938048:return image_x_epson_erf;
case 1575600018:return image_jp2;
default: return 0;}}
unsigned int mime_name_lookup(unsigned long mime_crc32) {switch (mime_crc32) {
case 3272851765: return application_x_matlab_data;
@@ -1744,7 +1747,8 @@ case 3060720351: return image_x_sony_arw;
case 2944016606: return image_x_sony_sr2;
case 3279729971: return image_x_sony_srf;
case 1665206815: return image_x_epson_erf;
case 1849479005: return image_jp2;
default: return 0;}}
unsigned int mime_ids[] = {655530,655363,655364,655365,655366,655362,655361,655367,655368,655369,655370,655371,655372 | 0x40000000,655373,655374,655375,655376 | 0x08000000,655377,655378,655379,655380,655382,655381,655383,655384,655390,655385,655386,655387,655388,655389,655391,655392,655393,655394,655395 | 0x40000000,655396,655397,655398,655399,655400,655401,655402,655403,655404,655405,655406,655407,655408,655411,655412,655413,655414,655415,655416,655417,655418,655419 | 0x20000000,655421,655422,655423,655424,655425,655426,655427,655428,655429,655430,655431,655432 | 0x04000000,655433 | 0x04000000,655434 | 0x04000000,655435,655436,655437,655438,655439,655440,655441,655442,655443,655444,655445,655446 | 0x10000000,655447,655448,655449 | 0x10000000,655450,655451,655452,655453,655454,655455,655456,655457,655458,655459,655461 | 0x08000000,655460,655462,655463,655464,655465,655466,655467,655468,655469,655470,655471,655472,655473,655474,655475,655476,655477,655478,655479,655480,1,655481,655482,655483,655484,655485,655486,655487,655488,655489 | 0x20000000,655490,655491,655492,655493,655494,655495,655496,655497,655498,655499,655500,655501,655502,655503,655504,655505,655506,655507,655508,655509,655510,655511,655512,655513,655514,655515,655516,655517,655519,655518 | 0x08000000,655521,655520,655522 | 0x08000000,655523 | 0x08000000,655524 | 0x08000000,655525,655526,655527,655528,655529,655531,655532,655533,655534,655535,655599,655536 | 0x02000000,655409 | 0x02000000,655540,655537,655538,655539,655541,655542,655543,655544,655545,655546,655547,655548,655549,655550,655552,655551,655553,655554,655555,655556,655557,655558,655559,655560,655561,655562 | 0x10000000,655563,655564,655565,655566,655567,655569,655568,655570,655571,655572,655573,655574,655575,655576,655577,655578 | 0x10000000,655579,655580,655581,655583,655582,655584,655585,655586,655587,655588,655589,655590,655591,655592,655593,655594,655595 | 0x08000000,655596,655597 | 0x08000000,655600 | 0x10000000,655601,458994 | 0x80000000,458995,458996,458998,458997,458999,459000,459001,459002,459003,459004,459005,459006,459007,459008,459009,459010,459011,459012,459013,459014,459015,459016,459017,459018,459030,459019,459020,459021,459022,459023,459025,459024,459026,459027,459029 | 0x80000000,459028 | 0x80000000,327959 | 0x20000000,327960 | 0x20000000,327962 | 0x20000000,327961 | 0x20000000,524571,524572,524573,524574,524575,524576,524577,524578,524579,524580,524581,524582,524583,524584 | 0x80000000,524585 | 0x80000000,524586,524587 | 0x80000000,524588 | 0x80000000,524589,524590,524591,524592,524593,524594,524595,524596,524597,524599,524602,524603,524605,524606,524608,524610,524611,524612 | 0x80000000,524613,524614,524619,524620,524624,524626,524627,524628,524629,524630,524631,524636,524637,524638,524639 | 0x80000000,524640 | 0x80000000,524641,196962,196963,65892,65893,65894,65895,65896,590186,590187,590189 | 0x01000000,590190,590191,590192,590185,590193,590231,590188,655410,590194,590195,590196,590197,590198,590199,590200,590201,590203,590202,590204,590205,590206,590207,590208,590209,590210,590211,590212,590213,590214,590215,590216,590217,590219,590220,590244 | 0x01000000,590218,590222,590221,590223,590224,590225,590226,590227,590228,590229,590230,590232,590233,590234,590235 | 0x01000000,590236,590237,590238,590239,590240,590241,590242,590243,393638,393639,393640,393637,393641,393642,393643,393644,393645,393646,393647,393648,393649,393650,393651,393652,393653,393654,393655,393656,393657 | 0x80000000,393658,393659,393660,393661,393662,393663,393664,393665,721346,655598,655420,524622 | 0x00800000,524621 | 0x00800000,524609 | 0x00800000,524623 | 0x00800000,524598 | 0x00800000,524600 | 0x00800000,524601 | 0x00800000,524604 | 0x00800000,524615 | 0x00800000,524616 | 0x00800000,524617 | 0x00800000,524618 | 0x00800000,524625 | 0x00800000,524632 | 0x00800000,524633 | 0x00800000,524634 | 0x00800000,524635 | 0x00800000,524607 | 0x00800000,0};
unsigned int mime_ids[] = {655530,655363,655364,655365,655366,655362,655361,655367,655368,655369,655370,655371,655372 | 0x40000000,655373,655374,655375,655376 | 0x08000000,655377,655378,655379,655380,655382,655381,655383,655384,655390,655385,655386,655387,655388,655389,655391,655392,655393,655394,655395 | 0x40000000,655396,655397,655398,655399,655400,655401,655402,655403,655404,655405,655406,655407,655408,655411,655412,655413,655414,655415,655416,655417,655418,655419 | 0x20000000,655421,655422,655423,655424,655425,655426,655427,655428,655429,655430,655431,655432 | 0x04000000,655433 | 0x04000000,655434 | 0x04000000,655435,655436,655437,655438,655439,655440,655441,655442,655443,655444,655445,655446 | 0x10000000,655447,655448,655449 | 0x10000000,655450,655451,655452,655453,655454,655455,655456,655457,655458,655459,655461 | 0x08000000,655460,655462,655463,655464,655465,655466,655467,655468,655469,655470,655471,655472,655473,655474,655475,655476,655477,655478,655479,655480,1,655481,655482,655483,655484,655485,655486,655487,655488,655489 | 0x20000000,655490,655491,655492,655493,655494,655495,655496,655497,655498,655499,655500,655501,655502,655503,655504,655505,655506,655507,655508,655509,655510,655511,655512,655513,655514,655515,655516,655517,655519,655518 | 0x08000000,655521,655520,655522 | 0x08000000,655523 | 0x08000000,655524 | 0x08000000,655525,655526,655527,655528,655529,655531,655532,655533,655534,655535,655599,655536 | 0x02000000,655409 | 0x02000000,655540,655537,655538,655539,655541,655542,655543,655544,655545,655546,655547,655548,655549,655550,655552,655551,655553,655554,655555,655556,655557,655558,655559,655560,655561,655562 | 0x10000000,655563,655564,655565,655566,655567,655569,655568,655570,655571,655572,655573,655574,655575,655576,655577,655578 | 0x10000000,655579,655580,655581,655583,655582,655584,655585,655586,655587,655588,655589,655590,655591,655592,655593,655594,655595 | 0x08000000,655596,655597 | 0x08000000,655600 | 0x10000000,655601,458994 | 0x80000000,458995,458996,458998,458997,458999,459000,459001,459002,459003,459004,459005,459006,459007,459008,459009,459010,459011,459012,459013,459014,459015,459016,459017,459018,459030,459019,459020,459021,459022,459023,459025,459024,459026,459027,459029 | 0x80000000,459028 | 0x80000000,327959 | 0x20000000,327960 | 0x20000000,327962 | 0x20000000,327961 | 0x20000000,524571,524572,524573,524574,524575,524576,524577,524578,524580,524581,524582,524583,524584,524585 | 0x80000000,524586 | 0x80000000,524587,524588 | 0x80000000,524589 | 0x80000000,524590,524591,524592,524593,524594,524595,524596,524597,524598,524600,524603,524604,524606,524607,524609,524611,524612,524613 | 0x80000000,524614,524615,524620,524621,524625,524627,524628,524629,524630,524631,524632,524637,524638,524639,524640 | 0x80000000,524641 | 0x80000000,524642,196963,196964,65893,65894,65895,65896,65897,590187,590188,590190 | 0x01000000,590191,590192,590193,590186,590194,590232,590189,655410,590195,590196,590197,590198,590199,590200,590201,590202,590204,590203,590205,590206,590207,590208,590209,590210,590211,590212,590213,590214,590215,590216,590217,590218,590220,590221,590245 | 0x01000000,590219,590223,590222,590224,590225,590226,590227,590228,590229,590230,590231,590233,590234,590235,590236 | 0x01000000,590237,590238,590239,590240,590241,590242,590243,590244,393639,393640,393641,393638,393642,393643,393644,393645,393646,393647,393648,393649,393650,393651,393652,393653,393654,393655,393656,393657,393658 | 0x80000000,393659,393660,393661,393662,393663,393664,393665,393666,721347,655598,655420,524623 | 0x00800000,524622 | 0x00800000,524610 | 0x00800000,524624 | 0x00800000,524599 | 0x00800000,524601 | 0x00800000,524602 | 0x00800000,524605 | 0x00800000,524616 | 0x00800000,524617 | 0x00800000,524618 | 0x00800000,524619 | 0x00800000,524626 | 0x00800000,524633 | 0x00800000,524634 | 0x00800000,524635 | 0x00800000,524636 | 0x00800000,524608 | 0x00800000,524579,0};
unsigned int* get_mime_ids() { return mime_ids; }
#endif

View File

@@ -142,6 +142,10 @@ void parse(parse_job_t *job) {
job->vfile.calculate_checksum = ScanCtx.calculate_checksums;
}
if (IS_SUB_JOB(job)) {
SET_CURRENT_JOB(ProcData.ipc_db->ipc_ctx, job->filepath);
}
document_t *doc = malloc(sizeof(document_t));
strcpy(doc->filepath, job->filepath);

View File

@@ -51,11 +51,11 @@
#include <ctype.h>
#include "git_hash.h"
#define VERSION "3.4.2"
#define VERSION "3.4.5"
static const char *const Version = VERSION;
static const int VersionMajor = 3;
static const int VersionMinor = 4;
static const int VersionPatch = 2;
static const int VersionPatch = 5;
#ifndef SIST_PLATFORM
#define SIST_PLATFORM unknown

View File

@@ -175,9 +175,19 @@ int render_cover(scan_ebook_ctx_t *ctx, fz_context *fzctx, document_t *doc, fz_d
return TRUE;
}
#define IS_IGNORED_MESSAGE(message) \
( \
strstr(message, "invalid glyph index") \
|| strstr(message, "... repeated") \
) \
void fz_err_callback(void *user, const char *message) {
document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_WARNINGF(doc->filepath, "FZ: %s", message);
}
@@ -185,6 +195,10 @@ void fz_err_callback(void *user, const char *message) {
void fz_warn_callback(void *user, const char *message) {
document_t *doc = (document_t *) user;
if (IS_IGNORED_MESSAGE(message)) {
return;
}
const scan_ebook_ctx_t *ctx = &thread_ctx;
CTX_LOG_DEBUGF(doc->filepath, "FZ: %s", message);
}

View File

@@ -223,14 +223,10 @@ read_frame(scan_media_ctx_t *ctx, AVFormatContext *pFormatCtx, AVCodecContext *d
void append_tag_meta_if_not_exists(scan_media_ctx_t *ctx, document_t *doc, AVDictionaryEntry *tag, enum metakey key) {
meta_line_t *meta = doc->meta_head;
while (meta != NULL) {
if (meta->key == key) {
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s' and '%02x=%s'",
key, meta->str_val, key, tag->value);
return;
}
meta = meta->next;
if (meta_contains_key(doc->meta_head, key)) {
CTX_LOG_DEBUGF(doc->filepath, "Ignoring duplicate tag: '%02x=%s'",
key, tag->value);
return;
}
text_buffer_t tex = text_buffer_create(-1);
@@ -445,7 +441,7 @@ int decode_frame_and_save_thumbnail(scan_media_ctx_t *ctx, AVFormatContext *pFor
return SAVE_THUMBNAIL_FAILED;
}
if (ctx->tesseract_lang != NULL && thumbnail_index == 0) {
if (ctx->tesseract_lang != NULL && thumbnail_index == 0 && !meta_contains_key(doc->meta_head, MetaContent)) {
ocr_image(ctx, doc, decoder, frame_and_packet->frame);
}

View File

@@ -172,6 +172,8 @@ typedef struct {
char filepath[PATH_MAX * 2 + 1];
} parse_job_t;
#define IS_SUB_JOB(job) ((job)->parent[0] != '\0')
#include "util.h"

View File

@@ -392,4 +392,18 @@ static parse_job_t *create_parse_job(const char *filepath, int mtime, size_t st_
return job;
}
static int meta_contains_key (meta_line_t *meta_head, enum metakey key) {
meta_line_t *meta = meta_head;
while (meta != NULL) {
if (meta->key == key) {
return TRUE;
}
meta = meta->next;
}
return FALSE;
}
#endif